├── .github ├── FUNDING.yml └── workflows │ └── ci.yml ├── main.py ├── tests ├── samples │ ├── the_godfather_1972.da.srt │ ├── the_godfather_1972.es.srt │ ├── the_godfather_1972.he.srt │ ├── the_godfather_1972.it.srt │ ├── the_godfather_1972.ko.srt │ └── the_godfather_1972.pt.srt ├── __init__.py ├── tests_stream.py └── tests_analysis.py ├── publish.bat ├── .gitignore ├── .editorconfig ├── requirements.txt ├── test.py ├── curator ├── plans │ ├── __init__.py │ ├── link.py │ ├── sync.py │ ├── rename.py │ ├── tag.py │ ├── convert.py │ └── merge.py ├── __init__.py ├── database.py ├── databases │ ├── __init__.py │ ├── omdb.py │ ├── tmdb.py │ └── imdb.py ├── util.py ├── task.py ├── analysis.py ├── plan.py ├── media.py ├── cli.py ├── tui.py └── stream.py ├── docs ├── index.md └── images │ ├── curator-tag.svg │ ├── curator-rename.svg │ └── curator-merge.svg ├── setup.py ├── scripts └── generate_screenshots.py ├── README.md └── LICENSE /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [AlexAltea] 2 | patreon: AlexAltea 3 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from curator.cli import main 4 | 5 | if __name__ == '__main__': 6 | main() 7 | -------------------------------------------------------------------------------- /tests/samples/the_godfather_1972.da.srt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexAltea/curator/HEAD/tests/samples/the_godfather_1972.da.srt -------------------------------------------------------------------------------- /tests/samples/the_godfather_1972.es.srt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexAltea/curator/HEAD/tests/samples/the_godfather_1972.es.srt 
-------------------------------------------------------------------------------- /tests/samples/the_godfather_1972.he.srt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexAltea/curator/HEAD/tests/samples/the_godfather_1972.he.srt -------------------------------------------------------------------------------- /tests/samples/the_godfather_1972.it.srt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexAltea/curator/HEAD/tests/samples/the_godfather_1972.it.srt -------------------------------------------------------------------------------- /tests/samples/the_godfather_1972.ko.srt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexAltea/curator/HEAD/tests/samples/the_godfather_1972.ko.srt -------------------------------------------------------------------------------- /tests/samples/the_godfather_1972.pt.srt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexAltea/curator/HEAD/tests/samples/the_godfather_1972.pt.srt -------------------------------------------------------------------------------- /publish.bat: -------------------------------------------------------------------------------- 1 | del dist\* 2 | python setup.py bdist_wheel --universal 3 | gpg --detach-sign -u FA31DF0C -a dist/* 4 | twine upload dist/* 5 | pause 6 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Curator. 
5 | """ 6 | 7 | # Imports 8 | from .tests_analysis import * 9 | from .tests_stream import * 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDEs 2 | .idea 3 | .vscode 4 | 5 | # Python 6 | .venv 7 | __pycache__ 8 | *.pyc 9 | *.pyo 10 | *.pyd 11 | 12 | # Package 13 | /*egg-info 14 | /build 15 | /dist 16 | 17 | temp* 18 | private -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig 2 | # http://editorconfig.org 3 | 4 | root = true 5 | 6 | [**.{py}] 7 | indent_style = space 8 | indent_size = 4 9 | trim_trailing_whitespace = true 10 | insert_final_newline = true 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | arrow==1.2.3 2 | chardet==3.0.4 3 | iso639-lang==2.1.0 4 | langid==1.1.6 5 | milli==1.0.0 6 | numpy>=1.21.6 7 | openai-whisper==20230918 8 | pandas==1.5.3 9 | pysrt==1.1.2 10 | requests==2.28.2 11 | textdistance==4.5.0 12 | textual==0.40.0 13 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Curator. 
5 | """ 6 | 7 | from tests import * 8 | 9 | def test(): 10 | test_analysis() 11 | test_stream() 12 | print('All tests passed successfully.') 13 | 14 | if __name__ == '__main__': 15 | test() 16 | -------------------------------------------------------------------------------- /curator/plans/__init__.py: -------------------------------------------------------------------------------- 1 | # Imports 2 | from .convert import * 3 | from .link import * 4 | from .merge import * 5 | from .rename import * 6 | from .sync import * 7 | from .tag import * 8 | 9 | # Prevent polluting namespace 10 | del convert 11 | del link 12 | del merge 13 | del rename 14 | del sync 15 | del tag 16 | -------------------------------------------------------------------------------- /curator/__init__.py: -------------------------------------------------------------------------------- 1 | # Imports 2 | from .analysis import * 3 | from .database import * 4 | from .media import * 5 | from .plan import * 6 | from .stream import * 7 | from .task import * 8 | 9 | # Prevent polluting namespace 10 | del analysis 11 | del database 12 | del media 13 | del plan 14 | del stream 15 | del task 16 | -------------------------------------------------------------------------------- /curator/database.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | class Database: 4 | def __init__(self, name): 5 | self.name = name 6 | 7 | # Set cache directory 8 | cache_root = os.getenv("XDG_CACHE_HOME", 9 | os.path.join(os.path.expanduser("~"), ".cache")) 10 | self.cache = os.path.join(cache_root, "curator", name) 11 | os.makedirs(self.cache, exist_ok=True) 12 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | Curator documentation 2 | ===================== 3 | 4 | ## Plan 5 | 6 | Collection of one or more *Tasks*. 7 | 8 | *TODO*. 
9 | 10 | ## Task 11 | 12 | Atomic operation over one or more *Media* or *Streams*. 13 | 14 | *TODO*. 15 | 16 | ## Media 17 | 18 | File or link to a file that contains *Streams*. 19 | 20 | *TODO*. 21 | 22 | ## Stream 23 | 24 | Video, audio, subtitle or data tracks pertaining to some *Media*. 25 | 26 | *TODO*. 27 | -------------------------------------------------------------------------------- /curator/databases/__init__.py: -------------------------------------------------------------------------------- 1 | # Import and return database 2 | def get_database(name, *args, **kwargs): 3 | if name == 'imdb': 4 | from .imdb import ImdbDatabase 5 | return ImdbDatabase(*args, **kwargs) 6 | if name == 'omdb': 7 | from .omdb import OmdbDatabase 8 | return OmdbDatabase(*args, **kwargs) 9 | if name == 'tmdb': 10 | from .tmdb import TmdbDatabase 11 | return TmdbDatabase(*args, **kwargs) 12 | else: 13 | raise ValueError("Unknown database name") 14 | -------------------------------------------------------------------------------- /curator/plans/link.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from curator import Plan, Task, Media 4 | 5 | class LinkPlan(Plan): 6 | def columns(self): 7 | return [ 8 | { 'name': 'Name', 'width': '100%' }, 9 | ] 10 | 11 | class LinkTask(Task): 12 | def __init__(self, input, output): 13 | super().__init__([input], [output]) 14 | assert(output.type == Media.TYPE_LINK) 15 | 16 | def view(self): 17 | return [(self.inputs[0].name,)] 18 | 19 | def apply(self): 20 | src = self.inputs[0].path 21 | lnk = self.outputs[0].path 22 | os.symlink(src, lnk) 23 | 24 | def plan_link(media, output): 25 | plan = LinkPlan() 26 | for m in media: 27 | path = os.path.join(output, m.name) 28 | link = Media(path, Media.TYPE_LINK) 29 | task = LinkTask(m, link) 30 | plan.add_task(task) 31 | return plan 32 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: 
-------------------------------------------------------------------------------- 1 | name: ci 2 | on: [push] 3 | jobs: 4 | build: 5 | runs-on: ubuntu-latest 6 | strategy: 7 | matrix: 8 | python-version: ["3.8"] 9 | 10 | steps: 11 | - uses: actions/checkout@v3 12 | 13 | # Install and cache FFmpeg 14 | - uses: FedericoCarboni/setup-ffmpeg@v2 15 | name: Set up FFmpeg 16 | 17 | # Install and cache Python dependencies 18 | - name: Set up Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v4 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | cache: 'pip' 23 | - name: Install Python dependencies 24 | run: pip install -r requirements.txt 25 | 26 | # Package tests 27 | - name: Test package 28 | run: | 29 | python test.py 30 | python setup.py check --strict --metadata 31 | - name: Install package 32 | run: | 33 | pip install . 34 | -------------------------------------------------------------------------------- /tests/tests_stream.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Curator. 
5 | """ 6 | 7 | from curator.media import * 8 | from curator.stream import * 9 | 10 | def test_detect_subtitle_language(): 11 | srt_lang = lambda path: Media(path).get_streams()[0].detect_subtitle_language() 12 | assert(srt_lang("tests/samples/the_godfather_1972.da.srt") == 'dan') 13 | assert(srt_lang("tests/samples/the_godfather_1972.en.srt") == 'eng') 14 | assert(srt_lang("tests/samples/the_godfather_1972.es.srt") == 'spa') 15 | assert(srt_lang("tests/samples/the_godfather_1972.fr.srt") == 'fra') 16 | assert(srt_lang("tests/samples/the_godfather_1972.he.srt") == 'heb') 17 | assert(srt_lang("tests/samples/the_godfather_1972.it.srt") == 'ita') 18 | assert(srt_lang("tests/samples/the_godfather_1972.ko.srt") == 'kor') 19 | assert(srt_lang("tests/samples/the_godfather_1972.pl.srt") == 'pol') 20 | assert(srt_lang("tests/samples/the_godfather_1972.pt.srt") == 'por') 21 | assert(srt_lang("tests/samples/the_godfather_1972.zh.srt") == 'zho') 22 | 23 | def test_stream(): 24 | test_detect_subtitle_language() 25 | -------------------------------------------------------------------------------- /curator/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import sys 4 | 5 | from collections.abc import Iterable 6 | 7 | def confirm(question, default="yes"): 8 | valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False} 9 | if default is None: 10 | prompt = " [y/n] " 11 | elif default == "yes": 12 | prompt = " [Y/n] " 13 | elif default == "no": 14 | prompt = " [y/N] " 15 | else: 16 | raise ValueError(f"Invalid default answer: '{default}'") 17 | 18 | while True: 19 | sys.stdout.write(question + prompt) 20 | choice = input().lower() 21 | if default is not None and choice == "": 22 | return valid[default] 23 | elif choice in valid: 24 | return valid[choice] 25 | else: 26 | sys.stdout.write("Please respond 'yes' or 'no' ('y' or 'n').\n") 27 | 28 | def flatten(xs): 29 | for x in xs: 30 | if 
isinstance(x, Iterable) and not isinstance(x, (str, bytes)): 31 | yield from flatten(x) 32 | else: 33 | yield x 34 | 35 | def find_executable(name, hints=[]): 36 | path = shutil.which(name) 37 | if path: 38 | return path 39 | for hint in hints: 40 | if os.path.exists(hint): 41 | return hint 42 | return None 43 | -------------------------------------------------------------------------------- /curator/task.py: -------------------------------------------------------------------------------- 1 | class Task: 2 | def __init__(self, inputs=[], outputs=[]): 3 | self.inputs = inputs 4 | self.outputs = outputs 5 | self.enabled = True 6 | self.warnings = set() 7 | self.errors = set() 8 | self.id = None 9 | self.failed = False 10 | 11 | def add_warning(self, warning): 12 | self.warnings.add(warning) 13 | 14 | def add_error(self, error): 15 | self.errors.add(error) 16 | self.enabled = False 17 | 18 | def combine(self, other): 19 | assert(self.inputs == other.inputs) 20 | assert(self.outputs == other.outputs) 21 | assert(self.enabled == other.enabled) 22 | self.warnings |= other.warnings 23 | self.errors |= other.errors 24 | 25 | def concat(self, other): 26 | assert(self.outputs == self.inputs) 27 | raise Exception("Unimplemented") 28 | 29 | # Helpers 30 | def input_streams(self): 31 | for media in self.inputs: 32 | for stream in media.get_streams(): 33 | yield stream 34 | 35 | def input_video_streams(self): 36 | for stream in self.input_streams(): 37 | if stream.is_video(): 38 | yield stream 39 | 40 | def input_audio_streams(self): 41 | for stream in self.input_streams(): 42 | if stream.is_audio(): 43 | yield stream 44 | 45 | def input_subtitle_streams(self): 46 | for stream in self.input_streams(): 47 | if stream.is_subtitle(): 48 | yield stream 49 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 
| 4 | import setuptools 5 | 6 | from pkg_resources import parse_requirements 7 | 8 | CURATOR_VERSION = '0.1.1' 9 | CURATOR_REPOSITORY_URL = 'https://github.com/AlexAltea/curator' 10 | CURATOR_DOWNLOAD_URL = 'https://github.com/AlexAltea/curator/tarball/' + CURATOR_VERSION 11 | 12 | # Description 13 | CURATOR_DESCRIPTION = """Curator 14 | ========= 15 | 16 | .. image:: https://github.com/AlexAltea/curator/actions/workflows/ci.yml/badge.svg 17 | :target: https://github.com/AlexAltea/curator/actions/workflows/ci.yml 18 | 19 | Automated normalization and curating of media collections. Written in Python 3.x. 20 | 21 | More information at: https://github.com/AlexAltea/curator 22 | """ 23 | 24 | with open('requirements.txt', 'r') as f: 25 | requirements = [str(req) for req in parse_requirements(f)] 26 | 27 | setuptools.setup( 28 | name='curator', 29 | version=CURATOR_VERSION, 30 | description='Automated normalization and curating of media collections', 31 | long_description=CURATOR_DESCRIPTION, 32 | license='Apache-2.0', 33 | author='Alexandro Sanchez Bach', 34 | author_email='alexandro@phi.nz', 35 | url=CURATOR_REPOSITORY_URL, 36 | download_url=CURATOR_DOWNLOAD_URL, 37 | packages=['curator', 'curator.databases', 'curator.plans'], 38 | entry_points={ 39 | 'console_scripts': ['curator=curator.cli:main'], 40 | }, 41 | install_requires=requirements, 42 | classifiers=[ 43 | 'Intended Audience :: Developers', 44 | 'License :: OSI Approved :: Apache Software License v2.0', 45 | 'Programming Language :: Python :: 3.8', 46 | 'Natural Language :: English', 47 | ], 48 | ) 49 | -------------------------------------------------------------------------------- /curator/analysis.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | YEAR_MIN = 1800 4 | YEAR_MAX = 2030 5 | 6 | def detect_year(name): 7 | """ 8 | Extract the movie year given the file name. 
9 | Assumptions: 10 | - Year is a 4-digit number, optionally surrounded by non-word characters. 11 | - Year is interpreted as a Gregorian calendar integer in range [YEAR_MIN, YEAR_MAX]. 12 | - Year is the rightmost string satisfying these two conditions. 13 | - Year is never found at the beginning of the file name. 14 | """ 15 | matches = re.finditer(r'(?:\b|_)(\d{4})(?:\b|_)', name) 16 | for match in reversed(list(matches)): 17 | if match.start() == 0: 18 | return None 19 | year = int(match.group(1)) 20 | if YEAR_MIN <= year <= YEAR_MAX: 21 | return year 22 | return None 23 | 24 | def detect_name(name, year=None): 25 | """ 26 | Extract the movie name given the file name. 27 | Optionally provide the movie release year, as tokenization hint. 28 | Assumptions: 29 | - Name appears before the year. 30 | - Name does not contatain parenthesis or brackets. 31 | """ 32 | # Trim anything after year 33 | if year is None: 34 | year = detect_year(name) 35 | if year: 36 | name = name[:name.rfind(str(year))] 37 | # Normalize scene releases 38 | if not ' ' in name: 39 | name = name.replace('.', ' ') 40 | name = name.replace('_', ' ') 41 | # Extract matching left-starting pattern as name 42 | match = re.match(r'[\w\s\,\.\-\'\&]+', name) 43 | if match: 44 | return match[0].strip() 45 | return None 46 | 47 | def detect_tags(name): 48 | """ 49 | Extract the file tags in the file name. 50 | Assumptions: 51 | - Tags are surrounded by square brackets. 52 | - Tags do not contain any kind of brackets within. 53 | - Multiple tags can exist. 
54 | """ 55 | matches = re.findall(r'\[([\w\-\,\.]+)\]', name) 56 | return matches 57 | -------------------------------------------------------------------------------- /curator/plan.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | # Configuration 5 | DEFAULT_UI_BACKEND = 'tui' 6 | 7 | class Plan: 8 | def __init__(self): 9 | self.tasks = [] 10 | self.last_id = 0 11 | 12 | def __iter__(self): 13 | for task in self.tasks: 14 | yield task 15 | 16 | def __len__(self): 17 | return len(self.tasks) 18 | 19 | def __getitem__(self, index): 20 | return self.tasks[index] 21 | 22 | def is_empty(self): 23 | return len(self.tasks) == 0 24 | 25 | def add_task(self, task): 26 | self.last_id += 1 27 | self.tasks.append(task) 28 | task.id = self.last_id 29 | 30 | def optimize(self): 31 | logging.debug('This plan does not support optimizations') 32 | 33 | def validate(self): 34 | outputs = set() 35 | for task in self.tasks: 36 | for output in task.outputs: 37 | path = output.path 38 | if path in outputs: 39 | task.add_error(f'Output {path} already exists in the plan') 40 | if os.path.exists(path): 41 | task.add_error(f'Output {path} already exists in the filesystem') 42 | outputs.add(path) 43 | 44 | def apply(self): 45 | for task in self.tasks: 46 | if task.enabled: 47 | try: 48 | task.apply() 49 | except Exception as e: 50 | task.failed = True 51 | print(f'Task #{task.id} with input {task.inputs[0]} failed:\n{e}') 52 | 53 | def show(self): 54 | from .tui import print_plan 55 | thead, tbody = self.show_tasks() 56 | tbody = list(map(lambda row: tuple(map(str, row)), tbody)) 57 | print_plan(thead, tbody) 58 | 59 | def show_tasks(self): 60 | thead = tuple(map(lambda c: c['name'], self.columns())) 61 | tbody = [] 62 | for task in self.tasks: 63 | tbody += task.view() 64 | return thead, tbody 65 | 66 | def edit(self, backend=DEFAULT_UI_BACKEND): 67 | from .tui import EditorApp 68 | if backend == 'tui': 69 | app = 
EditorApp(self) 70 | app.run() 71 | -------------------------------------------------------------------------------- /curator/plans/sync.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | from curator import Plan, Task, Media 5 | 6 | class SyncPlan(Plan): 7 | def columns(self): 8 | return [ 9 | { 'name': 'Input', 'width': '100%' }, 10 | { 'name': 'Old start', 'width': '9' }, 11 | { 'name': '+', 'width': '1' }, 12 | { 'name': 'Delta', 'width': '9' }, 13 | { 'name': '→', 'width': '1' }, 14 | { 'name': "New start", 'width': '9' }, 15 | ] 16 | 17 | class SyncTask(Task): 18 | def __init__(self, input, output, start, delta): 19 | super().__init__([input], [output]) 20 | self.start = start # Just for debugging 21 | self.delta = delta 22 | 23 | def view(self): 24 | t0 = self.start 25 | t1 = self.start + self.delta 26 | dt = self.delta 27 | return [(self.inputs[0].name, t0, "+", dt, "→", t1)] 28 | 29 | def apply(self): 30 | si = self.inputs[0] 31 | so = self.outputs[0] 32 | 33 | # Build ffmpeg command 34 | cmd = ['ffmpeg'] 35 | cmd += ['-i', si.media.path] 36 | cmd += ['-itsoffset', str(self.delta)] 37 | cmd += ['-i', si.media.path] 38 | cmd += ['-c:v', 'copy'] 39 | cmd += ['-c:a', 'copy'] 40 | cmd += ['-c:s', 'copy'] 41 | 42 | # Select streams respecting input order 43 | for i in range(si.media.num_streams()): 44 | if i == si.index: 45 | cmd += ['-map', f'1:{i}'] 46 | else: 47 | cmd += ['-map', f'0:{i}'] 48 | 49 | # Generate and replace from temporary directory 50 | with tempfile.TemporaryDirectory(dir=si.media.dir, prefix='.temp-curator-') as tmp: 51 | output = os.path.join(tmp, f'output.{si.media.ext}') 52 | cmd += [output] 53 | result = subprocess.run(cmd, capture_output=True) 54 | if result.returncode != 0: 55 | errors = result.stderr.decode('utf-8') 56 | raise Exception(f"Failed to sync {self.outputs[0].name} with ffmpeg:\n{errors}") 57 | os.replace(output, so.media.path) 58 | 59 | def 
plan_sync(media): 60 | plan = SyncPlan() 61 | for m in media: 62 | pass # TODO 63 | return plan 64 | -------------------------------------------------------------------------------- /tests/tests_analysis.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Curator. 5 | """ 6 | 7 | from curator.analysis import * 8 | 9 | def test_analysis_years(): 10 | # Scene-syntax testing 11 | assert(1990 == detect_year('Jacobs.Ladder.1990.720p.BluRay.x264.YIFY')) 12 | assert(1968 == detect_year('2001.A.Space.Odyssey.1968.1080p.BluRay.x264-[YTS.AM]')) 13 | assert(2008 == detect_year('10000.BC.2008_HDRip_[scarabey.org]')) 14 | assert(2014 == detect_year('Interstellar.2014.4K.UltraHD.BluRay.2160p.x264.DTS-HD.MA.5.1.AAC.5.1-POOP')) 15 | 16 | # Custom-syntax testing 17 | assert(2013 == detect_year('Coherence (2013) [English]')) 18 | assert(2003 == detect_year('Bad Santa (Extended Cut) (2003) [English]')) 19 | assert(1984 == detect_year('1984 (1984) [English]')) 20 | assert(None == detect_year('Ani-Kuri 15 [Japanese]')) 21 | 22 | # Stress testing 23 | assert(None == detect_year('2000')) 24 | assert(None == detect_year('x2000')) 25 | assert(2000 == detect_year('1234 2000')) 26 | assert(2000 == detect_year('1234 2000 1080')) 27 | assert(2000 == detect_year('1234 2000 1080 x1999')) 28 | assert(2000 == detect_year('1234 2000 1080 1999x')) 29 | assert(None == detect_year('1234')) 30 | assert(None == detect_year('')) 31 | 32 | def test_analysis_names(): 33 | # Scene-syntax testing 34 | assert(detect_name('Jacobs.Ladder.1990.720p.BluRay.x264.YIFY') 35 | == 'Jacobs Ladder') 36 | assert(detect_name('2001.A.Space.Odyssey.1968.1080p.BluRay.x264-[YTS.AM]') 37 | == '2001 A Space Odyssey') 38 | assert(detect_name('10000.BC.2008_HDRip_[scarabey.org]') 39 | == '10000 BC') 40 | assert(detect_name('Interstellar.2014.4K.UltraHD.BluRay.2160p.x264.DTS-HD.MA.5.1.AAC.5.1-POOP') 41 | == 'Interstellar') 42 | 43 | 
# Custom-syntax testing 44 | assert(detect_name('Coherence (2013) [English]') 45 | == 'Coherence') 46 | assert(detect_name('Bad Santa (Extended Cut) (2003) [English]') 47 | == 'Bad Santa') 48 | assert(detect_name('1984 (1984) [English]') 49 | == '1984') 50 | assert(detect_name('Ani-Kuri 15 [Japanese]') 51 | == 'Ani-Kuri 15') 52 | 53 | def test_analysis(): 54 | test_analysis_years() 55 | test_analysis_names() 56 | -------------------------------------------------------------------------------- /curator/databases/omdb.py: -------------------------------------------------------------------------------- 1 | import bz2 2 | import csv 3 | import io 4 | import logging 5 | import math 6 | import os 7 | 8 | import arrow 9 | import milli 10 | import requests 11 | 12 | from curator import Database 13 | 14 | class OmdbDatabase(Database): 15 | def __init__(self): 16 | super().__init__("omdb") 17 | 18 | # Check if cached index exists 19 | suffix = arrow.utcnow().format('YYYY-MM-DD') 20 | cache_name = f'index_milli_{suffix}' 21 | cache_path = os.path.join(self.cache, cache_name) 22 | if os.path.exists(cache_path): 23 | self.ix = milli.Index(cache_path, 1024*1024*1024) # 1 GiB 24 | return 25 | 26 | # Otherwise create one 27 | logging.info("Creating movie index...") 28 | csv1 = self.get_omdb_dataset('all_movies') 29 | csv2 = self.get_omdb_dataset('all_movie_aliases_iso') 30 | movies = {} 31 | for row in csv1: 32 | movie_id = int(row['id']) 33 | movies[movie_id] = { 34 | 'id': movie_id, 35 | 'name': row['name'], 36 | 'year': row['date'][:4], 37 | 'aliases': [], 38 | } 39 | for row in csv2: 40 | movie_id = int(row['movie_id']) 41 | movie = movies.setdefault(movie_id, { 'id': movie_id, 'aliases': [] }) 42 | movie['aliases'].append(row['name']) 43 | os.mkdir(cache_path) 44 | self.ix = milli.Index(cache_path, 1024*1024*1024) # 1 GiB 45 | self.ix.add_documents(list(movies.values())) 46 | 47 | def get_omdb_dataset(self, name): 48 | suffix = arrow.utcnow().format('MM_DD_YYYY') 49 | 
cache_name = f'{name}_{suffix}.csv.bz2' 50 | cache_path = os.path.join(self.cache, cache_name) 51 | if not os.path.exists(cache_path): 52 | r = requests.get(f'http://www.omdb.org/data/{name}.csv.bz2') 53 | with open(cache_path, 'wb') as f: 54 | f.write(r.content) 55 | 56 | # Parse compressed CSV dataset 57 | with open(cache_path, 'rb') as f: 58 | data = bz2.decompress(f.read()) 59 | text = data.decode('utf-8') 60 | return csv.DictReader(io.StringIO(text), delimiter=',') 61 | 62 | def query(self, name, year=None): 63 | results = self.ix.search(name) 64 | if not results: 65 | return None 66 | movie = self.ix.get_document(results[0]) 67 | return [{ 68 | 'name': name, 69 | 'oname': movie.get('name'), 70 | 'year': movie.get('year'), 71 | }] 72 | -------------------------------------------------------------------------------- /curator/databases/tmdb.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import math 3 | import os 4 | import sqlite3 5 | 6 | import arrow 7 | import pandas as pd 8 | import requests 9 | from textdistance import levenshtein 10 | 11 | from curator import Database 12 | 13 | class TmdbDatabase(Database): 14 | def __init__(self): 15 | super().__init__("tmdb") 16 | 17 | # Get movie IDs from TMDB, and cache them 18 | suffix = arrow.utcnow().shift(days=-1).format('MM_DD_YYYY') 19 | cache_name = f'movie_ids_{suffix}.json.gz' 20 | cache_path = os.path.join(self.cache, cache_name) 21 | if not os.path.exists(cache_path): 22 | r = requests.get(f'http://files.tmdb.org/p/exports/{cache_name}') 23 | with open(cache_path, 'wb') as f: 24 | f.write(r.content) 25 | 26 | # Parse movie IDs table 27 | with open(cache_path, 'rb') as f: 28 | data = gzip.decompress(f.read()) 29 | text = data.decode('utf-8') 30 | df = pd.read_json(text, lines=True) 31 | 32 | # Convert to FTS5-enabled SQLite databse 33 | db = sqlite3.connect(':memory:') 34 | db.execute('CREATE VIRTUAL TABLE movie_ids USING fts5(id, original_title, 
popularity, adult, video);') 35 | df.to_sql('movie_ids', db, if_exists='append', index=False) 36 | self.db = db 37 | 38 | def get_year(self, id): 39 | return None # TODO 40 | 41 | def query_exact(self, name, year=None): 42 | results = self.db.execute( 43 | f'''SELECT original_title, popularity, id FROM movie_ids 44 | WHERE original_title = "{name}" 45 | ORDER BY popularity''').fetchall() 46 | if year: 47 | results = list(filter(lambda r: year == self.get_year(r[2]), results)) 48 | if not results: 49 | return None 50 | r = max(results, key=lambda r: r[1]) 51 | return { 52 | 'name': r[0], 53 | 'year': self.get_year(r[2]), 54 | } 55 | 56 | def query_fuzzy(self, name, year=None): 57 | results = self.db.execute( 58 | f'''SELECT original_title, popularity, id FROM movie_ids 59 | WHERE original_title MATCH "{name}" AND popularity >= 0.61 60 | ORDER BY popularity''').fetchall() 61 | def score(record): 62 | original_title, popularity = record[:2] 63 | distance = levenshtein.distance(original_title, name) 64 | if distance == 0: 65 | return math.inf 66 | return popularity * 1/distance 67 | r = max(results, key=score) 68 | return [{ 69 | 'name': r[0], 70 | 'year': self.get_year(r[2]), 71 | }] 72 | 73 | def query(self, name, year=None): 74 | match = self.query_exact(name, year) 75 | if match: 76 | return match 77 | match = self.query_fuzzy(name, year) 78 | if match: 79 | return match 80 | return None 81 | -------------------------------------------------------------------------------- /scripts/generate_screenshots.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import glob 4 | import os 5 | import shutil 6 | import subprocess 7 | import tempfile 8 | 9 | # Configuration 10 | SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) 11 | PROJECT_ROOT = os.path.join(SCRIPT_DIR, "..") 12 | PROJECT_DOCS = os.path.join(PROJECT_ROOT, "docs") 13 | 14 | # Data 15 | # TODO: Generate the screenshots from actual output 
16 | TERM_CURATOR_MERGE = '''$ curator merge -f mkv ./movies/The* 17 | ┌───┬──────────────────────────────────┬───┬────────────────────────────────┐ 18 | │ # │ Inputs │ → │ Output │ 19 | ├───┼──────────────────────────────────┼───┼────────────────────────────────┤ 20 | │ 1 │ The Social Network (2010).mkv │ → │ The Social Network (2010).mkv │ 21 | │ │ The Social Network (2010).es.ac3 │ ↗ │ │ 22 | │ │ The Social Network (2010).en.srt │ ↗ │ │ 23 | │ │ The Social Network (2010).es.srt │ ↗ │ │ 24 | │ │ The Social Network (2010).de.srt │ ↗ │ │ 25 | │ 2 │ There Will Be Blood (2007).mp4 │ → │ There Will Be Blood (2007).mkv │ 26 | │ │ There Will Be Blood (2007).srt │ ↗ │ │ 27 | └───┴──────────────────────────────────┴───┴────────────────────────────────┘ 28 | Continue? (y/N) ''' 29 | 30 | TERM_CURATOR_RENAME = '''$ curator rename -f "@name (@year).@ext" ./downloads/* 31 | ┌───┬────────────────────────────────────────────────────┬───┬─────────────────────────────────┐ 32 | │ # │ Old │ → │ New │ 33 | ├───┼────────────────────────────────────────────────────┼───┼─────────────────────────────────┤ 34 | │ 1 │ 10000.BC.2008_HDRip_[scarabey.org].mp4 │ → │ 10000 BC (2008).mp4 │ 35 | │ 2 │ 2001.A.Space.Odyssey.1968.BluRay.x264-[YTS.AM].mp4 │ → │ 2001 A Space Odyssey (1968).mp4 │ 36 | │ 3 │ Jacobs.Ladder.1990.720p.BluRay.x264.YIFY.mkv │ → │ Jacobs Ladder (1990).mkv │ 37 | │ 4 │ Venom.2018.HDTS.XViD.AC3-ETRG.mkv │ → │ Venom (2018).mkv │ 38 | └───┴────────────────────────────────────────────────────┴───┴─────────────────────────────────┘ 39 | Continue? (y/N) ''' 40 | 41 | TERM_CURATOR_TAG = '''$ curator tag -s audio -t language --only-macrolanguages . 
42 | ┌───┬────────────────────────────────┬────────┬─────┬───┬─────┐ 43 | │ # │ Name │ Stream │ Old │ → │ New │ 44 | ├───┼────────────────────────────────┼────────┼─────┼───┼─────┤ 45 | │ 1 │ El Bola (2000).avi │ 1 │ │ → │ spa │ 46 | │ 2 │ Perfect Blue (1997).mkv │ 1 │ │ → │ jpn │ 47 | │ 3 │ Perfect Blue (1997).mkv │ 2 │ │ → │ eng │ 48 | │ 4 │ Saving Private Ryan (1998).mp4 │ 1 │ │ → │ eng │ 49 | │ 5 │ The Innocents (2021).mkv │ 1 │ │ → │ nor │ 50 | │ 6 │ Three-Body (2023) - S01E01.mkv │ 1 │ chi │ → │ zho │ 51 | └───┴────────────────────────────────┴────────┴─────┴───┴─────┘ 52 | Continue? (y/N) ''' 53 | 54 | def termtosvg(text, output): 55 | term_w = 100 56 | term_h = text.count('\n') + 1 57 | cmd = ['termtosvg'] 58 | cmd += ['-t', 'window_frame'] 59 | cmd += ['--screen-geometry', f'{term_w}x{term_h}'] 60 | cmd += ['--still-frames'] 61 | cmd += ['--command', f'echo -n -e {repr(text)}'] 62 | with tempfile.TemporaryDirectory() as tmp: 63 | cmd += [tmp] 64 | result = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) 65 | assert(result.returncode == 0) 66 | files = glob.glob(os.path.join(tmp, '*')) 67 | last = sorted(files)[-1] 68 | shutil.move(last, output) 69 | 70 | def main(): 71 | termtosvg(TERM_CURATOR_MERGE, 72 | os.path.join(PROJECT_DOCS, 'images/curator-merge.svg')) 73 | termtosvg(TERM_CURATOR_RENAME, 74 | os.path.join(PROJECT_DOCS, 'images/curator-rename.svg')) 75 | termtosvg(TERM_CURATOR_TAG, 76 | os.path.join(PROJECT_DOCS, 'images/curator-tag.svg')) 77 | 78 | if __name__ == "__main__": 79 | main() 80 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Curator 2 | ======= 3 | 4 | [![ci-badge](https://github.com/AlexAltea/curator/actions/workflows/ci.yml/badge.svg)](https://github.com/AlexAltea/curator/actions/workflows/ci.yml) 5 | 6 | Automated normalization and curating of media collections. 
Written in Python 3.x. 7 | 8 | Curator is a collection of stateless CLI tools, following the [Unix philosophy](https://en.wikipedia.org/wiki/Unix_philosophy), to organize large collections of heterogeneous media. Each tool creates a *plan* made of *tasks* with clearly defined input and output files, which the user can optionally review before applying. 9 | 10 | Install the package via: 11 | 12 | ```sh 13 | pip install git+https://github.com/AlexAltea/curator.git 14 | ``` 15 | 16 | ## Credits 17 | 18 | Acknowledgements to people who contributed code/ideas to the project: 19 | 20 | - [Victor Garcia Herrero](https://github.com/VictGH): Mathematician, Machine Learning expert and tamer of scoring functions. 21 | 22 | ## Features 23 | 24 | Curator can automatically rename and link media files, edit container metadata, remux and merge streams. Reducing manual labor and achieve reliable results across different media from potentially different sources, some tools rely on signal processing and machine learning (e.g. [Whisper](https://openai.com/blog/whisper/), [LangID](https://github.com/saffsd/langid.py)). 25 | 26 | Highlighted use cases (current and planned): 27 | 28 | - [x] Filter media by container and stream metadata (all). 29 | - [x] Rename files based on existing filenames ([`curator-rename`](#rename)). 30 | - [x] Merge streams from multiple related containers ([`curator-merge`](#merge)). 31 | - [x] Detect audio/subtitle language from sound and text data ([`curator-tag`](#tag)). 32 | - [ ] Rename files based on existing metadata and databases ([`curator-rename`](#rename)). 33 | - [ ] Synchronize audio/subtitle streams ([`curator-merge`](#merge) and [`curator-sync`](#sync)). 34 | - [ ] Remove scene banners from subtitles ([`curator-clean`](#clean)). 35 | - [ ] Detect watermarks in video streams ([`curator-clean`](#clean) and [`curator-merge`](#merge)). 36 | - [ ] Select highest quality audio/video streams ([`curator-merge`](#merge)). 
37 | 38 | Below you can find a description and examples of all tools provided by Curator: 39 | 40 | ### Auto 41 | 42 | ```mermaid 43 | flowchart LR 44 | Convert --> Merge --> Sync --> Tag --> Rename 45 | ``` 46 | 47 | ### Merge 48 | 49 | Merges all streams with identical names into a single container, except for: 50 | 51 | - Video streams, if one already exists. 52 | - Audio streams, if one with the same `language` tag already exists. 53 | 54 | Requires all video containers to be MKV. 55 | 56 | ![example-curator-merge](./docs/images/curator-merge.svg) 57 | 58 | ### Rename 59 | 60 | Update filenames according to a pattern made of the following variables: 61 | 62 | | Key | Description | 63 | |----------|-------------| 64 | | `@ext` | File extension of the input media. | 65 | | `@dbid` | When using a database, the ID of the match, e.g. `imdbid-tt12345678`. | 66 | | `@name` | Localized name of the media. | 67 | | `@oname` | Original name of the media (needs database). | 68 | | `@tags` | Tags present in the input media filename enclosed by square brackets, if any. | 69 | | `@year` | Year the media was released. | 70 | 71 | ![example-curator-rename](./docs/images/curator-rename.svg) 72 | 73 | ### Sync 74 | 75 | Synchronize streams via data cross-correlation. 76 | 77 | Every synchronization task involves (A) a reference stream, and (B) the stream we want to synchronize. We name this relationship as *A ← B*. Curator can only handle the following types of synchronization tasks: 78 | 79 | - [ ] *Video ← Audio*:\ 80 | Comparing lip movement timestamps with ASR timestamps. 81 | - [ ] *Audio ← Audio*:\ 82 | Comparing sound data. 83 | - [ ] *Audio ← Subtitle*:\ 84 | Comparing ASR timestamps with uniquely matching text timestamps. 85 | - [ ] *Subtitle ← Subtitle*:\ 86 | Comparing text timestamps. 87 | 88 | The synchronization plan (`SyncPlan`) will create a tree of synchronization tasks (`SyncTask`) for every media file it processes. 
For example, with an input `Media("movie.mkv")` with streams: `#0` (video), `#1` (audio:eng), `#2` (audio:spa), `#3` (subtitle:eng), `#4` (subtitle:spa), it will genarate the following sync proposals: 89 | 90 | 1. `#0` ← `#1` 91 | 2. `#1` ← `#2` 92 | 3. `#1` ← `#3` 93 | 4. `#3` ← `#4` 94 | 95 | ### Tag 96 | 97 | ![example-curator-tag](./docs/images/curator-tag.svg) 98 | -------------------------------------------------------------------------------- /curator/plans/rename.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from curator.analysis import * 5 | from curator.databases import * 6 | from curator import Plan, Task, Media 7 | 8 | class RenamePlan(Plan): 9 | def columns(self): 10 | return [ 11 | { 'name': 'Old', 'width': '50%' }, 12 | { 'name': 'Source', 'width': '8' }, 13 | { 'name': '→', 'width': '1' }, 14 | { 'name': "New", 'width': '50%' }, 15 | ] 16 | 17 | class RenameTask(Task): 18 | def __init__(self, input, name, source, alternatives=[]): 19 | super().__init__([input]) 20 | self.update_output(name) 21 | self.source = source 22 | self.alternatives = alternatives 23 | 24 | def update_output(self, name): 25 | input = self.inputs[0] 26 | output_path = os.path.join(os.path.dirname(input.path), name) 27 | output_media = Media(output_path, Media.TYPE_FILE) 28 | self.outputs = [output_media] 29 | 30 | def view(self): 31 | name_input = self.inputs[0].name 32 | name_output = self.outputs[0].name 33 | return [(name_input, self.source, "→", name_output)] 34 | 35 | def apply(self): 36 | src = self.inputs[0].path 37 | dst = self.outputs[0].path 38 | if not os.path.exists(dst): 39 | os.rename(src, dst) 40 | 41 | def normalize(filename): 42 | replacements = [ 43 | (r'([\w!]): ', r'\1 - '), # Remove colons when used as separators 44 | (r'\.\.\.', r''), # Remove ellipsis 45 | (r' vs\. 
def plan_rename(media, format, db=None):
    """Build a RenamePlan that renames `media` files according to `format`.

    Args:
        media: Iterable of Media objects to consider for renaming.
        format: Filename pattern using @name/@oname/@year/@dbid/@ext/@tags placeholders.
        db: Optional movie database used to refine the detected name/year and to
            provide @dbid/@oname values plus alternative matches.

    Returns:
        A RenamePlan with one RenameTask per file whose name would change.
    """
    plan = RenamePlan()
    for m in media:
        # Detect name, year and tags from the existing filename
        name = detect_name(m.name)
        year = detect_year(m.name)
        tags = detect_tags(m.name)
        dbid = None
        oname = None
        source = "analysis"
        # Fix: `entries` must always be bound. Previously it was only assigned by
        # the walrus inside `if db and (entries := ...)`, so with no database it
        # was undefined (NameError below), and a None query result leaked through
        # to `map()` (TypeError).
        entries = (db.query(name, year) if db else None) or []
        if entries:
            entry = entries[0]
            name = entry.get('name')
            year = entry.get('year')
            dbid = entry.get('dbid')
            oname = entry.get('oname')
            source = db.name
        if '@name' in format and not name:
            logging.warning(f"Could not rename: {m.name} (name not detected)")
            continue
        if '@year' in format and not year:
            logging.warning(f"Could not rename: {m.name} (year not detected)")
            continue
        if '@dbid' in format and not dbid:
            logging.warning(f"Could not rename: {m.name} (database id not detected)")
            continue
        if '@oname' in format and not oname:
            logging.warning(f"Could not rename: {m.name} (original name not found)")
            continue

        # Generate the new filename, reusing format_entry instead of duplicating
        # the placeholder-substitution logic inline.
        best = {'name': name, 'year': year, 'dbid': dbid, 'oname': oname}
        filename = format_entry(format, best, tags, m.ext.lower())

        if filename != m.name:
            alternatives = [format_entry(format, e, tags, m.ext.lower()) for e in entries]
            task = RenameTask(m, filename, source, alternatives)
            plan.add_task(task)
    return plan
(y/N) 47 | -------------------------------------------------------------------------------- /docs/images/curator-rename.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 18 | 41 | 42 | 43 | 44 | 45 | 46 | $ curator rename -f "@name (@year).@ext" ./downloads/*┌───┬────────────────────────────────────────────────────┬───┬─────────────────────────────────┐│ # │ Old │ → │ New │├───┼────────────────────────────────────────────────────┼───┼─────────────────────────────────┤│ 1 │ 10000.BC.2008_HDRip_[scarabey.org].mp4 │ → │ 10000 BC (2008).mp4 ││ 2 │ 2001.A.Space.Odyssey.1968.BluRay.x264-[YTS.AM].mp4 │ → │ 2001 A Space Odyssey (1968).mp4 ││ 3 │ Jacobs.Ladder.1990.720p.BluRay.x264.YIFY.mkv │ → │ Jacobs Ladder (1990).mkv ││ 4 │ Venom.2018.HDTS.XViD.AC3-ETRG.mkv │ → │ Venom (2018).mkv │└───┴────────────────────────────────────────────────────┴───┴─────────────────────────────────┘Continue? (y/N) 47 | -------------------------------------------------------------------------------- /docs/images/curator-merge.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 18 | 41 | 42 | 43 | 44 | 45 | 46 | $ curator merge -f mkv ./movies/The*┌───┬──────────────────────────────────┬───┬────────────────────────────────┐│ # │ Inputs │ → │ Output │├───┼──────────────────────────────────┼───┼────────────────────────────────┤│ 1 │ The Social Network (2010).mkv │ → │ The Social Network (2010).mkv ││ │ The Social Network (2010).es.ac3 │ ↗ │ ││ │ The Social Network (2010).en.srt │ ↗ │ ││ │ The Social Network (2010).es.srt │ ↗ │ ││ │ The Social Network (2010).de.srt │ ↗ │ ││ 2 │ There Will Be Blood (2007).mp4 │ → │ There Will Be Blood (2007).mkv ││ │ There Will Be Blood (2007).srt │ ↗ │ │└───┴──────────────────────────────────┴───┴────────────────────────────────┘Continue? 
(y/N) 47 | -------------------------------------------------------------------------------- /curator/plans/tag.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import os 3 | import subprocess 4 | import tempfile 5 | import shutil 6 | 7 | from curator import Plan, Task, Media 8 | from curator.util import * 9 | 10 | class TagPlan(Plan): 11 | def columns(self): 12 | return [ 13 | { 'name': 'Name', 'width': '50%' }, 14 | { 'name': 'Stream', 'width': '6' }, 15 | { 'name': "Old", 'width': '25%' }, 16 | { 'name': '→', 'width': '1' }, 17 | { 'name': "New", 'width': '25%' }, 18 | ] 19 | 20 | def optimize(self): 21 | tasks = [] 22 | last_id = 0 23 | last_task = None 24 | for task in self.tasks: 25 | if last_task and last_task.inputs == task.inputs: 26 | last_task.combine(task) 27 | else: 28 | last_id += 1 29 | last_task = task 30 | tasks.append(task) 31 | task.id = last_id 32 | self.tasks = tasks 33 | 34 | class TagTask(Task): 35 | TagUpdate = collections.namedtuple('TagUpdate', ('index', 'tag', 'old', 'new')) 36 | 37 | def __init__(self, input): 38 | super().__init__([input], []) 39 | self.updates = [] 40 | self.use_mkvpropedit = False 41 | self.path_mkvpropedit = None 42 | 43 | def combine(self, other): 44 | super().combine(other) 45 | self.updates += other.updates 46 | 47 | def view(self): 48 | rows = [] 49 | for update in self.updates: 50 | rows.append([self.inputs[0].name, str(update.index), str(update.old), "→", str(update.new)]) 51 | return rows 52 | 53 | def add_update(self, index, tag, old, new=None): 54 | self.updates.append(self.TagUpdate(index, tag, old, new)) 55 | 56 | def apply(self): 57 | if self.use_mkvpropedit and self.path_mkvpropedit: 58 | self.apply_with_mkvpropedit() 59 | return 60 | 61 | m = self.inputs[0] 62 | cmd = ['ffmpeg'] 63 | cmd += ['-i', m.path] 64 | cmd += ['-c:v', 'copy'] 65 | cmd += ['-c:a', 'copy'] 66 | cmd += ['-c:s', 'copy'] 67 | cmd += ['-map', '0'] 68 | cmd += 
def plan_tag(media, stype, tag, value=None, skip_tagged=False, opts=None):
    """Build a TagPlan that sets `tag` on audio/subtitle streams of `media`.

    Args:
        media: Iterable of Media objects to process.
        stype: Stream type filter: 'audio', 'subtitle' or 'all'.
        tag: Name of the tag to update (e.g. 'language').
        value: Explicit new value; when None the value is detected per stream.
        skip_tagged: Skip streams that already carry a value for `tag`.
        opts: Extra options forwarded to the per-stream detector.

    Returns:
        A TagPlan containing one TagTask per stream (combined later by optimize()).
    """
    path_mkvpropedit = find_executable('mkvpropedit', [
        'C:/Program Files/MKVToolNix/mkvpropedit.exe',
        'C:/Program Files (x86)/MKVToolNix/mkvpropedit.exe',
    ])
    plan = TagPlan()
    for m in media:
        # Skip files with formats that do not support tagging
        if m.is_format('subviewer'):
            continue

        for stream in m.get_streams():
            # Filter streams and get old tag value
            stream_info = stream.get_info()
            if stream_info['codec_type'] not in ('audio', 'subtitle'):
                continue
            if stype != 'all' and stream_info['codec_type'] != stype:
                continue
            # Tags may be stored with arbitrary casing depending on the muxer.
            stream_value = stream_info['tags'].get(tag) or \
                stream_info['tags'].get(tag.lower()) or \
                stream_info['tags'].get(tag.upper())
            if skip_tagged and stream_value is not None:
                continue

            # Create tag update task
            task = TagTask(m)
            if m.is_format('avi'):
                task.add_warning("Modifying AVI metadata might affect stream synchronization.")
                # Fix: was `tag == 'languge'` (typo), so this out-of-range error
                # for RIFF IASx tags was never reported.
                if tag == 'language' and stream.get_info()['codec_type'] == 'audio' and stream.audio_index() > 8:
                    task.add_error("Cannot change AVI audio stream using IASx tags. Index out of range.")
            if m.is_format('matroska'):
                task.use_mkvpropedit = True
                task.path_mkvpropedit = path_mkvpropedit
            old_value = stream_value
            new_value = value if value is not None else tag_value(stream, tag, opts)
            if old_value != new_value and new_value is not None:
                task.add_update(stream.index, tag, old_value, new_value)
            plan.add_task(task)
    return plan
    def __init__(self, cache_days):
        """Open (or build) the local IMDb movie index.

        Args:
            cache_days: Reuse a cached index up to this many days old before
                rebuilding from freshly downloaded IMDb datasets.
        """
        super().__init__("imdb")

        # Common suffix
        today = arrow.utcnow().format('YYYY-MM-DD')

        # Check IMDb terms
        # Acceptance is recorded under a filename keyed by a hash of the
        # disclaimer text, so editing the disclaimer re-triggers the prompt.
        terms_hash = hashlib.sha256(IMDB_DISCLAIMER.encode('utf-8')).hexdigest()[:32]
        terms_path = os.path.join(self.cache, f'terms_{terms_hash}')
        if not os.path.exists(terms_path):
            print(IMDB_DISCLAIMER)
            if confirm("Do you agree/confirm this?", default='no'):
                with open(terms_path, 'w') as f:
                    f.write(f'```\n{IMDB_DISCLAIMER}```\n\nAgreed on {today}.\n')
            else:
                print("Curator cannot continue without accepting the IMDb terms.")
                exit(0)
        print(IMDB_ACKNOWLEDGEMENT)

        # Check if cached index exists
        for day in list(range(cache_days)) + [0]:
            day = arrow.utcnow().shift(days=-day).format('YYYY-MM-DD')
            cache_name = f'index_milli_{day}'
            cache_path = os.path.join(self.cache, cache_name)
            if os.path.exists(cache_path):
                logging.info(f"Using cached movie index from {day}")
                self.ix = milli.Index(cache_path, 4*1024*1024*1024) # 4 GiB
                return
        # NOTE: the trailing `+ [0]` above guarantees the loop's last iteration
        # is day 0, so `cache_path` below points at today's index directory.

        # Otherwise create one
        logging.info("Creating movie index...")
        title_akas = self.get_imdb_dataset('title.akas')
        title_basics = self.get_imdb_dataset('title.basics')
        title_ratings = self.get_imdb_dataset('title.ratings')
        movies = {}
        # Base records: keep only movies with a known release year
        # ('\N' marks a missing value in IMDb TSV dumps).
        for row in title_basics:
            if row['titleType'] != 'movie' or row['startYear'] == '\\N':
                continue
            movie_id = row['tconst']
            movies[movie_id] = {
                'id': movie_id,
                'name': row['primaryTitle'],
                'year': row['startYear'],
                'akas': [],
                'votes': 0,
            }
        # Attach alternative titles and vote counts to the known movies;
        # rows referencing non-movie titles are skipped.
        for row in title_akas:
            movie_id = row['titleId']
            movie = movies.get(movie_id)
            if movie is None:
                continue
            movie['akas'].append(row['title'])
        for row in title_ratings:
            movie_id = row['tconst']
            movie = movies.get(movie_id)
            if movie is None:
                continue
            movie['votes'] = int(row['numVotes'])
        os.mkdir(cache_path)
        self.ix = milli.Index(cache_path, 4*1024*1024*1024) # 4 GiB
        self.ix.add_documents(list(movies.values()))
math.log10(movie['votes'] + 1) 126 | movie['score'] = popularity - distance 127 | # Return closest matches 128 | movies = [{ 129 | 'name': name, 130 | 'oname': movie.get('name'), 131 | 'year': movie.get('year'), 132 | 'dbid': 'imdbid-' + movie.get('id'), 133 | } for movie in sorted(movies, key=lambda m: m['score'], reverse=True)][:10] 134 | return movies 135 | 136 | def query_id(self, name): 137 | match = re.search(r'tt\d{7,9}', name) 138 | if not match: 139 | return None 140 | id = match.group() 141 | results = self.ix.search(name) 142 | if not results: 143 | return None 144 | movies = self.ix.get_documents(results) 145 | for movie in movies: 146 | if movie.get('id') == id: 147 | return [{ 148 | 'name': name, 149 | 'oname': movie.get('name'), 150 | 'year': movie.get('year'), 151 | 'dbid': 'imdbid-' + movie.get('id'), 152 | }] 153 | return None 154 | -------------------------------------------------------------------------------- /curator/plans/convert.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import subprocess 4 | import tempfile 5 | 6 | from curator import Plan, Task, Media 7 | from curator.util import flatten 8 | 9 | class ConvertPlan(Plan): 10 | def columns(self): 11 | return [ 12 | { 'name': 'Inputs', 'width': '50%' }, 13 | { 'name': '→', 'width': '1' }, 14 | { 'name': "Output", 'width': '50%' }, 15 | ] 16 | 17 | class ConvertTask(Task): 18 | def __init__(self, input, output, format, delete=False): 19 | super().__init__([input], [output]) 20 | assert(output.type == Media.TYPE_FILE) 21 | self.format = format 22 | self.delete = delete 23 | self.fflags = set() 24 | self.cflags = set() 25 | self.mflags = set() 26 | self.unpack_bframes = False 27 | self.skip_subtitles = False 28 | 29 | def view(self): 30 | return [(self.inputs[0].name, "→", self.outputs[0].name)] 31 | 32 | def apply(self): 33 | temp = None 34 | # Solve conflict when -fflags +genpts and -bsf:v mpeg4_unpack_bframes are 
both enabled 35 | input_media = self.inputs[0].path 36 | if self.unpack_bframes and '+genpts' in self.fflags: 37 | temp = tempfile.TemporaryDirectory(dir=self.inputs[0].dir, prefix='.temp-curator-') 38 | fixed_media = os.path.join(temp.name, "media.avi") 39 | cmd = ['ffmpeg'] 40 | cmd += ['-i', input_media] 41 | cmd += ['-c:v', 'copy'] 42 | cmd += ['-c:a', 'copy'] 43 | cmd += ['-bsf:v', 'mpeg4_unpack_bframes'] 44 | cmd += ['-map', '0'] 45 | if self.skip_subtitles: 46 | cmd += ['-map', '-0:s'] 47 | cmd += ['-map_metadata', '0'] 48 | cmd += ['-movflags', 'use_metadata_tags'] 49 | cmd += [fixed_media] 50 | result = subprocess.run(cmd, capture_output=True) 51 | if result.returncode != 0: 52 | errors = result.stderr.decode('utf-8') 53 | raise Exception(f"Failed to generate PTS for {self.inputs[0].name} with ffmpeg:\n{errors}") 54 | input_media = fixed_media 55 | 56 | # Build ffmpeg command 57 | cmd = ['ffmpeg'] 58 | if self.fflags: 59 | cmd += ['-fflags', ''.join(self.fflags)] 60 | cmd += ['-i', input_media] 61 | cmd += ['-c:v', 'copy'] 62 | cmd += ['-c:a', 'copy'] 63 | cmd += ['-c:s', 'copy'] 64 | cmd += ['-c:d', 'copy'] 65 | cmd += ['-c:t', 'copy'] 66 | if self.cflags: 67 | cmd += flatten(self.cflags) 68 | if self.unpack_bframes and '+genpts' not in self.fflags: 69 | cmd += ['-bsf:v', 'mpeg4_unpack_bframes'] 70 | cmd += ['-map', '0'] 71 | if self.skip_subtitles: 72 | cmd += ['-map', '-0:s'] 73 | if self.mflags: 74 | cmd += flatten(self.mflags) 75 | cmd += ['-map_metadata', '0'] 76 | cmd += ['-movflags', 'use_metadata_tags'] 77 | 78 | # Create output file 79 | output = self.outputs[0].path 80 | cmd += [output] 81 | result = subprocess.run(cmd, capture_output=True) 82 | if temp: 83 | temp.cleanup() 84 | if result.returncode != 0: 85 | if os.path.exists(output): 86 | os.remove(output) 87 | errors = result.stderr.decode('utf-8') 88 | raise Exception(f"Failed to convert to {output} with ffmpeg:\n{errors}") 89 | if self.delete: 90 | os.remove(self.inputs[0].path) 91 | 92 | 
def plan_convert(media, format, delete=False):
    """Build a ConvertPlan that remuxes each file in `media` to `format`.

    Args:
        media: Iterable of Media objects to convert.
        format: Target container extension (e.g. 'mkv').
        delete: Delete the input file after a successful conversion.

    Returns:
        A ConvertPlan with one ConvertTask per file whose output does not exist yet.
    """
    plan = ConvertPlan()
    for m in media:
        root, ext = os.path.splitext(m.path)
        output_path = f'{root}.{format}'
        if os.path.exists(output_path):
            logging.debug(f'Skipping existing output: {output_path}')
            continue
        output_media = Media(output_path, Media.TYPE_FILE)
        task = ConvertTask(m, output_media, format, delete)

        # Tweaks for mismatching formats (hoist the repeated format lookup)
        format_name = m.get_info()['format_name']
        if format_name in ('avi', 'ogg') and m.has_video():
            task.add_warning('Media contains packets without PTS data.')
            task.add_fflag('+genpts')
        if format_name == 'avi' and m.has_video_codec('h264'):
            task.add_error('AVI contains H264 stream. Unpacking is required, but not supported.')
        if format_name == 'avi' and m.has_subtitle():
            task.skip_subtitles = True
            task.add_warning('AVI contains subtitles. Conversion is not supported.')
        if m.has_packed_bframes():
            task.unpack_bframes = True
            task.add_warning('Media contains packed B-frames. Unpacking is required.')
        if format == 'mkv':
            # Merged the two duplicated `if format == 'mkv'` stream loops.
            for stream in m.get_streams():
                stream_info = stream.get_info()
                codec_type = stream_info['codec_type']
                # Reencode MP4/TX3G to MKV/SRT
                if codec_type == "subtitle" and stream_info['codec_name'] == "mov_text":
                    task.add_warning(f'Conversion requires reencoding {stream}. Styles will be removed.')
                    task.add_cflag(('-c:s', 'text'))
                # Drop MP4/TMCD
                # Fix: the three warnings below were plain strings missing the
                # `f` prefix, so they printed a literal "{stream}".
                if codec_type == "data" and stream_info['tags'].get('handler_name') == "Time Code Media Handler":
                    task.add_warning(f'Chapters have been included in {stream}. Stream will be dropped.')
                    task.add_mflag(('-map', f'-0:{stream.index}'))
                if codec_type == "data" and stream_info['tags'].get('handler_name') == "SubtitleHandler":
                    task.add_warning(f'Chapters have been included in {stream}. Stream will be dropped, but chapters might be carried over by ffmpeg.')
                    task.add_mflag(('-map', f'-0:{stream.index}'))
                # Drop binary data
                if codec_type == "data" and stream_info['codec_name'] == "bin_data":
                    task.add_warning(f'Binary data has been included in {stream}. Stream will be dropped.')
                    task.add_mflag(('-map', f'-0:{stream.index}'))
        plan.add_task(task)
    return plan
has_video_ext(self): 47 | return self.ext in VIDEO_EXTENSIONS 48 | 49 | def has_audio_ext(self): 50 | return self.ext in AUDIO_EXTENSIONS 51 | 52 | def has_subtitle_ext(self): 53 | return self.ext in TEXTS_EXTENSIONS 54 | 55 | def has_video(self): 56 | return any(map(lambda s: s.is_video(), self.get_streams())) 57 | 58 | def has_audio(self): 59 | return any(map(lambda s: s.is_audio(), self.get_streams())) 60 | 61 | def has_subtitle(self): 62 | return any(map(lambda s: s.is_subtitle(), self.get_streams())) 63 | 64 | def has_video_codec(self, codec_name): 65 | return any(map(lambda s: s.is_video() and s.get_info()['codec_name'] == codec_name, 66 | self.get_streams())) 67 | 68 | def has_subtitle_codec(self, codec_name): 69 | return any(map(lambda s: s.is_subtitle() and s.get_info()['codec_name'] == codec_name, 70 | self.get_streams())) 71 | 72 | def has_packed_bframes(self): 73 | if self.get_info()['format_name'] != 'avi': 74 | return False 75 | for stream in self.get_streams(): 76 | if stream.is_video() and stream.has_packed_bframes(): 77 | return True 78 | return False 79 | 80 | def is_format(self, name): 81 | return name in self.get_info()['format_name'].split(',') 82 | 83 | def get_info(self): 84 | if self.info: 85 | return self.info 86 | cmd = ['ffprobe', self.path] 87 | cmd += ['-show_format'] 88 | cmd += ['-of', 'json'] 89 | result = subprocess.run(cmd, capture_output=True) 90 | if result.returncode != 0: 91 | errors = result.stderr.decode('utf-8') 92 | raise Exception(f"Failed to get info from {self.path} with ffmpeg:\n{errors}") 93 | output = result.stdout.decode('utf-8') 94 | self.info = json.loads(output)['format'] 95 | return self.info 96 | 97 | def get_packets(self): 98 | if self.packets: 99 | return self.packets 100 | cmd = ['ffprobe', self.path] 101 | cmd += ['-show_packets'] 102 | cmd += ['-of', 'json'] 103 | result = subprocess.run(cmd, capture_output=True) 104 | if result.returncode != 0: 105 | errors = result.stderr.decode('utf-8') 106 | raise 
Exception(f"Failed to get packets from {self.path} with ffmpeg:\n{errors}") 107 | output = result.stdout.decode('utf-8') 108 | self.packets = json.loads(output)['packets'] 109 | return self.packets 110 | 111 | def get_streams(self): 112 | if self.streams is not None: 113 | return self.streams 114 | 115 | # Obtain information about streams within media 116 | cmd = ['ffprobe', self.path] 117 | cmd += ['-show_streams'] 118 | cmd += ['-of', 'json'] 119 | result = subprocess.run(cmd, capture_output=True) 120 | if result.returncode != 0: 121 | errors = result.stderr.decode('utf-8') 122 | raise Exception(f"Failed get info from {self.path} with ffmpeg:\n{errors}") 123 | output = result.stdout.decode('utf-8') 124 | streams_info = json.loads(output)['streams'] 125 | 126 | # Create and return stream objects 127 | streams = [] 128 | for stream_info in streams_info: 129 | stream_info.setdefault('tags', {}) 130 | stream = Stream(self, stream_info['index'], stream_info) 131 | streams.append(stream) 132 | self.streams = streams 133 | return streams 134 | 135 | def num_streams(): 136 | return len(get_streams()) 137 | 138 | 139 | def parse_query(query): 140 | lhs, rhs = query.split('=') 141 | path = lhs.split('.') 142 | return { 'lhs_path': path, 'op': operator.eq, 'rhs_value': rhs } 143 | 144 | def filter_streams(streams, query): 145 | results = [] 146 | query = parse_query(query) 147 | for stream in streams: 148 | try: 149 | lhs = functools.reduce(dict.get, query['lhs_path'], stream.get_info()) 150 | except TypeError: 151 | continue 152 | rhs = query['rhs_value'] 153 | if query['op'](lhs, rhs): 154 | results.append(stream) 155 | return results 156 | 157 | def filter_check(media, queries): 158 | if not queries: 159 | return True 160 | streams = media.get_streams() 161 | for query in queries: 162 | streams = filter_streams(streams, query) 163 | if len(streams) == 0: 164 | return False 165 | return True 166 | 167 | def media_input(paths, recursive=False, queries=[]): 168 | media = [] 
169 | for path in paths: 170 | # Add files 171 | if os.path.isfile(path): 172 | m = Media(path) 173 | if filter_check(m, queries): 174 | media.append(m) 175 | # Add directories 176 | elif os.path.isdir(path): 177 | path = os.path.join(path, '*') 178 | for path in glob.glob(path, recursive=recursive): 179 | if os.path.isfile(path): 180 | m = Media(path) 181 | if filter_check(m, queries): 182 | media.append(m) 183 | # Add wildcards (needed for Windows) 184 | elif '*' in path: 185 | for path in glob.glob(path, recursive=recursive): 186 | if os.path.isfile(path): 187 | m = Media(path) 188 | if filter_check(m, queries): 189 | media.append(m) 190 | return media 191 | -------------------------------------------------------------------------------- /curator/cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import logging 5 | import os 6 | import pathlib 7 | import sys 8 | 9 | import curator 10 | from curator.databases import get_database 11 | from curator.util import confirm 12 | 13 | # Helpers 14 | def curator_argparser(): 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('input', nargs='+', type=str) 17 | parser.add_argument('--log', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], default='WARNING') 18 | parser.add_argument('-q', '--query', action='append', help="metadata filter(s), e.g. 
def curator_apply_plan(plan):
    """Optimize and execute the plan, then report the outcome per task."""
    plan.optimize()
    plan.apply()
    failed = [t for t in plan if t.failed]
    if failed:
        print('Some tasks failed:')
        for t in failed:
            print(f'- Task #{t.id} with input {t.inputs[0]} failed')
    else:
        print('All tasks completed successfully')
def curator_link(argv):
    """Handle `curator link`: plan symbolic links into an output directory."""
    parser = curator_argparser()
    parser.add_argument('-o', '--output', required=True)
    args = curator_args(parser, argv)

    # Import lazily, matching the other command handlers
    from curator.plans import plan_link
    inputs = curator_input(args)
    link_plan = plan_link(inputs, args.output)
    curator_handle_plan(link_plan, args)
default=None, 118 | help='Try to discard video streams above this bitrate') 119 | 120 | args = curator_args(parser, argv) 121 | if args.min_video_resolution or args.min_video_bitrate or \ 122 | args.max_video_resolution or args.max_video_bitrate: 123 | raise Exception("Unsupported argument") 124 | select = lambda *keys: { k: vars(args)[k] for k in keys } 125 | opts = select( 126 | 'try_video_criteria', 127 | 'try_video_codecs', 128 | 'min_video_resolution', 129 | 'max_video_resolution', 130 | 'min_video_bitrate', 131 | 'max_video_bitrate') 132 | for k in ('try_video_criteria', 'try_video_codecs'): 133 | opts[k] = opts[k].split(',') 134 | 135 | from curator.plans import plan_merge 136 | media = curator_input(args) 137 | plan = plan_merge(media, args.format, args.delete, opts) 138 | curator_handle_plan(plan, args) 139 | 140 | def curator_rename(argv): 141 | parser = curator_argparser() 142 | parser.add_argument('-f', '--format', default="@name (@year).@ext") 143 | parser.add_argument('-d', '--db', required=False) 144 | parser.add_argument('--db-cache-days', required=False, type=int, default=30, 145 | help='Update database if older than N days. 
def curator_tag(argv):
    """Handle `curator tag`: plan and apply stream metadata/tag updates."""
    parser = curator_argparser()
    parser.add_argument('-s', '--streams', default="all", choices=["all", "audio", "subtitle"])
    parser.add_argument('-t', '--tag', required=True, choices=["language"])
    parser.add_argument('-v', '--value', required=False)
    parser.add_argument('--skip-tagged', action='store_true',
        help='skip streams if a valid tag already exists')
    # Tag-specific options
    parser.add_argument('--only-macrolanguages', action='store_true',
        help='when detecting languages, consider only macrolanguages. ' +
            'e.g. this will map `nno`/`nnb` detections into `nor`.')
    parser.add_argument('--max-audio-samples', type=int, default=10,
        help='when detecting languages in audio, max number of samples to extract.')
    # BUG FIX: help text was copy-pasted from --max-audio-samples
    parser.add_argument('--min-score', type=float, default=0.8,
        help='when detecting languages in audio, minimum score required to accept a detection.')
    args = curator_args(parser, argv)

    # Select relevant options for the chosen tag
    opts = {}
    select = lambda *keys: { k: vars(args)[k] for k in keys }
    if args.tag == 'language':
        opts = select('only_macrolanguages', 'max_audio_samples', 'min_score')

    from curator.plans import plan_tag
    media = curator_input(args)
    plan = plan_tag(media, args.streams, args.tag, args.value, args.skip_tagged, opts)
    curator_handle_plan(plan, args)
# Default options for merge planning. Criteria lists are ordered by
# decreasing importance; codec lists by decreasing preference.
DEF_OPTS_MERGE = {
    # Video selection
    # BUG FIX: 'length' was removed from the default criteria — it is not
    # implemented by select_video_stream, which raises "Unknown video
    # selection criterion" for unrecognized entries, so the defaults could
    # crash whenever resolution/codec/fps all tied. The CLI default
    # ('resolution,codec,fps') already matches this list.
    'try_video_criteria': ['resolution', 'codec', 'fps'],
    'try_video_codecs': ['hevc', 'h264', 'mpeg4'],
    'min_video_resolution': None,
    'max_video_resolution': None,
    'min_video_bitrate': None,
    'max_video_bitrate': None,

    # Audio selection
    'try_audio_criteria': ['codec', 'bitrate', 'channels'],
    'try_audio_codecs': ['flac', 'dts', 'eac3', 'ac3', 'mp3'],
    'min_audio_bitrate': None,
    'max_audio_bitrate': None,

    # Subtitle selection
    'try_subtitle_criteria': [],
}
def view(self):
    """Render this task as table rows: inputs on the left, output on the right.

    Inputs contributing no selected streams are hidden; when only a strict
    subset of an input's streams was selected, those streams are listed
    individually below it.
    """
    rows = []
    for source in self.inputs:
        picked = [s for s in self.selected_streams if s.media == source]
        if not picked:
            continue  # this input contributes nothing to the merge
        first = not rows
        rows.append((source.name,
                     '→' if first else '↗',
                     self.outputs[0].name if first else ' '))
        # List individual streams only when a strict subset was selected
        if len(picked) < len(source.get_streams()):
            rows.extend((f' - {s}', '↗', '') for s in picked)
    return rows
def select_codec(s1, s2, codec_list):
    """Pick the stream whose codec ranks higher in codec_list; None on a tie.

    Codecs are scored by position (earlier = higher). Codecs absent from
    the list score zero and trigger a warning.
    """
    rank = {name: len(codec_list) - i for i, name in enumerate(codec_list)}
    scores = []
    for stream in (s1, s2):
        codec = stream.get_info()['codec_name']
        score = rank.get(codec, 0)
        if score == 0:
            logging.warning(f"Select criteria does not consider codec {codec} in stream {stream}")
        scores.append(score)
    if scores[0] > scores[1]:
        return s1
    if scores[1] > scores[0]:
        return s2
    return None
def select_audio_stream(s1, s2, opts=DEF_OPTS_MERGE):
    """Select the better of two audio streams.

    Criteria from opts['try_audio_criteria'] are tried in order; the first
    one that discriminates wins. Falls back to s1 (with a warning) when no
    criterion can decide.
    """
    for criterion in opts['try_audio_criteria']:
        # Codec: earlier entry in opts['try_audio_codecs'] wins
        if criterion == 'codec':
            stream = select_codec(s1, s2, opts['try_audio_codecs'])
            if stream is not None:
                return stream
        # Bitrate: higher wins
        elif criterion == 'bitrate':
            if int(s1.get_info()['bit_rate']) > int(s2.get_info()['bit_rate']):
                return s1
            if int(s2.get_info()['bit_rate']) > int(s1.get_info()['bit_rate']):
                return s2
        # Channels: more wins
        elif criterion == 'channels':
            if s1.get_info()['channels'] > s2.get_info()['channels']:
                return s1
            if s2.get_info()['channels'] > s1.get_info()['channels']:
                return s2
        else:
            # BUG FIX: message previously said "video" — copy-paste from
            # select_video_stream; this is the audio selector.
            raise Exception(f"Unknown audio selection criterion: {criterion}")
    logging.warning(f'Audio criteria could not select between {s1} and {s2}')
    return s1
def plan_merge(media, format, delete=False, opts=DEF_OPTS_MERGE):
    """Build a MergePlan merging each matroska file with its related media
    (extra audio/subtitle files) into a single container.

    :param media: list of Media objects to consider
    :param format: output container extension (e.g. 'mkv')
    :param delete: delete the inputs after a successful merge
    :param opts: stream selection options (see DEF_OPTS_MERGE)
    """
    plan = MergePlan()
    # Identify related files
    for m in media:
        if not m.is_format('matroska'):
            continue
        basepath, _ = os.path.splitext(m.path)
        output = Media(f'{basepath}.{format}', Media.TYPE_FILE)
        related = find_related(m, media)
        if len(related) >= 1:
            task = MergeTask([m] + related, output, format, delete)
            plan.add_task(task)
    # Choose which streams to preserve starting with video
    for task in plan:
        video_stream = None
        for s in task.input_video_streams():
            # BUG FIX: forward `opts` so CLI-provided selection options take
            # effect (previously the DEF_OPTS_MERGE defaults were always used).
            video_stream = select_video_stream(video_stream, s, opts)
        # Then audio: keep every audio stream; per-language deduplication is
        # currently disabled (see FIXME below).
        audio_streams = []
        for curr in task.input_audio_streams():
            inserted = False
            for index, prev in enumerate(audio_streams):
                curr_lang = curr.get_info()['tags'].get('language')
                prev_lang = prev.get_info()['tags'].get('language')
                # FIXME: Do not remove anything while merging
                if False and curr_lang == prev_lang != None:
                    audio_streams[index] = select_audio_stream(prev, curr, opts)
                    inserted = True
                    break
            if not inserted:
                audio_streams.append(curr)
        # Then subtitles (same disabled deduplication as audio)
        subtitle_streams = []
        for curr in task.input_subtitle_streams():
            inserted = False
            for index, prev in enumerate(subtitle_streams):
                curr_lang = curr.get_info()['tags'].get('language')
                prev_lang = prev.get_info()['tags'].get('language')
                # FIXME: Do not remove anything while merging
                if False and curr_lang == prev_lang != None:
                    subtitle_streams[index] = select_subtitle_stream(prev, curr, opts)
                    inserted = True
                    break
            if not inserted:
                subtitle_streams.append(curr)
        task.select_streams(video_stream, audio_streams, subtitle_streams)
    return plan
def compute_column_widths(self, w):
    """Distribute a total width of `w` cells among the plan's columns.

    Absolute widths are reserved first; percentage widths then share the
    remainder, rescaled after each allocation so the ratios stay relative
    to the original total. Any rounding remainder lands on the last column.
    """
    columns = self.get_columns()
    # First reserve absolute (non-percentage) widths
    for col in columns:
        if not col['width'].endswith('%'):
            fixed = int(col['width'])
            col['width'] = fixed
            w -= fixed
    # Then distribute the remaining width among percentage columns
    scale = 1
    for col in columns:
        if isinstance(col['width'], str):
            ratio = float(col['width'][:-1]) / 100
            portion = round(w * ratio * scale)
            col['width'] = portion
            remaining = (1 - ratio * scale)
            scale = remaining and scale / remaining or math.inf  # Avoid division by zero
            w -= portion
    # Absorb any rounding remainder into the last column
    if w != 0:
        col['width'] += w
    return columns
# HACK: Overriding DataTable's private renderer to inject per-task row styles.
# Textual exposes no public hook for per-row background styling, so the base
# style is swapped before delegating to the parent implementation.
def _render_line_in_row(self, row_key, line_no, base_style, cursor_location, hover_location):
    # Map the row key back to its position in the table.
    index = self._row_locations.get(row_key)
    # Default component style; unknown rows keep it.
    style = "taskflow--task-odd"
    if index is not None:
        if index % 2:
            style = "taskflow--task-even"
        # Disabled tasks are dimmed regardless of odd/even parity.
        if not self.plan[index].enabled:
            style = "taskflow--task-disabled"
    # Resolve the component class name into a concrete rich style.
    style = self.get_component_styles(style).rich_style
    # NOTE(review): `base_style` is intentionally discarded and replaced —
    # confirm this stays compatible with DataTable's signature across
    # textual versions.
    return super()._render_line_in_row(row_key, line_no, style, cursor_location, hover_location)
def clear_line_cache(self, Widget):
    """Force a repaint of the given widget by clearing its cached lines.

    `Widget` is a widget *class* (e.g. TaskFlow) handed to query_one,
    not an instance.
    """
    # HACK: Without this styles don't refresh
    # TODO: Find a better approach
    table = self.query_one(Widget)
    # NOTE(review): this sets the flag on the App (`self`), while
    # TaskFlow.on_resize sets the same flag on the table itself — confirm
    # whether `table._require_update_dimensions` was intended here.
    self._require_update_dimensions = True
    # Reach into DataTable internals to drop memoized line renders.
    table._line_cache.clear()
# Text alignment modes for print_field()
ALIGN_LEFT = 1
ALIGN_RIGHT = 2

# Helpers
def print_field(string, length, align=ALIGN_LEFT):
    """Format `string` into a fixed-width table cell of `length` characters,
    plus one space of padding on each side.

    Strings longer than `length` are truncated with a '...' suffix.
    """
    lpad = ' '
    rpad = ' '
    if len(string) <= length:
        if align == ALIGN_LEFT:
            rpad += ' ' * (length - len(string))
        if align == ALIGN_RIGHT:
            # BUG FIX: previously appended to an undefined name `padr`,
            # raising NameError; right-aligned fields pad on the left.
            lpad += ' ' * (length - len(string))
        return lpad + string + rpad
    else:
        return lpad + string[:length-3] + '...' + rpad
def __init__(self, media, index, info=None):
    """A single ffprobe stream within `media`, identified by its index.

    media -- owning Media object
    index -- stream index within the container
    info  -- optional pre-fetched ffprobe stream info dict; when omitted,
             it is probed lazily by get_info()
    """
    self.media = media
    self.index = index

    # Cache stream information
    self.info = info       # per-stream ffprobe info (lazy; see get_info)
    self.frames = None     # ffprobe frame list (lazy; see get_frames)
    self.packets = None    # ffprobe packet list (lazy; see get_packets)

    # Store warnings about the stream
    self.warnings = set()
def get_info(self):
    """Return the ffprobe info dict for this stream, probing on first use.

    The result is cached in self.info and always contains a 'tags' key.
    Raises Exception when ffprobe fails.
    """
    if self.info:
        return self.info
    cmd = ['ffprobe', self.media.path]
    cmd += ['-show_streams']
    cmd += ['-select_streams', str(self.index)]
    cmd += ['-of', 'json']
    result = subprocess.run(cmd, capture_output=True)
    if result.returncode != 0:
        errors = result.stderr.decode('utf-8')
        raise Exception(f"Failed to get info from {self} with ffmpeg:\n{errors}")
    output = result.stdout.decode('utf-8')
    # BUG FIX: ffprobe returns a *list* under 'streams'; with -select_streams
    # it holds exactly one entry. The previous code cached the list itself,
    # so the setdefault() below (and callers indexing into the dict, e.g.
    # is_video()) failed whenever info was not supplied to the constructor.
    self.info = json.loads(output)['streams'][0]
    self.info.setdefault('tags', {})
    return self.info
def get_packets(self):
    """Return (and cache) the ffprobe packet list for this stream."""
    if self.packets:
        return self.packets
    probe_cmd = [
        'ffprobe', self.media.path,
        '-show_packets',
        '-select_streams', str(self.index),
        '-of', 'json',
    ]
    proc = subprocess.run(probe_cmd, capture_output=True)
    if proc.returncode != 0:
        errors = proc.stderr.decode('utf-8')
        raise Exception(f"Failed to get packets from {self} with ffmpeg:\n{errors}")
    self.packets = json.loads(proc.stdout.decode('utf-8'))['packets']
    return self.packets
157 | """ 158 | assert(self.is_audio()) 159 | debug = logging.getLogger().level == logging.DEBUG 160 | logging.debug(f'Detecting audio language in stream #{self.index} of media: "{self.media.name}"') 161 | 162 | import whisper 163 | from whisper.audio import CHUNK_LENGTH 164 | model = whisper.load_model("base") 165 | 166 | # Calculate number of samples 167 | duration = self.get_duration() 168 | len_samples = float(CHUNK_LENGTH) 169 | num_samples = min(opts['max_audio_samples'], int(duration / len_samples)) 170 | 171 | results = {} 172 | with tempfile.TemporaryDirectory() as tmp: 173 | ext = self.media.ext 174 | err_samples = 0 175 | for index in range(num_samples): 176 | # Extract sample 177 | sample = os.path.join(tmp, f'sample{index:04d}.{ext}') 178 | cmd = ['ffmpeg', '-i', self.media.path, '-map', f'0:{self.index}'] 179 | cmd += ['-c:a', 'copy'] 180 | cmd += ['-ss', str(index * duration / num_samples)] 181 | cmd += ['-t', str(len_samples)] 182 | cmd += [sample] 183 | result = subprocess.run(cmd, capture_output=True) 184 | if result.returncode != 0: 185 | errors = result.stderr.decode('utf-8') 186 | raise Exception(f"Failed to extract audio sample from {self.media.path} with ffmpeg:\n{errors}") 187 | 188 | # Detect language in sample 189 | try: 190 | audio = whisper.load_audio(sample) 191 | audio = whisper.pad_or_trim(audio) 192 | mel = whisper.log_mel_spectrogram(audio).to(model.device) 193 | _, probs = model.detect_language(mel) 194 | except Exception as e: 195 | logging.warning(f'Failed to detect language in {sample}:\n{e}') 196 | err_samples += 1 197 | continue 198 | # Process language detection results 199 | if debug: 200 | highest_probs = dict(collections.Counter(probs).most_common(5)) 201 | highest_probs_rounded = { k: f'{v:.4f}' for k, v in highest_probs.items() } 202 | logging.debug(f'Sample #{index:02d}: {highest_probs_rounded}') 203 | lang = max(probs, key=probs.get) 204 | prob = probs[lang] 205 | if opts['min_score'] <= prob: 206 | 
results.setdefault(lang, []).append(prob) 207 | 208 | # Compute final scores as votes+avg(prob) if more than half succeeded 209 | if err_samples > num_samples / 2: 210 | return None 211 | results = { k: len(v) + sum(v)/len(v) for k, v in results.items() } 212 | if not results: 213 | return None 214 | 215 | # Rename keys since OpenAI Whisper does not fully adhere to ISO 639-1 216 | replacements = [('jw', 'jv')] 217 | for old, new in replacements: 218 | if old in results: 219 | results[new] = results.pop(old) 220 | 221 | # Optionally merge into ISO 639-3 macrolanguages and return highest ocurring 222 | if opts['only_macrolanguages']: 223 | macro_results = {} 224 | for key, value in results.items(): 225 | part3 = iso639.Lang(pt1=key).pt3 226 | macro = iso639.Lang(pt1=key).macro() 227 | lang = macro.pt3 if macro else part3 228 | macro_results[lang] = macro_results.get(lang, 0) + value 229 | lang = max(macro_results, key=macro_results.get) 230 | return lang 231 | 232 | # Get highest occurring language and convert ISO 639-1 to ISO 639-3 233 | lang = max(results, key=results.get) 234 | lang = iso639.Lang(pt1=lang).pt3 235 | return lang 236 | 237 | def detect_subtitle_language(self, opts=DEF_OPTS_LANGUAGE): 238 | """ 239 | Detect subtitle language copying/converting to SRT, 240 | extracting the raw text and detecting its language. 
241 | """ 242 | assert(self.is_subtitle()) 243 | 244 | # Cannot detect language in bitmap subtitles 245 | if self.get_info()['codec_name'] == 'hdmv_pgs_subtitle': 246 | return None 247 | 248 | # Detect subtitle language 249 | def srt_language(path): 250 | with open(path, 'rb') as f: 251 | enc = chardet.detect(f.read())['encoding'] 252 | if enc == 'Windows-1254': 253 | enc = None # Often false positive, let PySRT auto-detect 254 | subs = pysrt.open(path, encoding=enc) 255 | text = ' '.join(map(lambda x: x.text, subs)) 256 | lang = langid.classify(text)[0] 257 | lang = iso639.Lang(pt1=lang).pt3 258 | return lang 259 | 260 | # Check if the parent media is already an SRT file 261 | path = self.media.path 262 | if self.media.ext == 'srt': 263 | return srt_language(path) 264 | 265 | # Otherwise extract subtitle stream, converting to SRT 266 | with tempfile.TemporaryDirectory() as tmp: 267 | output = os.path.join(tmp, 'output.srt') 268 | cmd = ['ffmpeg', '-i', path, '-map', f'0:{self.index}'] 269 | if self.get_info()['codec_name'] in ('srt', 'subrip'): 270 | cmd += ['-c:s', 'copy'] 271 | cmd += [output] 272 | result = subprocess.run(cmd, capture_output=True) 273 | if result.returncode != 0: 274 | errors = result.stderr.decode('utf-8') 275 | raise Exception(f"Failed to extract subtitles from {path} with ffmpeg:\n{errors}") 276 | return srt_language(output) 277 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------