├── .gitignore ├── requirements.txt ├── Dockerfile ├── LICENSE ├── .github └── workflows │ └── docker-build-ghcr.yml ├── src ├── stats.py ├── helper.py ├── log.py ├── vad.py └── lecturecut.py ├── README.md ├── CONTRIBUTING.md └── CODE_OF_CONDUCT.md /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | **/*.mp4 3 | 4 | *.code-workspace 5 | .vscode/ -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ffmpeg-python 2 | webrtcvad 3 | opencv-python 4 | joblib 5 | rich>=12 -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-slim 2 | 3 | COPY requirements.txt . 4 | 5 | RUN apt-get update && \ 6 | apt-get install -y gcc ffmpeg && \ 7 | pip install --no-cache-dir -r requirements.txt 8 | 9 | ENV LECTURECUT_HOME=/LectureCut 10 | WORKDIR ${LECTURECUT_HOME}/src 11 | COPY src . 
12 | 13 | ENTRYPOINT ["python", "lecturecut.py"] 14 | CMD ["-h"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Gamer92000 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /.github/workflows/docker-build-ghcr.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | 6 | # GitHub recommends pinning actions to a commit SHA. 7 | # To get a newer version, you will need to update the SHA. 
8 | # You can also reference a tag or branch, but the action may change without warning. 9 | 10 | name: Build and Push GHCR 11 | 12 | on: 13 | push: 14 | tags: 15 | - v* 16 | 17 | env: 18 | REGISTRY: ghcr.io 19 | IMAGE_NAME: ${{ github.repository }} 20 | 21 | jobs: 22 | build-and-push-image: 23 | runs-on: ubuntu-latest 24 | permissions: 25 | contents: read 26 | packages: write 27 | 28 | steps: 29 | - name: Checkout repository 30 | uses: actions/checkout@v3 31 | 32 | - name: Log in to the Container registry 33 | uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 34 | with: 35 | registry: ${{ env.REGISTRY }} 36 | username: ${{ github.actor }} 37 | password: ${{ secrets.GITHUB_TOKEN }} 38 | 39 | - name: Extract metadata (tags, labels) for Docker 40 | id: meta 41 | uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 42 | with: 43 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 44 | 45 | - name: Build and push Docker image 46 | uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc 47 | with: 48 | context: . 49 | push: true 50 | tags: ${{ steps.meta.outputs.tags }} 51 | labels: ${{ steps.meta.outputs.labels }} -------------------------------------------------------------------------------- /src/stats.py: -------------------------------------------------------------------------------- 1 | import os 2 | from helper import get_video_length 3 | import rich 4 | from rich.align import Align 5 | from rich.table import Table 6 | 7 | def print_stats(files, total_time): 8 | """ 9 | Print some stats for the given files. 10 | 11 | input_files -- The input files. 12 | output_files -- The output files. 
13 | """ 14 | table = Table(title="File Stats") 15 | 16 | table.add_column("Input File", justify="left", style="yellow") 17 | table.add_column("Size Changes", justify="right", style="plum4") 18 | table.add_column("Duration Changes", justify="right", style="cyan") 19 | table.add_column("Duration %", justify="right", style="magenta") 20 | 21 | # total length of input video 22 | total_input_length = 0 23 | total_input_size = 0 24 | total_output_length = 0 25 | total_output_size = 0 26 | 27 | for input_file, output_file in files: 28 | input_length = get_video_length(input_file) 29 | input_size = os.path.getsize(input_file) 30 | output_length = get_video_length(output_file) 31 | output_size = os.path.getsize(output_file) 32 | total_input_length += input_length 33 | total_input_size += input_size 34 | total_output_length += output_length 35 | total_output_size += output_size 36 | table.add_row( 37 | os.path.basename(input_file), 38 | f"{input_size / 1024 / 1024:.2f} MB -> {output_size / 1024 / 1024:.2f} MB", 39 | f"{input_length / 60:.2f} min -> {output_length / 60:.2f} min", 40 | f"{output_length / input_length * 100:.2f} %" 41 | ) 42 | 43 | if len(files) > 1: 44 | table.add_row( 45 | "[italic]Total", 46 | f"{total_input_size / 1024 / 1024:.2f} MB -> {total_output_size / 1024 / 1024:.2f} MB", 47 | f"{total_input_length / 60:.2f} min -> {total_output_length / 60:.2f} min", 48 | f"{total_output_length / total_input_length * 100:.2f} %" 49 | ) 50 | 51 | performance = f"[bold green]Processed [bold cyan]{len(files)} [bold green]video{'s' if len(files) > 1 else ''} in [bold cyan]{total_time / 60:.0f} [bold green]min and [bold cyan]{total_time % 60:.0f} [bold green]sec." 
52 | 53 | rich.print() 54 | rich.print(Align(table, align="center")) 55 | rich.print() 56 | rich.print(Align(performance, align="center")) 57 | rich.print() -------------------------------------------------------------------------------- /src/helper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from pathlib import Path 4 | from queue import Queue 5 | from threading import Thread 6 | import cv2 7 | 8 | 9 | def get_video_length(videoPath, progress=None, pbar=None): 10 | """ 11 | Get the length of the given video in seconds. 12 | 13 | progress -- the manager for the progress bars 14 | pbar -- the progress bar 15 | videoPath -- the path to the video 16 | """ 17 | video = cv2.VideoCapture(videoPath) 18 | fps = video.get(cv2.CAP_PROP_FPS) 19 | frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT) 20 | if pbar: progress.update(pbar, advance=1) 21 | return frame_count / fps 22 | 23 | 24 | # TODO: replace with shutil.rmtree 25 | def delete_directory_recursively(path, retryCounter=10): 26 | """ 27 | Delete a directory and all its contents. 28 | 29 | path -- The path to the directory to delete. 30 | retryCounter -- The number of times to retry deleting the directory. 31 | """ 32 | if os.path.exists(path): 33 | for _ in range(retryCounter): 34 | try: 35 | for filename in os.listdir(path): 36 | if os.path.isdir(path + filename): 37 | delete_directory_recursively(path + filename + "/") 38 | else: 39 | for _ in range(retryCounter): 40 | try: 41 | os.remove(path + filename) 42 | break 43 | except: 44 | time.sleep(0.1) 45 | os.rmdir(path) 46 | break 47 | except: 48 | time.sleep(0.1) 49 | 50 | def reader(pipe, queue): 51 | """ 52 | Read the output of a pipe and put it in a queue. 53 | 54 | pipe -- The pipe to read from. 55 | queue -- The queue to put the output in. 
56 | """ 57 | try: 58 | with pipe: 59 | for line in iter(pipe.readline, b""): 60 | queue.put((pipe, line)) 61 | finally: 62 | queue.put(None) 63 | 64 | def read_progress(progress, pbar, ffmpeg_run): 65 | """ 66 | Read the output of a ffmpeg run and update the given progress bar. 67 | 68 | progress -- The manager that controls the progress bars. 69 | pbar -- The progress bar to update. 70 | ffmpeg_run -- The ffmpeg run to read the output from. 71 | """ 72 | q = Queue() 73 | Thread(target=reader, args=(ffmpeg_run.stdout, q)).start() 74 | Thread(target=reader, args=(ffmpeg_run.stderr, q)).start() 75 | for _ in range(2): 76 | for source, line in iter(q.get, None): 77 | line = line.decode() 78 | if source == ffmpeg_run.stderr: 79 | print(line) 80 | else: 81 | line = line.rstrip() 82 | parts = line.split("=") 83 | key = parts[0] if len(parts) > 0 else None 84 | value = parts[1] if len(parts) > 1 else None # TODO: this might cause float(none): 85 | if key == "out_time_ms": 86 | time = max(round(float(value) / 1000000., 2), 0) 87 | progress.update(pbar, advance=int(time * 1000)) 88 | elif key == "progress" and value == "end": 89 | progress.update(pbar, completet=True) 90 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🎞️ LectureCut 2 | 3 |
4 | 5 | [![GitHub license](https://img.shields.io/github/license/Gamer92000/LectureCut)](https://github.com/Gamer92000/LectureCut/blob/main/LICENSE) 6 | [![GitHub commits](https://badgen.net/github/commits/Gamer92000/LectureCut/main)](https://GitHub.com/Gamer92000/LectureCut/commit/) 7 | [![Github stars](https://img.shields.io/github/stars/Gamer92000/LectureCut.svg)](https://GitHub.com/Gamer92000/LectureCut/stargazers/) 8 |
9 |

If you like this project, please consider giving it a star ⭐️!

10 |
11 | 12 | ## 📝 Description 13 | 14 | LectureCut is a video editor for lectures. It allows you to automatically cut out parts of a video that have no voice in it. This can cut down the time you need to watch a lecture by a lot. 15 | LectureCut uses WebRTC to detect voice in a video. It then uses ffmpeg to cut out the parts of the video that have no voice in it. Using some advanced smart encoding techniques, LectureCut can cut down the time it takes to process a video by a lot. 16 | 17 | 18 | ## 🎃 Hacktoberfest 19 | 20 | This project is participating in Hacktoberfest 2022. If you want to contribute, please read the [contribution guidelines](CONTRIBUTING.md) first. 21 | Any contributions are welcome, no matter how small. If you have any questions, feel free to ask them in the [Discussions](https://github.com/Gamer92000/LectureCut/discussions) tab. 22 | Some ideas for contributions can be found in the [issues](https://github.com/Gamer92000/LectureCut/issues) tab. 23 | 24 | ## 🚀 Usage 25 | 26 | ### 🐳 Docker 27 | Docker is a convenient way to build and run LectureCut. Instead of manually installing and maintaining versions of ffmpeg 28 | and various python libraries on your machine, you can utilize Docker to run LectureCut in as a container. 29 | Moreover, this repo is expected to change at a fast pace, and so Docker is the easiest way to ensure that you're running 30 | the most up-to-date version of LectureCut and its dependencies. 31 | 32 | #### How it works: 33 | 34 | Pull the LectureCut image from GitHub Container Registry. 35 | ```bash 36 | # pull a specific release version 37 | docker pull ghcr.io/gamer92000/lecturecut: 38 | # pull the current main 39 | docker pull ghcr.io/gamer92000/lecturecut:main 40 | ``` 41 | 42 | Simple example: 43 | To run LectureCut via Docker, simply mount the file location into the container. In this example, 44 | video.mp4 is mounted into /tmp in the container and `lecturecut` is run with the `-i` input flag pointing to this location. 
45 | ```bash 46 | docker run -it -v /path/to/video_file/on_your_machine/video.mp4:/tmp/video.mp4 ghcr.io/gamer92000/lecturecut:main -i /tmp/video.mp4 47 | ``` 48 | 49 | Multiple directories example: 50 | ```bash 51 | docker run -it \ 52 | -v /path/to/input_files/:/tmp/input_files/ \ 53 | -v /path/to/output_files/:/tmp/output_files/ \ 54 | ghcr.io/gamer92000/lecturecut:main -i /tmp/input_files/video_in.mp4 -o /tmp/output_files/video_out.mp4 -q 25 -a 2 55 | ``` 56 | 57 | ### 🐍 Python 58 | 59 | #### 👶 Requirements 60 | 61 | First you need to have [ffmpeg](https://ffmpeg.org/download.html) and [Python 3](https://www.python.org/downloads/) and [pip](https://pip.pypa.io/en/stable/installing/) installed. 62 | To install the python dependencies, simply run: 63 | ```bash 64 | pip install -r requirements.txt 65 | ``` 66 | 67 | #### 🏃 Running 68 | 69 | To run the program, simply run: 70 | ```bash 71 | python src/lecturecut.py -h 72 | ``` 73 | 74 | ## 📝 License 75 | 76 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 77 | 78 | ## ⚠️ Disclaimer 79 | 80 | This software is provided as-is and without any warranty. You are free to use it for any purpose, but I am not responsible for any damage caused by this software. 81 | 82 | ## 📝 Contributing 83 | 84 | If you want to contribute to this project, feel free to open a pull request. I will try to review it as soon as possible. 
85 | -------------------------------------------------------------------------------- /src/log.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # Simple logger with journald and file support 3 | 4 | import os 5 | import socket 6 | import time 7 | from enum import Enum 8 | import pathlib 9 | 10 | 11 | class LogMode(Enum): 12 | SYSTEMD = 0 13 | FILE = 1 14 | AUTO = 0 15 | 16 | class LogLevel(Enum): 17 | DEBUG = 0 18 | INFO = 1 19 | WARNING = 2 20 | ERROR = 3 21 | 22 | def __str__(self): 23 | return self.name.lower() + ":" 24 | 25 | log_is_initialized = False 26 | log_mode = None 27 | log_sock = None 28 | log_file = None 29 | log_level = LogLevel.INFO 30 | log_to_std_out = False 31 | 32 | def log_init(mode=LogMode.AUTO, log_path=None, level=LogLevel.INFO, logToStdOut=False): 33 | """ 34 | Sets up the logger. Must be called before the first call of log_print to set up the logger with non-default settings. 35 | 36 | log_init with default settings is called form log_print if the logger wasn't initialized. 37 | This function will do nothing if the logger is in an initialized state. 38 | 39 | Parameters 40 | ---------- 41 | mode : LogMode 42 | AUTO, SYSTEMD : Try to write to journald or write to file if journald is unavailable. 43 | FILE : Write to file. The old logfile is overwritten. (Default path: `~/.local/var/log/LectureCut/log.txt` or `%LOCALAPPDATA%\\LectureCut\\log.txt`) 44 | log_path : Path 45 | Path of the log file in FILE mode. Ignored if journald is used. 46 | level : LogLevel 47 | Maximum level of log messages that are logged. 48 | logToStdOut : bool 49 | If true, write messages to stdout and into the log. 
50 | """ 51 | 52 | global log_is_initialized, log_mode, log_sock, log_file, log_level, log_to_std_out 53 | 54 | # check if log system already initialized 55 | if log_is_initialized: 56 | return 57 | 58 | # setup journald logging 59 | if os.name == "posix" and mode == LogMode.SYSTEMD: 60 | try: 61 | log_sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) 62 | log_sock.connect("/dev/log") 63 | log_mode = LogMode.SYSTEMD 64 | except: 65 | mode = LogMode.FILE 66 | 67 | # setup file logging 68 | if mode == LogMode.FILE: 69 | try: 70 | # set default path 71 | if log_path == None: 72 | if os.name == "posix": 73 | log_path = pathlib.Path.home() / ".local/var/log/LectureCut/log.txt" 74 | else: 75 | log_path = pathlib.Path.home() / "AppData/Local/LectureCut/log.txt" 76 | 77 | if not log_path.exists(): 78 | log_path.parent.mkdir(parents=True, exist_ok=True) 79 | log_path.touch() 80 | log_file = open(log_path, "w") 81 | log_mode = LogMode.FILE 82 | except: 83 | print("!!!!!!!!!!!!!!!!!!!\n") 84 | print("!!! CAN NOT LOG !!!\n") 85 | print("!!!!!!!!!!!!!!!!!!!\n") 86 | print("!! USING STD OUT !!\n") 87 | logToStdOut = True 88 | 89 | log_is_initialized = True 90 | log_level = level 91 | log_to_std_out = logToStdOut 92 | 93 | def log_print(message, level=LogLevel.INFO, toStdOut=False): 94 | """ 95 | Write message into the log and initializes the logger if necessary. 96 | 97 | Parameters 98 | ---------- 99 | message : str 100 | Message that is written into the log. 101 | level : LogLevel 102 | Log-level of this log call. 103 | logToStdOut : bool 104 | If true, write messages to stdout and into the log. 
105 | """ 106 | 107 | if not log_is_initialized: 108 | log_init() 109 | 110 | if not log_level.value <= level.value: 111 | return 112 | 113 | message = "{} {}".format(level, message) 114 | if toStdOut or log_to_std_out: 115 | print(message) 116 | if log_mode == LogMode.FILE: 117 | if not log_file or log_file.closed: 118 | return 119 | log_file.write("[{}] {}\n".format(time.time(), message)) 120 | elif log_mode == LogMode.SYSTEMD: 121 | if not log_sock or log_sock.closed: 122 | return 123 | log_sock.send(bytes("LectureCut: {}".format(message), 'UTF-8')) 124 | 125 | def log_close(): 126 | """ 127 | Closes file handles, sockets and sets the logger to an uninitialized state. 128 | """ 129 | global log_is_initialized 130 | 131 | # check if log is initialized 132 | if not log_is_initialized: 133 | return 134 | 135 | # close sockets or file handles 136 | if log_mode == LogMode.SYSTEMD and log_sock and not log_sock.closed: 137 | log_sock.close() 138 | elif log_mode == LogMode.FILE and log_file and not log_file.closed: 139 | log_file.close() 140 | 141 | log_is_initialized = False -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Welcome to the LectureCut contributing guide 2 | 3 | Thanks for coming by! We're glad you're interested in contributing to LectureCut. 4 | We're a small team of students and we're always looking for help. 5 | If you're interested in contributing, please read the following guidelines. 6 | 7 | ## How can I contribute? 8 | 9 | ### Reporting bugs 10 | 11 | This section guides you through submitting a bug report for LectureCut. 12 | Following these guidelines helps maintainers and the community understand your report, reproduce the behavior, and find related reports. 13 | 14 | Before creating bug reports, please check [this list](#before-submitting-a-bug-report) as you might find out that you don't need to create one. 
15 | When you are creating a bug report, please [include as many details as possible](#how-do-i-submit-a-good-bug-report). 16 | 17 | #### Before Submitting A Bug Report 18 | 19 | 20 | * **Check the [issue tracker](https://github.com/Gamer92000/LectureCut/issues)** to see if the problem has already been reported. If it has, add a comment to the existing issue instead of opening a new one. 21 | 22 | #### How Do I Submit A (Good) Bug Report? 23 | 24 | Explain the problem and include additional details to help maintainers reproduce the problem: 25 | 26 | * **Use a clear and descriptive title** for the issue to identify the problem. 27 | * **Describe the exact steps which reproduce the problem** in as many details as possible. For example, start by explaining how you started LectureCut, e.g. which command exactly you used in the terminal. 28 | * **Provide some context** by explaining which problem you're experiencing, and how you're trying to solve it. What's the current behavior? What's the expected behavior? 29 | * **Include the log file** if an error occurred. By default it is written to `~/.local/var/log/LectureCut/log.txt` on Linux or `%LOCALAPPDATA%\LectureCut\log.txt` on Windows (see #5). 30 | * **Include some metadata** by giving the OS, the terminal, the version of LectureCut, and the version of the used libraries. You currently can't get the version of LectureCut from the program itself, you have to look at the release page on GitHub. 31 | 32 | ### Suggesting Enhancements 33 | 34 | This section guides you through submitting an enhancement suggestion for LectureCut, including completely new features and minor improvements to existing functionality. 35 | 36 | Before creating enhancement suggestions, please check [this list](#before-submitting-an-enhancement-suggestion) as you might find out that you don't need to create one. 37 | When you are creating an enhancement suggestion, please [include as many details as possible](#how-do-i-submit-a-good-enhancement-suggestion). 
38 | 39 | #### Before Submitting An Enhancement Suggestion 40 | 41 | * **Check the [issue tracker](https://github.com/Gamer92000/LectureCut/issues)** to see if the enhancement has already been suggested. If it has, add a comment to the existing issue instead of opening a new one. 42 | 43 | #### How Do I Submit A (Good) Enhancement Suggestion? 44 | 45 | * **Use a clear and descriptive title** for the issue to identify the suggestion. 46 | * **Provide a step-by-step description of the suggested enhancement** in as many details as possible. 47 | * **Provide some context** by explaining the problem you're trying to solve, and how you're trying to solve it. What would the enhancement do? What would be the expected behavior? 48 | * **Explain why this enhancement would be useful** to most LectureCut users. 49 | * **List some other applications where this enhancement exists.** 50 | 51 | ### Pull Requests 52 | 53 | The process described here has several goals: 54 | 55 | - Maintain LectureCut's quality 56 | - Fix problems that are important to users 57 | - Engage the community in working toward the best possible LectureCut 58 | - Enable a sustainable system for LectureCut's maintainers to review contributions 59 | 60 | Please follow these steps to have your contribution considered by the maintainers: 61 | 62 | 1. Follow all instructions in the template 63 | 2. Follow the [styleguides](#styleguides) 64 | 3. After you submit your pull request, verify that all [status checks](https://help.github.com/articles/about-status-checks/) are passing 65 | 66 | #### Styleguides 67 | 68 | ##### Git Commit Messages 69 | 70 | * Use the present tense ("Add feature" not "Added feature") 71 | * Use the imperative mood ("Move cursor to..." 
not "Moves cursor to...") 72 | * Limit the first line to 72 characters or less 73 | * Reference issues and pull requests liberally after the first line 74 | * Use the [conventional commit](https://www.conventionalcommits.org/en/v1.0.0/) format but use [gitmojis](https://gitmoji.dev/) instead of the type 75 | 76 | ##### Python Styleguide 77 | 78 | * Use 2 spaces for indentation rather than tabs 79 | * Use 79 characters per line 80 | * Use `snake_case` for variables and functions 81 | * Use `CamelCase` for classes 82 | * Use `UPPER_CASE` for constants 83 | * Use `lowercase` for filenames 84 | 85 | ##### Documentation Styleguide 86 | 87 | * Use [Markdown](https://daringfireball.net/projects/markdown/syntax) for documentation 88 | 89 | ## Attribution 90 | 91 | This guide was adapted from the [Atom contributing guide](https://github.com/atom/atom/blob/master/CONTRIBUTING.md). 92 | 93 | ## License 94 | 95 | By contributing to LectureCut, you agree that your contributions will be licensed under its MIT license. 96 | 97 | ## Code of Conduct 98 | 99 | Everyone interacting in the LectureCut project is expected to follow the [code of conduct](.github/CODE_OF_CONDUCT.md). 100 | 101 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 
11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | julian.imhof@t-online.de. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 
99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /src/vad.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import cv2 4 | import ffmpeg 5 | import webrtcvad 6 | 7 | KERN_SIZE = 30 8 | 9 | def read_audio(path): 10 | """ 11 | Reads the video file. 12 | 13 | returns (PCM audio data, sample rate). 
14 | """ 15 | out, _ = ( 16 | ffmpeg 17 | .input(path) 18 | .output("pipe:", format="s16le", acodec="pcm_s16le", ac=1, ar="16k") 19 | .global_args("-loglevel", "quiet") 20 | .global_args("-hide_banner") 21 | .global_args("-nostdin") 22 | .run(capture_stdout=True) 23 | ) 24 | return out, 16000 25 | 26 | 27 | class Frame(object): 28 | """Represents a "frame" of audio data.""" 29 | def __init__(self, bytes, timestamp, duration): 30 | self.bytes = bytes 31 | self.timestamp = timestamp 32 | self.duration = duration 33 | 34 | 35 | def frame_generator(frame_duration_ms, audio, sample_rate): 36 | """ 37 | Generates audio frames from PCM audio data. 38 | Takes the desired frame duration in milliseconds, the PCM data, and 39 | the sample rate. 40 | Yields Frames of the requested duration. 41 | 42 | frame_duration_ms -- The frame duration in milliseconds. 43 | audio -- The PCM data. 44 | sample_rate -- The sample rate of the data. 45 | """ 46 | n = int(sample_rate * (frame_duration_ms / 1000.0) * 2)  # * 2: two bytes per sample (s16le PCM from read_audio) 47 | offset = 0 48 | timestamp = 0.0 49 | duration = (float(n) / sample_rate) / 2.0 50 | while offset + n < len(audio): 51 | yield Frame(audio[offset:offset + n], timestamp, duration) 52 | timestamp += duration 53 | offset += n 54 | 55 | 56 | def build_gauss_kernel(n_frames): 57 | """ 58 | n_frames: number of frames to consider (needs to be odd) 59 | """ 60 | def gauss(x): 61 | """ 62 | calculates a sample of the continuous gaussian function 63 | # sigma is fixed at 1 64 | # normalization is skipped for simplicity 65 | """ 66 | return 1 * math.exp(- (float(x)**2) / 2) 67 | 68 | if n_frames <= 1: 69 | scale = 1 70 | else: 71 | scale = 1 / (n_frames // 2) * 2  # maps the outermost kernel sample to x = 2 (i.e. 2 sigma of the unit gaussian) 72 | kernel = [0.0] * n_frames 73 | for i in range(n_frames): 74 | kernel[i] = gauss((i - n_frames // 2) * scale) 75 | kernelSum = sum(kernel) 76 | kernel = [x / kernelSum for x in kernel] 77 | return kernel 78 | 79 | def clip_gauss_kernel(kernel, side, cutoff): 80 | """ 81 | removes part of the kernel and normalizes the
remaining part 82 | kernel: gaussian kernel 83 | side: "left" or "right" 84 | cutoff: number of elments to cut off 85 | """ 86 | if side == "left": 87 | kernel = kernel[cutoff:] 88 | elif side == "right": 89 | kernel = kernel[:-cutoff] 90 | kernelSum = sum(kernel) 91 | kernel = [x / kernelSum for x in kernel] 92 | return kernel 93 | 94 | def vad_collector(sample_rate, frame_duration_ms, kernel_size, vad, frames): 95 | """ 96 | Filters out non-voiced audio frames. 97 | Given a webrtcvad.Vad and a source of audio frames, returns a list 98 | of (start, end) timestamps for the voiced audio. 99 | Uses a Gaussian filter to smooth the probability of being voiced 100 | over time. 101 | 102 | Arguments: 103 | sample_rate -- The audio sample rate, in Hz. 104 | frame_duration_ms -- The frame duration in milliseconds. 105 | kernel_size -- The number of frames to include in the smoothing per side. 106 | vad -- An instance of webrtcvad.Vad. 107 | frames -- a source of audio frames (sequence or generator). 108 | 109 | returns -- a list of (start, end) timestamps. 
110 | """ 111 | vad_frames = [(frame, vad.is_speech(frame.bytes, sample_rate)) 112 | for frame in frames] 113 | 114 | kernel = build_gauss_kernel(kernel_size * 2 + 1) 115 | filtered_vad_frames = [] 116 | for i in range(len(vad_frames)): 117 | if i < kernel_size: 118 | tmpKernel = clip_gauss_kernel(kernel, "left", kernel_size - i) 119 | filtered_vad_frames.append((vad_frames[i][0], sum([x[1] * y 120 | for x, y in 121 | zip(vad_frames[i : i+kernel_size+1], tmpKernel) 122 | ]))) 123 | elif i > len(vad_frames) - kernel_size : 124 | tmpKernel = clip_gauss_kernel(kernel, "right", 125 | kernel_size - (len(vad_frames) - i)) 126 | filtered_vad_frames.append((vad_frames[i][0], sum([x[1] * y 127 | for x, y in 128 | zip(vad_frames[i-kernel_size : i+1], tmpKernel) 129 | ]))) 130 | else: 131 | filtered_vad_frames.append((vad_frames[i][0], sum([x[1] * y 132 | for x, y in 133 | zip(vad_frames[i-kernel_size : i+kernel_size+1], kernel) 134 | ]))) 135 | 136 | segments = [(x[0].timestamp, x[0].timestamp+x[0].duration) 137 | for x in 138 | filtered_vad_frames if x[1] > 0.5] 139 | # merge segments when no more than .2 second between them 140 | newSegments = [] 141 | i = 0 142 | while i < len(segments)-1: 143 | startSegment = i 144 | while i < (len(segments)-1) and segments[i][1] + .2 >= segments[i+1][0]: 145 | i += 1 146 | newSegments.append((segments[startSegment][0], segments[i][1])) 147 | i += 1 148 | 149 | # rount to 4 decimal places 150 | newSegments = [(round(x[0], 4), round(x[1], 4)) for x in newSegments] 151 | 152 | return newSegments 153 | 154 | def run(file, aggressiveness, invert=False): 155 | """ 156 | Given a file path, aggressiveness, and invert flag, returns a list of 157 | (start, end) timestamps for the voiced audio. 
158 | 159 | file: path to the audio file 160 | aggressiveness: aggressiveness of the VAD 161 | invert: if True, returns a list of (start, end) timestamps 162 | for the non-voiced audio 163 | """ 164 | audio, sample_rate = read_audio(file) 165 | vad = webrtcvad.Vad(aggressiveness) 166 | frames = frame_generator(30, audio, sample_rate) 167 | frames = list(frames) 168 | segments = vad_collector(sample_rate, 30, KERN_SIZE, vad, frames) 169 | cuts = segments 170 | 171 | if invert: 172 | video = cv2.VideoCapture(file) 173 | fps = video.get(cv2.CAP_PROP_FPS) 174 | frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT) 175 | duration = frame_count / fps 176 | cuts = [] 177 | if segments[0][0] > 0: 178 | cuts.append((0, segments[0][0])) 179 | for j in range(len(segments) - 1): 180 | cuts.append((segments[j][1], segments[j + 1][0])) 181 | if segments[-1][1] < duration: 182 | cuts.append((segments[-1][1], duration)) 183 | 184 | return cuts -------------------------------------------------------------------------------- /src/lecturecut.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import atexit 5 | import multiprocessing 6 | import os 7 | import textwrap 8 | import time 9 | import uuid 10 | from itertools import takewhile 11 | 12 | import ffmpeg 13 | from joblib import Parallel, delayed 14 | 15 | import rich 16 | from rich.console import Group 17 | from rich.live import Live 18 | from rich.align import Align 19 | from rich.progress import ( 20 | MofNCompleteColumn, 21 | BarColumn, 22 | Progress, 23 | TextColumn, 24 | TimeRemainingColumn, 25 | TimeElapsedColumn, 26 | ) 27 | 28 | import vad 29 | from helper import delete_directory_recursively, read_progress, get_video_length 30 | from stats import print_stats 31 | 32 | N_CORES = multiprocessing.cpu_count() 33 | PROCESSES = N_CORES // 4 34 | 35 | # TODO: use pathlib 36 | CACHE_PREFIX = "./" # needs to end with a slash 37 | 38 | instances = {} 
39 | 40 | def init_cache(instance): 41 | """ 42 | Create a cache directory for the given instance. 43 | The cache directory is used to store temporary files. 44 | 45 | instance -- the instance id 46 | """ 47 | cache_path = CACHE_PREFIX + f"{instance}/" 48 | if os.path.exists(cache_path): 49 | raise Exception("Cache already exists") 50 | os.mkdir(cache_path) 51 | os.mkdir(cache_path + "/segments") 52 | os.mkdir(cache_path + "/cutSegments") 53 | 54 | def cleanup(instance): 55 | """ 56 | Delete the cache directory for the given instance. 57 | 58 | instance -- the instance id 59 | """ 60 | cache_path = CACHE_PREFIX + f"/{instance}/" 61 | delete_directory_recursively(cache_path) 62 | 63 | def generate_cut_list(instance): 64 | """ 65 | Generate a list of segments that should not be cut out of the video. 66 | The list is stored in the instances dictionary. 67 | 68 | instance -- the instance id 69 | """ 70 | global instances 71 | file = instances[instance]["file"] 72 | instances[instance]["cuts"] = vad.run(file, aggressiveness, invert) 73 | 74 | def prepare_video(progress, instance): 75 | """ 76 | Prepare the video for cutting. 77 | This includes segmenting the video and analysing the segments. 78 | 79 | progress -- the manager for the progress bars 80 | instance -- the instance id 81 | """ 82 | _split_video(progress, instance) 83 | _analyse_segments(progress, instance) 84 | 85 | def _split_video(progress, instance): 86 | """ 87 | Split the video into segments based on keyframes. 
88 | 89 | progress -- the manager for the progress bars 90 | instance -- the instance id 91 | """ 92 | cache_path = CACHE_PREFIX + f"/{instance}/" 93 | file = instances[instance]["file"] 94 | 95 | total_input_length = get_video_length(file) 96 | bar_total = int(total_input_length * 1000) 97 | 98 | pbar = progress.add_task("[magenta]Segmenting", total=bar_total) 99 | 100 | split = ( 101 | ffmpeg 102 | .input(file) 103 | .output(cache_path + "segments/out%05d.ts", 104 | f="segment", 105 | c="copy", 106 | reset_timestamps=1) 107 | .global_args("-progress", "pipe:1") 108 | .global_args("-loglevel", "error") 109 | .global_args("-hide_banner") 110 | .global_args("-nostdin") 111 | .run_async(pipe_stdout=True, pipe_stderr=True) 112 | ) 113 | read_progress(progress, pbar, split) 114 | 115 | def _analyse_segments(progress, instance): 116 | """ 117 | Analyse the length of each segment of the video. 118 | 119 | progress -- the manager for the progress bars 120 | instance -- the instance id 121 | """ 122 | global instances 123 | instances[instance]["segments"] = {} 124 | cache_path = CACHE_PREFIX + f"/{instance}/" 125 | 126 | segments = sorted(os.listdir(cache_path + "segments")) 127 | 128 | pbar = progress.add_task("[magenta]Analysing", total=len(segments)) 129 | 130 | durations = Parallel(n_jobs=PROCESSES)( 131 | delayed(get_video_length) 132 | (f"{cache_path}segments/{path}", progress, pbar) 133 | for path in segments) 134 | # calculate start end map 135 | total_duration = 0 136 | for i, duration in enumerate(durations): 137 | instances[instance]["segments"][i] = { 138 | "start": total_duration, 139 | "end": total_duration + duration, 140 | } 141 | total_duration += duration 142 | 143 | 144 | def transcode(progress, instance): 145 | """ 146 | Transcode the video. 
147 | 148 | progress -- the manager for the progress bars 149 | instance -- the instance id 150 | """ 151 | global instances 152 | 153 | cache_path = CACHE_PREFIX + f"/{instance}/" 154 | segments = instances[instance]["segments"] 155 | cuts = instances[instance]["cuts"] 156 | 157 | pbar = progress.add_task("[magenta]Transcoding", total=len(segments)) 158 | 159 | def _process_segment(i): 160 | """ 161 | Process a single segment. 162 | 163 | i -- the segment number 164 | """ 165 | # cats are segments that need to be kept 166 | segment = segments[i] 167 | 168 | # find id of first cut ending after segment start 169 | first_cut_id, first_cut = next((x for x in enumerate(cuts) 170 | if x[1][1] > segment["start"]), (-1, None)) 171 | 172 | # skip segment if it ends before the current cut starts 173 | if first_cut == None or first_cut[0] >= segment["end"]: 174 | progress.update(pbar, advance=1) 175 | return 176 | 177 | # if completely enclosed by a cut, copy 178 | if first_cut[0] <= segment["start"] and first_cut[1] >= segment["end"]: 179 | os.rename(f"{cache_path}segments/out{i:05d}.ts", 180 | f"{cache_path}cutSegments/out{i:05d}.ts") 181 | progress.update(pbar, advance=1) 182 | return 183 | 184 | # find all cuts that start before segment end 185 | cuts_in_segment = list(takewhile(lambda x: x[0] < segment["end"], 186 | cuts[first_cut_id+1:])) 187 | all_cuts = [first_cut] + cuts_in_segment 188 | 189 | keep = [] 190 | for cut in all_cuts: 191 | start = max(segment["start"], cut[0]) 192 | end = min(segment["end"], cut[1]) 193 | keep.append((start, end)) 194 | 195 | # filter keep list to remove segments that are too short 196 | keep = [x for x in keep if x[1] - x[0] > 0.1] 197 | 198 | # convert keep list from global time to segment time 199 | keep = [(x[0] - segment["start"], x[1] - segment["start"]) for x in keep] 200 | 201 | for j,trim in enumerate(keep): 202 | # only transcode when a new keyframe needs to be calculated 203 | # otherwise just cut P and B frames 204 | # TODO: 
check if this results in a quality loss 205 | # assuming that a P frame that is kept referenced a B frame 206 | # that was cut, might result in the P frame losing its reference 207 | # and thus (to me) unknown behaviour 208 | if (trim[0] == 0): 209 | ( 210 | ffmpeg 211 | .input(f"{cache_path}segments/out{i:05d}.ts") 212 | .output(f"{cache_path}cutSegments/out{i:05d}_{j:03d}.ts", 213 | f="mpegts", 214 | to=round(trim[1], 5), 215 | codec="copy") 216 | .global_args("-loglevel", "error") 217 | .global_args("-hide_banner") 218 | .global_args("-nostdin") 219 | .run() 220 | ) 221 | else: 222 | ( 223 | ffmpeg 224 | .input(f"{cache_path}segments/out{i:05d}.ts") 225 | .output(f"{cache_path}cutSegments/out{i:05d}_{j:03d}.ts", 226 | f="mpegts", 227 | ss=round(trim[0], 5), 228 | to=round(trim[1], 5), 229 | acodec="copy", 230 | vcodec="libx264", 231 | preset="fast", 232 | crf=quality, 233 | reset_timestamps=1, 234 | force_key_frames=0) 235 | .global_args("-loglevel", "error") 236 | .global_args("-hide_banner") 237 | .global_args("-nostdin") 238 | .run() 239 | ) 240 | progress.update(pbar, advance=1) 241 | Parallel(n_jobs=PROCESSES, require="sharedmem")( 242 | delayed(_process_segment) 243 | (i) 244 | for i in segments) 245 | 246 | 247 | def concat_segments(progress, instance): 248 | """ 249 | Concatenate the segments into a single video. 
250 | 251 | progress -- the manager for the progress bars 252 | instance -- the instance id 253 | """ 254 | cache_path = f"{CACHE_PREFIX}{instance}/" 255 | output = instances[instance]["output"] 256 | with open(f"{cache_path}list.txt", "w") as f: 257 | for file in sorted(os.listdir(f"{cache_path}cutSegments")): 258 | f.write(f"file 'cutSegments/{file}'\n") 259 | total_cut_length = sum([x[1] - x[0] for x in instances[instance]["cuts"]]) 260 | bar_total = int(total_cut_length * 1000) 261 | 262 | pbar = progress.add_task("[magenta]Rendering", total=bar_total) 263 | outputargs = {} 264 | if reencode: 265 | outputargs = { 266 | "vcodec": "libx264", 267 | "preset": "fast", 268 | "crf": quality, 269 | "acodec": "aac", 270 | } 271 | else: 272 | outputargs = { 273 | "c": "copy", 274 | } 275 | concat = ( 276 | ffmpeg 277 | .input(f"{cache_path}list.txt", f="concat", safe=0) 278 | .output(output, **outputargs) 279 | .global_args("-progress", "pipe:1") 280 | .global_args("-loglevel", "error") 281 | .global_args("-hide_banner") 282 | .global_args("-nostdin") 283 | .run_async(pipe_stdout=True, pipe_stderr=True) 284 | ) 285 | read_progress(progress, pbar, concat) 286 | 287 | def generate_progress_instance(): 288 | return Progress( 289 | TextColumn("{task.description}", justify="right"), 290 | BarColumn(bar_width=None), 291 | TextColumn("[progress.percentage]{task.percentage:>3.1f}%", justify="right"), 292 | "•", 293 | TimeRemainingColumn(), 294 | "•", 295 | TimeElapsedColumn(), 296 | transient=True, 297 | ) 298 | 299 | def run(progress, config): 300 | """ 301 | Run the program on a single instance. 
302 | 303 | progress -- the manager for the progress bars 304 | config -- the config for the instance 305 | """ 306 | global instances 307 | 308 | rich.print(f"Input: [yellow]{config['file']}[/yellow]") 309 | rich.print(f"Output: [yellow]{config['output']}[/yellow]\n") 310 | 311 | instance = str(uuid.uuid4()) 312 | instances[instance] = { 313 | "file": None, 314 | "output": None, 315 | } 316 | for key in config: 317 | instances[instance][key] = config[key] 318 | 319 | init_cache(instance) 320 | Parallel(n_jobs=2, require="sharedmem")([ 321 | delayed(generate_cut_list)(instance), 322 | delayed(prepare_video)(progress, instance)]) 323 | transcode(progress, instance) 324 | concat_segments(progress, instance) 325 | cleanup(instance) 326 | 327 | 328 | invert = False 329 | quality = 20 330 | aggressiveness = 3 331 | reencode = False 332 | 333 | def parse_args(): 334 | """ 335 | Parse the command line arguments. 336 | """ 337 | global invert, quality, aggressiveness, reencode 338 | parser = argparse.ArgumentParser(description=textwrap.dedent(""" 339 | LectureCut is a tool to remove silence from videos. 340 | 341 | It uses WebRTC's VAD to detect silence and ffmpeg to transcode the video. 342 | To speed up transcoding, a form of smart encoding is employed. This means 343 | that the video is split into segments and only the segments that need to be 344 | cut are transcoded. This results in a much faster transcoding process, but 345 | the output video will have a slightly lower quality than the input video. 346 | """)) 347 | 348 | parser.add_argument( 349 | "-i", "--input", 350 | help="The video file to process", 351 | required=True) 352 | parser.add_argument( 353 | "-o", "--output", 354 | help="The output file. If not specified,"+\ 355 | " the input file will be overwritten", 356 | required=False) 357 | parser.add_argument( 358 | "-q", "--quality", 359 | help="The quality of the output video. Lower is better. 
Default: 20", 360 | required=False, 361 | type=int, 362 | default=20) 363 | parser.add_argument( 364 | "-a", "--aggressiveness", 365 | help="The aggressiveness of the VAD."+\ 366 | " Higher is more aggressive. Default: 3", 367 | required=False, 368 | type=int, 369 | default=3) 370 | parser.add_argument( 371 | "-r", "--reencode", 372 | help="Reencode the video with a given video codec.", 373 | required=False, 374 | type=str) 375 | parser.add_argument( 376 | "--invert", 377 | help="Invert the selection."+\ 378 | " This will cut out all segments that are not silence.", 379 | required=False, 380 | action="store_true") 381 | 382 | args = parser.parse_args() 383 | 384 | if args.invert: 385 | invert = True 386 | if args.quality: 387 | quality = args.quality 388 | if args.aggressiveness: 389 | aggressiveness = args.aggressiveness 390 | if args.reencode: 391 | reencode = args.reencode 392 | 393 | if args.invert and not args.aggressiveness: 394 | aggressiveness = 1 395 | 396 | return args 397 | 398 | def greetings(): 399 | """ 400 | Create a manager for the progress bars. 
401 | """ 402 | 403 | title = "██╗ ███████╗ ██████╗████████╗██╗ ██╗███████╗███████╗ ██████╗██╗ ██╗████████╗\n" + \ 404 | "██║ ██╔════╝██╔════╝╚══██╔══╝██║ ██║██╔══██║██╔════╝ ██╔════╝██║ ██║╚══██╔══╝\n" + \ 405 | "██║ █████╗ ██║ ██║ ██║ ██║██████╔╝█████╗ ██║ ██║ ██║ ██║\n" + \ 406 | "██║ ██╔══╝ ██║ ██║ ██║ ██║██╔══██╗██╔══╝ ██║ ██║ ██║ ██║\n" + \ 407 | "██████╗███████╗╚██████╗ ██║ ╚██████╔╝██║ ██║███████╗ ╚██████╗╚█████╔╝ ██║\n" + \ 408 | "╚═════╝╚══════╝ ╚═════╝ ╚═╝ ╚═════╝ ╚═╝ ╚═╝╚══════╝ ╚═════╝ ╚════╝ ╚═╝" 409 | rich.print() 410 | title = Align(title, align="center") 411 | rich.print(title) 412 | subtitle = "[link=https://github.com/Gamer92000/LectureCut]Source Code[/link] - Made with ❤️ by [link=https://github.com/Gamer92000]Gamer92000[/link]" 413 | subtitle = Align(subtitle, align="center") 414 | rich.print(subtitle) 415 | 416 | def get_automatic_name_insert(): 417 | """ 418 | Get the automatic name insert for the output file. 419 | """ 420 | automatic_name_insert = "_lecturecut." 421 | 422 | if invert: 423 | automatic_name_insert = "_inverted" + automatic_name_insert 424 | 425 | return automatic_name_insert 426 | 427 | def process_files_in_dir(args): 428 | get_file_path = lambda x: x 429 | if args.output: 430 | if not os.path.isdir(args.output): 431 | os.mkdir(args.output) 432 | get_file_path = lambda x: os.path.join(args.output, os.path.basename(x)) 433 | else: 434 | get_file_path = lambda x: os.path.splitext(os.path.basename(x))[0] +\ 435 | get_automatic_name_insert() +\ 436 | x.rsplit(".", 1)[1] 437 | 438 | files = sorted(os.listdir(args.input)) 439 | files = [f for f in files if os.path.isfile(os.path.join(args.input, f))] 440 | files = [os.path.join(args.input, f) for f in files] 441 | # TODO: Check if files are actually videos 442 | files = [(x, get_file_path(x)) for x in files] 443 | 444 | file_progress = Progress( 445 | "[progress.description]{task.description}", 446 | BarColumn(bar_width=None), 447 | MofNCompleteColumn(), 448 | "•", 449 | 
TimeElapsedColumn(), 450 | transient=True, 451 | ) 452 | 453 | group = Group(file_progress) 454 | 455 | start = time.perf_counter() 456 | with Live(group): 457 | pbar = file_progress.add_task("[yellow]Videos", total=len(files)) 458 | 459 | for input_file, output_file in files: 460 | prog = generate_progress_instance() 461 | group.renderables.insert(0, prog) 462 | run(prog, { 463 | "file": input_file, 464 | "output": output_file 465 | }) 466 | file_progress.update(pbar, advance=1) 467 | group.renderables.remove(prog) 468 | rich.print(prog) 469 | rich.print() 470 | 471 | group.renderables.remove(file_progress) 472 | 473 | end = time.perf_counter() 474 | 475 | print_stats(files, end - start) 476 | 477 | def main(): 478 | """ 479 | Main function. 480 | """ 481 | args = parse_args() 482 | greetings() 483 | 484 | # because windows is seemingly designed by a 5 year old 485 | # we need to replace trailing double quotes with a backslash 486 | # ( see https://bugs.python.org/msg364246 ) 487 | args.input = args.input.replace('"', '\\') 488 | 489 | if os.path.isdir(args.input): 490 | process_files_in_dir(args) 491 | else: 492 | if args.output == None: 493 | args.output = args.input.rsplit(".", 1)[0] +\ 494 | get_automatic_name_insert() +\ 495 | args.input.rsplit(".", 1)[1] 496 | 497 | start = time.perf_counter() 498 | with generate_progress_instance() as progress: 499 | run(progress, { 500 | "file": args.input, 501 | "output": args.output 502 | }) 503 | end = time.perf_counter() 504 | 505 | print_stats([(args.input, args.output)], end - start) 506 | 507 | def shotdown_cleanup(): 508 | """ 509 | Cleanup function that is called when the program is terminated. 
510 | """ 511 | if (len(instances) <= 0): 512 | return 513 | rich.print() 514 | rich.print("[red]Cleaning up after unexpected exit...") 515 | # sleep to make sure open file handles are closed 516 | time.sleep(3) 517 | for instance in instances: 518 | cachePath = f"{CACHE_PREFIX}{instance}/" 519 | if os.path.isdir(cachePath): 520 | delete_directory_recursively(cachePath) 521 | 522 | if __name__ == "__main__": 523 | atexit.register(shotdown_cleanup) 524 | main() 525 | atexit.unregister(shotdown_cleanup) 526 | --------------------------------------------------------------------------------