├── start.py ├── .github └── workflows │ └── publish_image.yml ├── boilest kubernetes.yml ├── dockerfile ├── .gitignore ├── README.md └── tasks.py /start.py: -------------------------------------------------------------------------------- 1 | from tasks import locate_files 2 | locate_files('farts') 3 | -------------------------------------------------------------------------------- /.github/workflows/publish_image.yml: -------------------------------------------------------------------------------- 1 | name: Publish Docker Image to GHCR 2 | 3 | on: 4 | push 5 | 6 | jobs: 7 | build_and_push: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v3 11 | - name: Build and push the image 12 | run: | 13 | docker login --username goingoffroading --password ${{ secrets.GH_PAT }} ghcr.io 14 | docker build . --tag ghcr.io/goingoffroading/boilest-worker:latest 15 | docker push ghcr.io/goingoffroading/boilest-worker:latest -------------------------------------------------------------------------------- /boilest kubernetes.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Kubernetes deployment example using a Daemonset and labeled nodes. 
3 | # Label a node 'boilest':'worker' and Boilest will automatically deploy to it 4 | # Repository here is GitHub Container Registry (GHCR) 5 | kind: DaemonSet 6 | apiVersion: apps/v1 7 | metadata: 8 | name: boilest 9 | labels: 10 | app: boilest 11 | boil: worker 12 | spec: 13 | replicas: 1 14 | strategy: 15 | type: Recreate 16 | selector: 17 | matchLabels: 18 | app: boilest 19 | template: 20 | metadata: 21 | labels: 22 | app: boilest 23 | spec: 24 | containers: 25 | - name: boilest 26 | image: ghcr.io/goingoffroading/boilest-worker:latest 27 | imagePullPolicy: Always 28 | volumeMounts: 29 | - name: boilestmedia 30 | mountPath: "/boil_watch" 31 | nodeSelector: 32 | boilest: worker 33 | nodeName: node101-desktop 34 | volumes: 35 | - name: boilestmedia 36 | nfs: 37 | server: SERVER_IP_HERE 38 | path: "/SERVER/PATH/HERE" -------------------------------------------------------------------------------- /dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official Python image based on Alpine 2 | FROM python:3.9-alpine 3 | 4 | # Install dependencies and supervisor 5 | RUN apk update && \ 6 | apk add --no-cache \ 7 | build-base \ 8 | linux-headers \ 9 | supervisor \ 10 | ffmpeg && \ 11 | pip install --no-cache-dir celery requests mysql-connector-python && \ 12 | apk upgrade 13 | 14 | # Create a non-root user and group 15 | ARG UID=1000 16 | ARG GID=1000 17 | RUN addgroup -g $GID appgroup && \ 18 | adduser -D -u $UID -G appgroup appuser 19 | 20 | # Create additional directories without setting ownership 21 | RUN mkdir -p /tv /anime /movies /boil_hold 22 | 23 | # Create application directory and set ownership 24 | WORKDIR /app 25 | COPY . 
/app 26 | RUN chown -R appuser:appgroup /app /boil_hold 27 | 28 | # Create log directory and set ownership 29 | RUN mkdir -p /app/logs && \ 30 | chown -R appuser:appgroup /app/logs 31 | 32 | # Environment variables 33 | ENV TZ=US/Pacific 34 | 35 | # Used in celery and rabbitmq 36 | ENV user celery 37 | ENV password celery 38 | ENV celery_host 192.168.1.110 39 | ENV celery_port 31672 40 | ENV celery_vhost celery 41 | ENV rabbitmq_host 192.168.1.110 42 | ENV rabbitmq_port 32311 43 | 44 | # Used in celery and rabbitmq 45 | ENV sql_host 192.168.1.110 46 | ENV sql_port 32053 47 | ENV sql_database boilest 48 | ENV sql_user boilest 49 | ENV sql_pswd boilest 50 | 51 | # Run as non-root user 52 | USER appuser 53 | 54 | # Start supervisord 55 | CMD ["celery", "-A", "tasks", "worker"] -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Abstraction 2 | 3 | Boilest is my solution to: 4 | 5 | - Having video media in lots of different formats, but wanting to consolidate it into one format 6 | - Wanting my video content to consume less space in my NAS 7 | - Wanting to do this work at scale 8 | 9 | --- 10 | # Why 'Boilest'? 11 | 12 | Because I am terrible at naming things, and it was the first agreeable thing to come out of a random name generator. 13 | 14 | --- 15 | # What about Tdarr, Unmanic, or other existing distributed solutions?? 16 | 17 | [Tdarr](https://home.tdarr.io/) is a great platform, but didn't setup or scale as well as I would have liked. 
I also found it comfortably to under documented, closed source, had some design oddities, and hid features behind a paywall. 18 | 19 | As frenzied as Tdarr fans are on Reddit, I just can't commit/subscribe to a service like that. 20 | 21 | [Unmanic](https://github.com/Unmanic/unmanic/tree/master) is magic... I am a big fan, and Unmanic is comfortably the inspiration of this project. 22 | 23 | I would be using Unmanic today, instead of writing spaghetti code, but Josh5 had previously [hardcoded the platform on an older version of FFmpeg](https://github.com/Unmanic/unmanic/blob/master/docker/Dockerfile#L82), doesn't currently support AV1, [has some complexities to build the container](https://github.com/Unmanic/unmanic/blob/master/docker/README.md) that make it difficult to code my own support, and doesn't seem to be keeping up on the repo or accepting PRs. 24 | 25 | --- 26 | # Why not Handbrake? 27 | 28 | Handbrake is awesome, but: 29 | 30 | - It's not distributed/doesn't scale past the node with the GUI open 31 | - It's 'watch folder' functionality doesn't do any file checking, sorting, or filtering to decide if it should actually process a file 32 | - Does not have the functionality for monitoring an existing media collection 33 | 34 | --- 35 | # How does Boilest work? 36 | 37 | - Boilest kicks off a job that searches directories for video files 38 | - Boilest then checks each individual video file to see if the various codecs match a spec. In this step, Boilest will also prioritize files that have the highest ROI for encoding (large media, x264, mp4, etc) first as to not waste time with diminishing returns (changing small, x265, mkv files) up front. If any of the codecs don't match spec, the file is dispatched for encoding. 39 | - If it is determined from the above step that encoding is required, the file undergoes a series of validations. Assuming the file passes those validations, the file is encoded. The output encoded file is then also validated. 
If the output encoded file passes validations, it replaces the original file. 40 | - Once encoding is complete, the results are stored in a DB for stats. 41 | 42 | --- 43 | # What will Boilest change? 44 | 45 | In any given media file: 46 | 47 | | Area | Target Change | 48 | |------|---------------| 49 | | Container | Media containers that are not MKR (like MP4) are changed to MKV 50 | | Video | Video streams that are not AV1 are encoded to AV1 51 | | Audio | No changes to audio streams at this time. Audio streams are copied. 52 | | Subtitles | No changes to subtitle streams at this time. subtitle streams are copied. 53 | | Attachments | No changes to Attachments at this time. Attachments are copied. 54 | 55 | Once I make some final decisions around what is optimal for TV/device streaming, there will become targets to audio, subtitles, and attachments. 56 | 57 | --- 58 | # Prerequisites 59 | 60 | --- 61 | ## RabbitMQ 62 | 63 | The backbone of Boilest is a distributed task Python library called [Celery](https://docs.celeryq.dev/en/stable/getting-started/introduction.html). Celery needs a message transport (a place to store the task queue), and we leverage RabbitMQ for that. 64 | 65 | RabbitMQ will need to be deployed with it's management plugin. 66 | 67 | From the management plugin: 68 | 69 | - Create a 'celery' vhost 70 | - Create a user with the user/pwd of celery/celery 71 | - Give the celery .* configure, write, read permissions in the celery vhost 72 | 73 | --- 74 | ## MariaDB 75 | 76 | Technically, the workflow works fine (at this time) without access to MariaDB (mysql). MariaDB is where the results of the encoding are tracked. If Maria is not deployed, the final task will fail, and this will only be noticeable in the logs. 77 | 78 | In Maria, create a database called 'boilest'. 
79 | 80 | In the 'boilest' database, create a table called 'ffmpeghistory' with the following columns: 81 | 82 | | Column Name | Type | 83 | |--------------------------|---------------------------------| 84 | | unique_identifier | varchar(100) | 85 | | recorded_date | datetime | 86 | | file_name | varchar(100) | 87 | | file_path | varchar(100) | 88 | | config_name | varchar(100) | 89 | | new_file_size | int(11) | 90 | | new_file_size_difference | int(11) | 91 | | old_file_size | int(11) | 92 | | watch_folder | varchar(100) | 93 | | ffmpeg_encoding_string | varchar(1000) | 94 | 95 | In a future iteration, I'll include a python script that populates database and table into Maria automatically. 96 | 97 | --- 98 | # How to deploy 99 | 100 | - Create your deployment (Docker/Kubernetes/etc) with the ghcr.io/goingoffroading/boilest-worker:latest container image. 101 | - Change the container variables to reflect your environment: 102 | 103 | | ENV | Default Value | Notes | 104 | |---------------------------------|-------------------------|-----------------------------------------------------| 105 | | celery_user | celery | The user setup for Celery in your RabbitMQ | 106 | | celery_password | celery | The password setup for Celery in your RabbitMQ | 107 | | celery_host | 192.168.1.110 | The IP address of RabbitMQ | 108 | | celery_port | 31672 | The port RabbitMQ's port 5672 or 5673 are mapped to | 109 | | celery_vhost | celery | The RabbitMQ vhost setup for Boilest | 110 | | rabbitmq_host | 192.168.1.110 | The IP address of RabbitMQ management UI | 111 | | rabbitmq_port | 32311 | The port of RabbitMQ management UI | 112 | | sql_host | 192.168.1.110 | The IP address of MariaDB | 113 | | sql_port | 32053 | The port mapped to MariaDB's port 3306 | 114 | | sql_database | boilest | The database name setup for Boilest | 115 | | sql_user | boilest | The username setup for Boilest | 116 | | sql_pswd | boilest | The password setup for Boilest | 117 | 118 | - Deploy the container. 
119 | - SSH into any one of the containers and run 'python start.py'. This will kick off all of the workflows. 120 | 121 | Done. 122 | 123 | - See 'boilest kubernetes.yml' for an example of a Kubernetes deployment 124 | 125 | --- 126 | # How to start the Boilest/video encoding workflow 127 | Either: 128 | - Deploy the [Boilest Management GUI](https://github.com/GoingOffRoading/Boilest_Manager_GUI) container and either wait for the cron, or SSH into the container and start start.py 129 | - SSH into one of the Boilest-Worker containers and run start.py: 130 | 131 | In both SSH cases, literally run 132 | ``` 133 | python start.py 134 | ``` 135 | SSH in Kubernetes is: 136 | 137 | ``` 138 | kubectl exec -it (your pod name) -- /bin/sh 139 | ``` 140 | 141 | SSH in Docker is: 142 | 143 | ``` 144 | docker exec -it (container ID) /bin/sh 145 | ``` 146 | Starting the workflow only needs to be done once from any one of the relevant containers. That start will trickle into the other containers via the RabbitMQ broker. 147 | 148 | --- 149 | # Q&A 150 | 151 | * If Celery can use Redis or RabbitMQ for its message transport, can Boilest use Redis? 152 | 153 | Not in Boilest's current state, and probably never. Redis doesn't 'support' prioritization of messages technically at all or as well as rabbit does. Boilest currently uses RabbitMQ's prioritization of messages to encode the video files with the highest ROI for encoding time. 154 | 155 | 156 | --- 157 | # Todo List 158 | 159 | - [x] Setup a set_priority function for ffprobe based on container, file size, and video codec (I.E. 
the things that have the greatest impact on ROI) 160 | - [x] Setup the function to write the results to the DB 161 | - [x] Replace the prints with logging 162 | - [ ] Made decisions on audio codec 163 | - [ ] Make decisions on subtitle codec 164 | - [ ] Research ffprobe flags for HDR content 165 | - [x] Figure out how to pass the watch folder forward for the SQL write 166 | - [x] Figure out how to pass the ffmpeg string forward for the SQL write 167 | - [x] Stand up repo for management UI 168 | - [ ] Make tweaks to the prioritization scoring 169 | - [ ] Create a 'create database, table' script 170 | - [ ] Having write_results be it's own task is stupid. Incorporate it into process_ffmpeg. 171 | - [ ] Tasks.py is stupidly big. Break it up into different files for readability/management. 172 | - [ ] Revisit string formatting i.e. f"Name: {name}, Age: {age}" instead of name + ", Age:" + str(age) 173 | - [ ] Explore using the Pydantic Model 174 | - [ ] Remove hard-coding related 175 | - [ ] Move UniqueID in the SQL to a GUID 176 | - [ ] Explore using pathlib instead of OS 177 | - [x] Remove the archive 178 | - [ ] Some day... 
Remove the celery task function for write_results 179 | - [x] Consider moving queue_workers_if_queue_empty to the manager container -------------------------------------------------------------------------------- /tasks.py: -------------------------------------------------------------------------------- 1 | from celery import Celery 2 | import json, os, logging, subprocess, shutil, mysql.connector 3 | from mysql.connector import Error 4 | from datetime import datetime 5 | 6 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 7 | # >>>>>>>>>>>>>>> Celery Configurations >>>>>>>>>>>>>>> 8 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 9 | 10 | # create logger 11 | logger = logging.getLogger('boilest_logs') 12 | logger.setLevel(logging.INFO) 13 | 14 | # create console handler and set level to debug 15 | ch = logging.StreamHandler() 16 | ch.setLevel(logging.DEBUG) 17 | 18 | # create formatter 19 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') 20 | 21 | # add formatter to ch 22 | ch.setFormatter(formatter) 23 | 24 | # add ch to logger 25 | logger.addHandler(ch) 26 | 27 | 28 | def celery_url_path(thing): 29 | # https://docs.celeryq.dev/en/stable/getting-started/first-steps-with-celery.html#keeping-results 30 | celery_user = os.environ.get('user', 'celery') 31 | celery_password = os.environ.get('password', 'celery') 32 | celery_host = os.environ.get('celery_host', '192.168.1.110') 33 | celery_port = os.environ.get('celery_port', '31672') 34 | celery_vhost = os.environ.get('celery_vhost', 'celery') 35 | thing = thing + celery_user + ':' + celery_password + '@' + celery_host + ':' + celery_port + '/' + celery_vhost 36 | logger.debug('celery_url_path is: ' + thing) 37 | return thing 38 | 39 | app = Celery('worker_queue', broker = celery_url_path('amqp://') ) 40 | 41 | 42 | app.conf.task_default_queue = 'worker_queue' 43 | app.conf.worker_concurrency = 1 44 | app.conf.worker_prefetch_multiplier=1 45 | 46 | 
# Celery queue topology: one direct-exchange queue with message priorities
# enabled (x-max-priority) so high-ROI encodes are consumed first.
app.conf.task_queues = {
    'worker_queue': {
        'exchange': 'tasks',
        'exchange_type': 'direct',
        'routing_key': 'worker_queue',
        'queue_arguments': {'x-max-priority': 10},
    }
}

# Every task is routed through the single worker queue.
app.conf.task_routes = {
    'locate_files': {'queue': 'worker_queue'},
    'requires_encoding': {'queue': 'worker_queue'},
    'process_ffmpeg': {'queue': 'worker_queue'}
}

# >>>>>>>>>>>>>>>>>>>>>>                    <<<<<<<<<<<<<<<<<<<<<<
# This section starts the discovery of the files by searching directories
# >>>>>>>>>>>>>>>>>>>>>>                    <<<<<<<<<<<<<<<<<<<<<<

@app.task
def locate_files(arg):
    """Walk the watch directories and queue a requires_encoding task per video file.

    `arg` is unused but kept for backward compatibility: start.py calls
    locate_files('farts') positionally.
    """
    directories = ['/anime', '/tv', '/movies']
    extensions = ['.mp4', '.mkv', '.avi']

    logger.info(f'Searching directories: {directories}')
    logger.info(f'File extensions: {extensions}')

    for file_located in find_files(directories, extensions):
        logger.debug('File located, sending to ffprobe function')
        try:
            file_located_data = json.loads(file_located)
            logger.debug(json.dumps(file_located_data, indent=3, sort_keys=True))
            # Discovery runs at the lowest priority (1) so queued encodes,
            # which carry higher priorities, are consumed first.
            requires_encoding.apply_async(kwargs={'file_located_data': file_located_data}, priority=1)
        except json.JSONDecodeError as e:
            # find_files emits its own JSON, so this is belt-and-braces only.
            logger.error(f'Failed to decode JSON: {e}')


def find_files(directories, extensions):
    """Yield a JSON document for every file under `directories` whose name
    ends with one of `extensions` (case-insensitive).

    JSON (rather than a dict) is yielded to keep the payload in the same
    serialized shape the Celery tasks exchange.
    """
    for directory in directories:
        logger.info(f'Scanning: {directory}')
        for root, dirs, files in os.walk(directory):
            for file in files:
                for ext in extensions:
                    if file.lower().endswith(ext.lower()):
                        yield json.dumps({
                            'directory': directory,
                            'root': root,
                            'file': file,
                            'file_path': os.path.join(root, file),
                            'extension': ext,
                        })
# >>>>>>>>>>>>>>>>>>>>>>                    <<<<<<<<<<<<<<<<<<<<<<
# This section discovers the file's metadata and determines what processing may need to occur
# >>>>>>>>>>>>>>>>>>>>>>                    <<<<<<<<<<<<<<<<<<<<<<

@app.task
def requires_encoding(file_located_data):
    """Probe one file and, if the container or any stream is off-spec, queue it for encoding.

    `file_located_data` is the dict produced by find_files; before dispatch it
    is enriched with the ffmpeg argument string, the staging output path, and
    the original size in KB, then handed to process_ffmpeg at a priority
    reflecting its encoding ROI.
    """
    stream_info = ffprobe_function(file_located_data['file_path'])
    encoding_decision = False
    old_file_size = file_size_kb(file_located_data['file_path'])
    file_name = file_located_data['file']
    logger.info(f'{file_name} started requires_encoding')
    processing_priority = get_ffmpeg_processing_priority(old_file_size, stream_info)
    logger.debug(f'processing_priority is: {processing_priority}')
    encoding_decision, ffmepg_output_file_name = check_container_type(stream_info, encoding_decision, file_name)
    encoding_decision, ffmpeg_command = check_codecs(stream_info, encoding_decision)
    if encoding_decision:
        logger.info(f'{file_name} requires encoding')
        file_located_data['ffmpeg_command'] = ffmpeg_command
        file_located_data['ffmepg_output_file_name'] = ffmepg_output_file_name
        # Reuse the size measured above instead of stat-ing the file a second time.
        file_located_data['old_file_size'] = old_file_size
        logger.debug(json.dumps(file_located_data, indent=4))
        process_ffmpeg.apply_async(kwargs={'file_located_data': file_located_data}, priority=processing_priority)
    else:
        logger.debug('file does not need encoding')
        logger.debug(encoding_decision)
        logger.debug(ffmpeg_command)
    logger.info(f'{file_name} ended requires_encoding')


def get_ffmpeg_processing_priority(old_file_size, stream_info):
    """Compute the RabbitMQ message priority: 10 minus the three adjustments.

    NOTE(review): larger / older-format files receive LARGER adjustments and
    therefore a LOWER numeric priority; with RabbitMQ a higher number is
    consumed first — confirm this matches the 'biggest ROI first' intent.
    """
    priority = 10
    adjustments_for_file_size = adjust_priority_based_on_filesize_f(old_file_size)
    adjustments_for_container = adjustments_for_container_f(stream_info)
    adjustments_for_codec = adjustments_for_codec_f(stream_info)
    priority = priority - adjustments_for_file_size - adjustments_for_container - adjustments_for_codec
    logger.debug(f'Encoding priority determined to be: {priority}')
    return priority


def adjust_priority_based_on_filesize_f(size_kb):
    """Return 0-4: one point per whole GB of input, capped at 4 (bigger files = bigger ROI).

    Parameter renamed from `file_size_kb`, which shadowed the module-level
    function of the same name.
    """
    file_size_gb = size_kb // (1024 * 1024)
    file_size_adjustment = min(file_size_gb, 4)
    # Bug fix: was logging.debug (root logger — messages were silently dropped
    # because only the 'boilest_logs' logger is configured).
    logger.debug(f'Based on file size, priority increasing by: {file_size_adjustment}')
    return file_size_adjustment


def adjustments_for_container_f(stream_info):
    """Return 1 when the container is not already Matroska/WebM, else 0."""
    container_adjustment = 0
    if stream_info['format'].get('format_name') != "matroska,webm":
        container_adjustment = 1
    # Bug fix: was logging.debug instead of logger.debug (see above).
    logger.debug(f'Based on the files container, priority increasing by: {container_adjustment}')
    return container_adjustment


def adjustments_for_codec_f(stream_info):
    """Return 2 when the first stream is h264 (cheap, high-ROI re-encode), else 0.

    NOTE(review): assumes stream 0 is the video stream — ffprobe does not
    guarantee stream ordering; verify against real media.
    """
    codec_adjustment = 0
    # .get avoids a KeyError for streams (e.g. attachments) with no codec_name.
    if stream_info["streams"][0].get("codec_name") == "h264":
        codec_adjustment = 2
    # Bug fix: was logging.debug, and the message said 'container' — this
    # adjustment is about the codec.
    logger.debug(f'Based on the files codec, priority increasing by: {codec_adjustment}')
    return codec_adjustment


def file_size_kb(file_path):
    """Return the on-disk size of file_path in whole KB (0 when the file is missing)."""
    if os.path.isfile(file_path):
        file_size_bytes = os.path.getsize(file_path)
        return round(file_size_bytes / 1024)
    # Consistency fix: return int 0 (was 0.0) to match the rounded-int success path.
    return 0


def ffprobe_function(file_path):
    """Return ffprobe's JSON stream/format metadata for file_path.

    Raises subprocess.CalledProcessError (check=True) when ffprobe fails.
    """
    # Security fix: list-form argv with shell=False so filenames containing
    # quotes or shell metacharacters cannot break or inject into the command.
    ffprobe_command = [
        'ffprobe', '-loglevel', 'quiet',
        '-show_entries', 'format:stream=index,stream,codec_type,codec_name,channel_layout,format=nb_streams',
        '-of', 'json', file_path,
    ]
    result = subprocess.run(ffprobe_command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return json.loads(result.stdout)


def check_container_type(stream_info, encoding_decision, file):
    """Flag non-Matroska containers for remuxing and derive the staging output path."""
    format_name = stream_info['format'].get('format_name')
    logger.debug(f'format is: {format_name}')
    if format_name != 'matroska,webm':
        encoding_decision = True
    # Always derive the output name so it is bound even for mkv-container input
    # (NOTE(review): guards against a likely UnboundLocalError in the original
    # when the container was already matroska,webm).
    encoding_decision, ffmepg_output_file = check_container_extension(file, encoding_decision)
    logger.debug(f'>>>check_container_type<<< Container is: {format_name} so, encoding_decision is: {encoding_decision}')
    return encoding_decision, ffmepg_output_file


def check_container_extension(file, encoding_decision):
    """Swap the file's extension to .mkv and prefix the /boil_hold staging directory."""
    base, ext = os.path.splitext(file)
    if ext.lower() != '.mkv':
        file = base + '.mkv'
        encoding_decision = True
    ffmepg_output_file = '/boil_hold/' + file
    return encoding_decision, ffmepg_output_file


def check_codecs(stream_info, encoding_decision):
    """Walk every stream and accumulate the per-stream ffmpeg map/codec arguments."""
    streams_count = stream_info['format']['nb_streams']
    ffmpeg_command = ''
    logger.debug(f'There are : {streams_count} streams')
    for i in range(streams_count):
        codec_type = stream_info['streams'][i]['codec_type']
        if codec_type == 'video':
            encoding_decision, ffmpeg_command = check_video_stream(encoding_decision, i, stream_info, ffmpeg_command)
        elif codec_type == 'audio':
            encoding_decision, ffmpeg_command = check_audio_stream(encoding_decision, i, stream_info, ffmpeg_command)
        elif codec_type == 'subtitle':
            encoding_decision, ffmpeg_command = check_subtitle_stream(encoding_decision, i, stream_info, ffmpeg_command)
        elif codec_type == 'attachment':
            encoding_decision, ffmpeg_command = check_attachmeent_stream(encoding_decision, i, stream_info, ffmpeg_command)
    return encoding_decision, ffmpeg_command
def check_video_stream(encoding_decision, i, stream_info, ffmpeg_command):
    """Append the video-stream arguments: copy AV1/mjpeg, re-encode everything else to AV1."""
    codec_name = stream_info['streams'][i]['codec_name']
    desired_video_codec = 'av1'
    # Typo fix: log messages said 'Steam'.
    logger.debug('Stream ' + str(i) + ' codec is: ' + codec_name)
    if codec_name in (desired_video_codec, 'mjpeg'):
        # Already AV1, or an embedded cover-art image stream: pass through untouched.
        ffmpeg_command = ffmpeg_command + ' -map 0:' + str(i) + ' -c:v copy'
    else:
        # Dead-code fix: the original if/elif chain was exhaustive, so its
        # trailing 'else: ignoring for now' branch could never run.
        encoding_decision = True
        svt_av1_string = "libsvtav1 -crf 25 -preset 4 -g 240 -pix_fmt yuv420p10le -svtav1-params filmgrain=20:film-grain-denoise=0:tune=0:enable-qm=1:qm-min=0:qm-max=15"
        ffmpeg_command = ffmpeg_command + ' -map 0:' + str(i) + ' -c:v ' + svt_av1_string
    return encoding_decision, ffmpeg_command


def check_audio_stream(encoding_decision, i, stream_info, ffmpeg_command):
    """Audio streams are copied as-is for now (no target audio codec decided yet)."""
    codec_name = stream_info['streams'][i]['codec_name']
    logger.debug('Stream ' + str(i) + ' codec is: ' + codec_name)
    ffmpeg_command = ffmpeg_command + ' -map 0:' + str(i) + ' -c:a copy'
    return encoding_decision, ffmpeg_command


def check_subtitle_stream(encoding_decision, i, stream_info, ffmpeg_command):
    """Subtitle streams are copied as-is for now (no target subtitle codec decided yet)."""
    codec_name = stream_info['streams'][i]['codec_name']
    logger.debug('Stream ' + str(i) + ' codec is: ' + codec_name)
    ffmpeg_command = ffmpeg_command + ' -map 0:' + str(i) + ' -c:s copy'
    return encoding_decision, ffmpeg_command


def check_attachmeent_stream(encoding_decision, i, stream_info, ffmpeg_command):
    """Attachment streams (fonts, cover art) are copied as-is.

    Attachments may not carry a codec_name, so nothing is inspected here.
    (The misspelled function name is kept: check_codecs dispatches on it.)
    """
    ffmpeg_command = ffmpeg_command + ' -map 0:' + str(i) + ' -c:t copy'
    return encoding_decision, ffmpeg_command


# >>>>>>>>>>>>>>>>>>>>>>                    <<<<<<<<<<<<<<<<<<<<<<
# This section starts the actual processing of media files
# >>>>>>>>>>>>>>>>>>>>>>                    <<<<<<<<<<<<<<<<<<<<<<


@app.task()
def process_ffmpeg(file_located_data):
    """Validate, encode, and swap one media file into place, then record the results.

    Pipeline: prelaunch checks -> ffmpeg encode -> postlaunch checks -> move
    the encoded file over the original -> write stats to the DB. Each stage
    gates the next; on any failure the task stops (the failing stage has
    already logged the reason).
    """
    file = file_located_data['file']
    # Guard clauses instead of the original four-level if nesting.
    if ffmpeg_prelaunch_checks(file_located_data) is not True:
        return
    logger.debug(file + ' has passed ffmpeg_prelaunch_checks')
    if run_ffmpeg(file_located_data) is not True:
        return
    logger.debug(file + ' has passed run_ffmpeg')
    if ffmpeg_postlaunch_checks(file_located_data) is not True:
        return
    logger.debug(file + ' has passed ffmpeg_postlaunch_checks')
    if move_media(file_located_data) is not True:
        return
    logger.debug(file + ' has passed move_media')
    logger.info('ffmpeg is done with: ' + file)
    file_path = file_located_data['file_path']
    ffmepg_output_file_name = file_located_data['ffmepg_output_file_name']
    file_located_data['new_file_size'] = get_file_size_kb(destination_file_name_function(file_path, ffmepg_output_file_name))
    write_results(file_located_data)
    logger.debug(json.dumps(file_located_data, indent=4))
################# Pre Launch Checks #################

def ffmpeg_prelaunch_checks(file_located_data):
    """Return True only when the source still exists, is unchanged, and decodes cleanly."""
    pre_launch_file_path = file_located_data['file_path']
    pre_launch_old_file_size = file_located_data['old_file_size']
    # Flattened from the original nested ifs; short-circuits left to right and
    # always yields an explicit bool (the nested version could return None).
    return (prelaunch_file_exists(pre_launch_file_path)
            and prelaunch_hash_match(pre_launch_file_path, pre_launch_old_file_size)
            and prelaunch_file_validation(pre_launch_file_path))


def prelaunch_file_exists(file_path):
    """True when the source file is still on disk."""
    if file_exists(file_path):
        logger.debug(str(file_path) + ' Passed prelaunch_file_exists')
        return True
    logger.error(str(file_path) + ' Failed prelaunch_file_exists')
    return False


def prelaunch_hash_match(file_path, pre_launch_old_file_size):
    """True when the file's current size matches the size recorded at discovery.

    Despite the name, this compares sizes in KB, not hashes — it is a cheap
    'has the file changed since it was queued' check.
    """
    current_file_size = get_file_size_kb(file_path)
    if pre_launch_old_file_size == current_file_size:
        logger.debug(str(file_path) + ' passed prelaunch_hash_match')
        return True
    logger.error(str(file_path) + ' failed prelaunch_hash_match')
    return False


def prelaunch_file_validation(file_path):
    """True when the source decodes without errors (see validate_video)."""
    if validate_video(file_path):
        logger.debug(str(file_path) + ' passed prelaunch_file_validation')
        return True
    logger.error(str(file_path) + ' failed prelaunch_file_validation')
    return False


################# Run FFMPEG #################

def run_ffmpeg(file_located_data):
    """Run ffmpeg for this file; return True only when ffmpeg exits with code 0."""
    ffmpeg_string_settings = 'ffmpeg -hide_banner -loglevel 16 -stats -stats_period 10 -y -i'
    ffmpeg_stringfile_path = file_located_data['file_path']
    ffmpeg_stringffmpeg_command = file_located_data['ffmpeg_command']
    ffmpeg_stringffmepg_output_file_name = file_located_data['ffmepg_output_file_name']
    output_ffmpeg_command = f"{ffmpeg_string_settings} \"{ffmpeg_stringfile_path}\" {ffmpeg_stringffmpeg_command} \"{ffmpeg_stringffmepg_output_file_name}\""
    logger.info('ffmpeg_command is: ' + output_ffmpeg_command)
    logger.info('running ffmpeg now')
    # NOTE(review): shell=True with interpolated paths is injection-prone for
    # filenames containing quotes; kept because ffmpeg_command arrives as a
    # pre-built string — consider switching to list-form argv upstream.
    try:
        process = subprocess.Popen(output_ffmpeg_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
        for line in process.stdout:
            logger.debug(line)
        # Bug fix: the original returned True unconditionally, so failed
        # encodes were treated as successes. Wait and require exit code 0.
        returncode = process.wait()
        if returncode == 0:
            return True
        logger.error('ffmpeg exited with non-zero code: ' + str(returncode))
        return False
    except Exception as e:
        logger.error(f"Error: {e}")
        return False


################# Post Launch Checks #################


def ffmpeg_postlaunch_checks(file_located_data):
    """Return True when both files exist and the encoded output validates."""
    post_launch_original_file = file_located_data['file_path']
    post_launch_encoded_file = file_located_data['ffmepg_output_file_name']
    if post_launch_file_check(post_launch_original_file, post_launch_encoded_file):
        if post_launch_file_validation(post_launch_encoded_file):
            # Bug fix: the original logged 'failed ... failed' on this success path.
            logger.debug(str(post_launch_encoded_file) + ' passed ffmpeg_postlaunch_checks')
            return True
    logger.error(str(post_launch_encoded_file) + ' failed ffmpeg_postlaunch_checks')
    return False


def post_launch_file_check(post_launch_original_file, post_launch_encoded_file):
    """True when both the original and the freshly encoded file exist on disk."""
    if file_exists(post_launch_original_file) and file_exists(post_launch_encoded_file):
        logger.debug(str(post_launch_encoded_file) + ' passed post_launch_file_check')
        return True
    logger.error(str(post_launch_encoded_file) + ' failed post_launch_file_check')
    return False


def post_launch_file_validation(post_launch_encoded_file):
    """True when the encoded output decodes cleanly."""
    logger.debug('Starting post_launch_file_validation')
    if validate_video(post_launch_encoded_file):
        logger.debug(str(post_launch_encoded_file) + ' passed post_launch_file_validation')
        return True
    logger.error(str(post_launch_encoded_file) + ' failed post_launch_file_validation')
    return False
logger.debug('Starting post_launch_file_validation') 393 | if validate_video(post_launch_encoded_file) == True: 394 | logger.debug(str(post_launch_encoded_file) + ' passed post_launch_file_validation') 395 | return True 396 | else: 397 | logger.error(str(post_launch_encoded_file) + ' failed post_launch_file_validation') 398 | return False 399 | 400 | 401 | ################# move_media ################# 402 | 403 | 404 | def move_media(file_located_data): 405 | file_path = file_located_data['file_path'] 406 | renamed_file = renamed_file_function(file_path) 407 | ffmepg_output_file_name = file_located_data['ffmepg_output_file_name'] 408 | destination_file_name = destination_file_name_function(file_path, ffmepg_output_file_name) 409 | if rename_original_file_function(file_path, renamed_file): 410 | if move_encoded_file_function(ffmepg_output_file_name, destination_file_name): 411 | if delete_renamed_original_file_function(renamed_file): 412 | return True 413 | else: 414 | return False 415 | 416 | 417 | def renamed_file_function(file_to_be_renamed): 418 | renamed_directory, renamed_filename = os.path.split(file_to_be_renamed) 419 | rename, reext = os.path.splitext(renamed_filename) 420 | new_filename = f"{rename}-copy{reext}" 421 | new_file_path = os.path.join(renamed_directory, new_filename) 422 | return new_file_path 423 | 424 | 425 | def destination_file_name_function(file_path, ffmepg_output_file_name): 426 | # Quick and silly function for creating the correct filepath to move the encoded file to 427 | destination_file_name = os.path.join(os.path.dirname(file_path), os.path.basename(ffmepg_output_file_name)) 428 | return destination_file_name 429 | 430 | 431 | def rename_original_file_function(file_path, renamed_file): 432 | # This is here incase any of the move opperations mess up 433 | try: 434 | os.rename(file_path, renamed_file) 435 | except Exception as e: 436 | logger.debug(f"An error occurred: {e}") 437 | if file_exists(renamed_file) == True: 438 | 
logger.debug(file_path + ' passed rename_original_file') 439 | return True 440 | else: 441 | logger.debug(file_path + ' filed rename_original_file') 442 | return False 443 | 444 | 445 | def move_encoded_file_function(ffmepg_output_file_name, destination_file_name): 446 | # Function to move the encoded file to the original file's directory 447 | try: 448 | shutil.move(ffmepg_output_file_name, destination_file_name) 449 | except Exception as e: 450 | logger.debug(f"An error occurred: {e}") 451 | if file_exists(destination_file_name) == True: 452 | logger.debug(destination_file_name + ' has passed move_encoded_file') 453 | return True 454 | else: 455 | logger.debug(destination_file_name + ' has failed move_encoded_file') 456 | return False 457 | 458 | 459 | def delete_renamed_original_file_function(renamed_file): 460 | # Function to delete the renamed original file 461 | try: 462 | os.remove(renamed_file) 463 | except Exception as e: 464 | logger.debug(f"An error occurred: {e}") 465 | if file_exists(renamed_file) == True: 466 | logger.debug(renamed_file + ' has failed delete_renamed_original_file_function') 467 | return False 468 | else: 469 | logger.debug(renamed_file + ' has passed delete_renamed_original_file_function') 470 | return True 471 | 472 | ########################## Common Functions ########################## 473 | 474 | def file_exists(filepath): 475 | file_existance = os.path.isfile(filepath) 476 | # Returns true if the file that is about to be touched is in the expected location 477 | logger.debug (filepath + ' : ' + str(file_existance)) 478 | return file_existance 479 | 480 | def get_file_size_kb(filepath_for_size_kb): 481 | logger.debug('filepath is: ' + str(filepath_for_size_kb)) 482 | file_size_bytes = os.path.getsize(filepath_for_size_kb) 483 | file_size_kb = round(file_size_bytes / 1024) 484 | return file_size_kb 485 | 486 | def validate_video(filepath): 487 | # This function determines if a video is valid, or if the video contains errors 488 | # 
Returns: 489 | # Failure if the shell command returns anything; i.e. one of the streams is bad 490 | # Success if the shell command doesn't return anything; i.e. the streams are good 491 | # Error if the shell command fails; this shouldn't happen 492 | try: 493 | command = 'ffmpeg -v error -i "' + filepath + '" -f null -' 494 | result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) 495 | if result.stdout or result.stderr: 496 | logger.debug ('File failed validation') 497 | return False 498 | else: 499 | logger.debug ('File passed validation') 500 | return True 501 | except Exception as e: 502 | return f"Error: {e}" 503 | 504 | 505 | # >>>>>>>>>>>>>>>>>>>>>> <<<<<<<<<<<<<<<<<<<<<< 506 | # This section starts the discovery of the files by searching directories 507 | # >>>>>>>>>>>>>>>>>>>>>> <<<<<<<<<<<<<<<<<<<<<< 508 | 509 | def write_results(file_located_data): 510 | unique_identifier = file_located_data['file'] + str(datetime.now().microsecond) 511 | file_name = file_located_data['file'] 512 | file_path = file_located_data['file_path'] 513 | config_name = 'placeholder' 514 | new_file_size = file_located_data['new_file_size'] 515 | old_file_size = file_located_data['old_file_size'] 516 | new_file_size_difference = old_file_size - new_file_size 517 | watch_folder = file_located_data['directory'] 518 | ffmpeg_encoding_string = file_located_data['ffmpeg_command'] 519 | 520 | if len(ffmpeg_encoding_string) > 999: 521 | # the varchar for ffmpeg_encoding_string is 999 characters. 
This is to keep the db write from failing at 1000 characters 522 | ffmpeg_encoding_string = ffmpeg_encoding_string[:999] 523 | 524 | logger.info('Writing results') 525 | insert_record(unique_identifier, file_name, file_path, config_name, new_file_size, new_file_size_difference, old_file_size, watch_folder, ffmpeg_encoding_string) 526 | logger.info('Writing results complete') 527 | 528 | 529 | def insert_record(unique_identifier, file_name, file_path, config_name, new_file_size, new_file_size_difference, old_file_size, watch_folder, ffmpeg_encoding_string): 530 | try: 531 | # Connection details 532 | connection = mysql.connector.connect( 533 | host='192.168.1.110', 534 | port=32053, # replace with your non-default port 535 | database='boilest', 536 | user='boilest', 537 | password='boilest' 538 | ) 539 | 540 | if connection.is_connected(): 541 | cursor = connection.cursor() 542 | recorded_date = datetime.now() # Current date and time 543 | 544 | insert_query = """ 545 | INSERT INTO ffmpeghistory ( 546 | unique_identifier, recorded_date, file_name, file_path, config_name, 547 | new_file_size, new_file_size_difference, old_file_size, watch_folder, ffmpeg_encoding_string 548 | ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) 549 | """ 550 | 551 | record = ( 552 | unique_identifier, recorded_date, file_name, file_path, config_name, 553 | new_file_size, new_file_size_difference, old_file_size, watch_folder, ffmpeg_encoding_string 554 | ) 555 | 556 | cursor.execute(insert_query, record) 557 | connection.commit() 558 | logger.debug("Record inserted successfully") 559 | 560 | except Error as e: 561 | logger.error(f"Error while connecting to MariaDB: {e}") 562 | 563 | finally: 564 | if connection.is_connected(): 565 | cursor.close() 566 | connection.close() 567 | logger.debug("MariaDB connection is closed") 568 | 569 | --------------------------------------------------------------------------------