├── start.py ├── .github └── workflows │ └── publish_image.yml ├── boilest kubernetes.yml ├── dockerfile ├── .gitignore ├── README.md └── tasks.py /start.py: -------------------------------------------------------------------------------- 1 | from tasks import locate_files 2 | locate_files('farts') 3 | -------------------------------------------------------------------------------- /.github/workflows/publish_image.yml: -------------------------------------------------------------------------------- 1 | name: Publish Docker Image to GHCR 2 | 3 | on: 4 | push 5 | 6 | jobs: 7 | build_and_push: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v3 11 | - name: Build and push the image 12 | run: | 13 | docker login --username goingoffroading --password ${{ secrets.GH_PAT }} ghcr.io 14 | docker build . --tag ghcr.io/goingoffroading/boilest-worker:latest 15 | docker push ghcr.io/goingoffroading/boilest-worker:latest -------------------------------------------------------------------------------- /boilest kubernetes.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Kubernetes deployment example using a Daemonset and labeled nodes. 
3 | # Label a node 'boilest':'worker' and Boilest will automatically deploy to it 4 | # Repository here is GitHub Container Registry (GHCR) 5 | kind: DaemonSet 6 | apiVersion: apps/v1 7 | metadata: 8 | name: boilest 9 | labels: 10 | app: boilest 11 | boil: worker 12 | spec: 13 | replicas: 1 14 | strategy: 15 | type: Recreate 16 | selector: 17 | matchLabels: 18 | app: boilest 19 | template: 20 | metadata: 21 | labels: 22 | app: boilest 23 | spec: 24 | containers: 25 | - name: boilest 26 | image: ghcr.io/goingoffroading/boilest-worker:latest 27 | imagePullPolicy: Always 28 | volumeMounts: 29 | - name: boilestmedia 30 | mountPath: "/boil_watch" 31 | nodeSelector: 32 | boilest: worker 33 | nodeName: node101-desktop 34 | volumes: 35 | - name: boilestmedia 36 | nfs: 37 | server: SERVER_IP_HERE 38 | path: "/SERVER/PATH/HERE" -------------------------------------------------------------------------------- /dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official Python image based on Alpine 2 | FROM python:3.9-alpine 3 | 4 | # Install dependencies and supervisor 5 | RUN apk update && \ 6 | apk add --no-cache \ 7 | build-base \ 8 | linux-headers \ 9 | supervisor \ 10 | ffmpeg && \ 11 | pip install --no-cache-dir celery requests mysql-connector-python && \ 12 | apk upgrade 13 | 14 | # Create a non-root user and group 15 | ARG UID=1000 16 | ARG GID=1000 17 | RUN addgroup -g $GID appgroup && \ 18 | adduser -D -u $UID -G appgroup appuser 19 | 20 | # Create additional directories without setting ownership 21 | RUN mkdir -p /tv /anime /movies /boil_hold 22 | 23 | # Create application directory and set ownership 24 | WORKDIR /app 25 | COPY . 
/app 26 | RUN chown -R appuser:appgroup /app /boil_hold 27 | 28 | # Create log directory and set ownership 29 | RUN mkdir -p /app/logs && \ 30 | chown -R appuser:appgroup /app/logs 31 | 32 | # Environment variables 33 | ENV TZ=US/Pacific 34 | 35 | # Used in celery and rabbitmq 36 | ENV user celery 37 | ENV password celery 38 | ENV celery_host 192.168.1.110 39 | ENV celery_port 31672 40 | ENV celery_vhost celery 41 | ENV rabbitmq_host 192.168.1.110 42 | ENV rabbitmq_port 32311 43 | 44 | # Used in celery and rabbitmq 45 | ENV sql_host 192.168.1.110 46 | ENV sql_port 32053 47 | ENV sql_database boilest 48 | ENV sql_user boilest 49 | ENV sql_pswd boilest 50 | 51 | # Run as non-root user 52 | USER appuser 53 | 54 | # Start supervisord 55 | CMD ["celery", "-A", "tasks", "worker"] -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Abstraction 2 | 3 | Boilest is my solution to: 4 | 5 | - Having video media in lots of different formats, but wanting to consolidate it into one format 6 | - Wanting my video content to consume less space in my NAS 7 | - Wanting to do this work at scale 8 | 9 | --- 10 | # Why 'Boilest'? 11 | 12 | Because I am terrible at naming things, and it was the first agreeable thing to come out of a random name generator. 13 | 14 | --- 15 | # What about Tdarr, Unmanic, or other existing distributed solutions?? 16 | 17 | [Tdarr](https://home.tdarr.io/) is a great platform, but didn't setup or scale as well as I would have liked. 
I also found it comfortably to under documented, closed source, had some design oddities, and hid features behind a paywall. 18 | 19 | As frenzied as Tdarr fans are on Reddit, I just can't commit/subscribe to a service like that. 20 | 21 | [Unmanic](https://github.com/Unmanic/unmanic/tree/master) is magic... I am a big fan, and Unmanic is comfortably the inspiration of this project. 22 | 23 | I would be using Unmanic today, instead of writing spaghetti code, but Josh5 had previously [hardcoded the platform on an older version of FFmpeg](https://github.com/Unmanic/unmanic/blob/master/docker/Dockerfile#L82), doesn't currently support AV1, [has some complexities to build the container](https://github.com/Unmanic/unmanic/blob/master/docker/README.md) that make it difficult to code my own support, and doesn't seem to be keeping up on the repo or accepting PRs. 24 | 25 | --- 26 | # Why not Handbrake? 27 | 28 | Handbrake is awesome, but: 29 | 30 | - It's not distributed/doesn't scale past the node with the GUI open 31 | - It's 'watch folder' functionality doesn't do any file checking, sorting, or filtering to decide if it should actually process a file 32 | - Does not have the functionality for monitoring an existing media collection 33 | 34 | --- 35 | # How does Boilest work? 36 | 37 | - Boilest kicks off a job that searches directories for video files 38 | - Boilest then checks each individual video file to see if the various codecs match a spec. In this step, Boilest will also prioritize files that have the highest ROI for encoding (large media, x264, mp4, etc) first as to not waste time with diminishing returns (changing small, x265, mkv files) up front. If any of the codecs don't match spec, the file is dispatched for encoding. 39 | - If it is determined from the above step that encoding is required, the file undergoes a series of validations. Assuming the file passes those validations, the file is encoded. The output encoded file is then also validated. 
If the output encoded file passes validations, it replaces the original file. 40 | - Once encoding is complete, the results are stored in a DB for stats. 41 | 42 | --- 43 | # What will Boilest change? 44 | 45 | In any given media file: 46 | 47 | | Area | Target Change | 48 | |------|---------------| 49 | | Container | Media containers that are not MKR (like MP4) are changed to MKV 50 | | Video | Video streams that are not AV1 are encoded to AV1 51 | | Audio | No changes to audio streams at this time. Audio streams are copied. 52 | | Subtitles | No changes to subtitle streams at this time. subtitle streams are copied. 53 | | Attachments | No changes to Attachments at this time. Attachments are copied. 54 | 55 | Once I make some final decisions around what is optimal for TV/device streaming, there will become targets to audio, subtitles, and attachments. 56 | 57 | --- 58 | # Prerequisites 59 | 60 | --- 61 | ## RabbitMQ 62 | 63 | The backbone of Boilest is a distributed task Python library called [Celery](https://docs.celeryq.dev/en/stable/getting-started/introduction.html). Celery needs a message transport (a place to store the task queue), and we leverage RabbitMQ for that. 64 | 65 | RabbitMQ will need to be deployed with it's management plugin. 66 | 67 | From the management plugin: 68 | 69 | - Create a 'celery' vhost 70 | - Create a user with the user/pwd of celery/celery 71 | - Give the celery .* configure, write, read permissions in the celery vhost 72 | 73 | --- 74 | ## MariaDB 75 | 76 | Technically, the workflow works fine (at this time) without access to MariaDB (mysql). MariaDB is where the results of the encoding are tracked. If Maria is not deployed, the final task will fail, and this will only be noticeable in the logs. 77 | 78 | In Maria, create a database called 'boilest'. 
79 | 80 | In the 'boilest' database, create a table called 'ffmpeghistory' with the following columns: 81 | 82 | | Column Name | Type | 83 | |--------------------------|---------------------------------| 84 | | unique_identifier | varchar(100) | 85 | | recorded_date | datetime | 86 | | file_name | varchar(100) | 87 | | file_path | varchar(100) | 88 | | config_name | varchar(100) | 89 | | new_file_size | int(11) | 90 | | new_file_size_difference | int(11) | 91 | | old_file_size | int(11) | 92 | | watch_folder | varchar(100) | 93 | | ffmpeg_encoding_string | varchar(1000) | 94 | 95 | In a future iteration, I'll include a python script that populates database and table into Maria automatically. 96 | 97 | --- 98 | # How to deploy 99 | 100 | - Create your deployment (Docker/Kubernetes/etc) with the ghcr.io/goingoffroading/boilest-worker:latest container image. 101 | - Change the container variables to reflect your environment: 102 | 103 | | ENV | Default Value | Notes | 104 | |---------------------------------|-------------------------|-----------------------------------------------------| 105 | | celery_user | celery | The user setup for Celery in your RabbitMQ | 106 | | celery_password | celery | The password setup for Celery in your RabbitMQ | 107 | | celery_host | 192.168.1.110 | The IP address of RabbitMQ | 108 | | celery_port | 31672 | The port RabbitMQ's port 5672 or 5673 are mapped to | 109 | | celery_vhost | celery | The RabbitMQ vhost setup for Boilest | 110 | | rabbitmq_host | 192.168.1.110 | The IP address of RabbitMQ management UI | 111 | | rabbitmq_port | 32311 | The port of RabbitMQ management UI | 112 | | sql_host | 192.168.1.110 | The IP address of MariaDB | 113 | | sql_port | 32053 | The port mapped to MariaDB's port 3306 | 114 | | sql_database | boilest | The database name setup for Boilest | 115 | | sql_user | boilest | The username setup for Boilest | 116 | | sql_pswd | boilest | The password setup for Boilest | 117 | 118 | - Deploy the container. 
119 | - SSH into any one of the containers and run 'python start.py'. This will kick off all of the workflows. 120 | 121 | Done. 122 | 123 | - See 'boilest kubernetes.yml' for an example of a Kubernetes deployment 124 | 125 | --- 126 | # How to start the Boilest/video encoding workflow 127 | Either: 128 | - Deploy the [Boilest Management GUI](https://github.com/GoingOffRoading/Boilest_Manager_GUI) container and either wait for the cron, or SSH into the container and start start.py 129 | - SSH into one of the Boilest-Worker containers and run start.py: 130 | 131 | In both SSH cases, literally run 132 | ``` 133 | python start.py 134 | ``` 135 | SSH in Kubernetes is: 136 | 137 | ``` 138 | kubectl exec -it (your pod name) -- /bin/sh 139 | ``` 140 | 141 | SSH in Docker is: 142 | 143 | ``` 144 | docker exec -it (container ID) /bin/sh 145 | ``` 146 | Starting the workflow only needs to be done once from any one of the relevant containers. That start will trickle into the other containers via the RabbitMQ broker. 147 | 148 | --- 149 | # Q&A 150 | 151 | * If Celery can use Redis or RabbitMQ for its message transport, can Boilest use Redis? 152 | 153 | Not in Boilest's current state, and probably never. Redis doesn't 'support' prioritization of messages technically at all or as well as rabbit does. Boilest currently uses RabbitMQ's prioritization of messages to encode the video files with the highest ROI for encoding time. 154 | 155 | 156 | --- 157 | # Todo List 158 | 159 | - [x] Setup a set_priority function for ffprobe based on container, file size, and video codec (I.E. 
the things that have the greatest impact on ROI) 160 | - [x] Setup the function to write the results to the DB 161 | - [x] Replace the prints with logging 162 | - [ ] Made decisions on audio codec 163 | - [ ] Make decisions on subtitle codec 164 | - [ ] Research ffprobe flags for HDR content 165 | - [x] Figure out how to pass the watch folder forward for the SQL write 166 | - [x] Figure out how to pass the ffmpeg string forward for the SQL write 167 | - [x] Stand up repo for management UI 168 | - [ ] Make tweaks to the prioritization scoring 169 | - [ ] Create a 'create database, table' script 170 | - [ ] Having write_results be it's own task is stupid. Incorporate it into process_ffmpeg. 171 | - [ ] Tasks.py is stupidly big. Break it up into different files for readability/management. 172 | - [ ] Revisit string formatting i.e. f"Name: {name}, Age: {age}" instead of name + ", Age:" + str(age) 173 | - [ ] Explore using the Pydantic Model 174 | - [ ] Remove hard-coding related 175 | - [ ] Move UniqueID in the SQL to a GUID 176 | - [ ] Explore using pathlib instead of OS 177 | - [x] Remove the archive 178 | - [ ] Some day... 
Remove the celery task function for write_results 179 | - [x] Consider moving queue_workers_if_queue_empty to the manager container -------------------------------------------------------------------------------- /tasks.py: -------------------------------------------------------------------------------- 1 | from celery import Celery 2 | import json, os, logging, subprocess, shutil, mysql.connector 3 | from mysql.connector import Error 4 | from datetime import datetime 5 | 6 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 7 | # >>>>>>>>>>>>>>> Celery Configurations >>>>>>>>>>>>>>> 8 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 9 | 10 | # create logger 11 | logger = logging.getLogger('boilest_logs') 12 | logger.setLevel(logging.INFO) 13 | 14 | # create console handler and set level to debug 15 | ch = logging.StreamHandler() 16 | ch.setLevel(logging.DEBUG) 17 | 18 | # create formatter 19 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') 20 | 21 | # add formatter to ch 22 | ch.setFormatter(formatter) 23 | 24 | # add ch to logger 25 | logger.addHandler(ch) 26 | 27 | 28 | def celery_url_path(thing): 29 | # https://docs.celeryq.dev/en/stable/getting-started/first-steps-with-celery.html#keeping-results 30 | celery_user = os.environ.get('user', 'celery') 31 | celery_password = os.environ.get('password', 'celery') 32 | celery_host = os.environ.get('celery_host', '192.168.1.110') 33 | celery_port = os.environ.get('celery_port', '31672') 34 | celery_vhost = os.environ.get('celery_vhost', 'celery') 35 | thing = thing + celery_user + ':' + celery_password + '@' + celery_host + ':' + celery_port + '/' + celery_vhost 36 | logger.debug('celery_url_path is: ' + thing) 37 | return thing 38 | 39 | app = Celery('worker_queue', broker = celery_url_path('amqp://') ) 40 | 41 | 42 | app.conf.task_default_queue = 'worker_queue' 43 | app.conf.worker_concurrency = 1 44 | app.conf.worker_prefetch_multiplier=1 45 | 46 | 
# Celery queue topology: one direct-exchange queue with message priorities
# enabled (x-max-priority) so high-ROI encodes are consumed first.
app.conf.task_queues = {
    'worker_queue': {
        'exchange': 'tasks',
        'exchange_type': 'direct',
        'routing_key': 'worker_queue',
        'queue_arguments': {'x-max-priority': 10},
    }
}

# Every task is routed through the single worker queue.
app.conf.task_routes = {
    'locate_files': {'queue': 'worker_queue'},
    'requires_encoding': {'queue': 'worker_queue'},
    'process_ffmpeg': {'queue': 'worker_queue'}
}

# >>>>>>>>>>>>>>>>>>>>>>                    <<<<<<<<<<<<<<<<<<<<<<
# This section starts the discovery of the files by searching directories
# >>>>>>>>>>>>>>>>>>>>>>                    <<<<<<<<<<<<<<<<<<<<<<

@app.task
def locate_files(arg):
    """Walk the watch directories and queue a requires_encoding task per video file.

    `arg` is unused but kept for backward compatibility: start.py calls
    locate_files('farts') positionally.
    """
    directories = ['/anime', '/tv', '/movies']
    extensions = ['.mp4', '.mkv', '.avi']

    logger.info(f'Searching directories: {directories}')
    logger.info(f'File extensions: {extensions}')

    for file_located in find_files(directories, extensions):
        logger.debug('File located, sending to ffprobe function')
        try:
            file_located_data = json.loads(file_located)
            logger.debug(json.dumps(file_located_data, indent=3, sort_keys=True))
            # Discovery runs at the lowest priority (1) so queued encodes,
            # which carry higher priorities, are consumed first.
            requires_encoding.apply_async(kwargs={'file_located_data': file_located_data}, priority=1)
        except json.JSONDecodeError as e:
            # find_files emits its own JSON, so this is belt-and-braces only.
            logger.error(f'Failed to decode JSON: {e}')


def find_files(directories, extensions):
    """Yield a JSON document for every file under `directories` whose name
    ends with one of `extensions` (case-insensitive).

    JSON (rather than a dict) is yielded to keep the payload in the same
    serialized shape the Celery tasks exchange.
    """
    for directory in directories:
        logger.info(f'Scanning: {directory}')
        for root, dirs, files in os.walk(directory):
            for file in files:
                for ext in extensions:
                    if file.lower().endswith(ext.lower()):
                        yield json.dumps({
                            'directory': directory,
                            'root': root,
                            'file': file,
                            'file_path': os.path.join(root, file),
                            'extension': ext,
                        })
# >>>>>>>>>>>>>>>>>>>>>>                    <<<<<<<<<<<<<<<<<<<<<<
# This section discovers the file's metadata and determines what processing may need to occur
# >>>>>>>>>>>>>>>>>>>>>>                    <<<<<<<<<<<<<<<<<<<<<<

@app.task
def requires_encoding(file_located_data):
    """Probe one file and, if the container or any stream is off-spec, queue it for encoding.

    `file_located_data` is the dict produced by find_files; before dispatch it
    is enriched with the ffmpeg argument string, the staging output path, and
    the original size in KB, then handed to process_ffmpeg at a priority
    reflecting its encoding ROI.
    """
    stream_info = ffprobe_function(file_located_data['file_path'])
    encoding_decision = False
    old_file_size = file_size_kb(file_located_data['file_path'])
    file_name = file_located_data['file']
    logger.info(f'{file_name} started requires_encoding')
    processing_priority = get_ffmpeg_processing_priority(old_file_size, stream_info)
    logger.debug(f'processing_priority is: {processing_priority}')
    encoding_decision, ffmepg_output_file_name = check_container_type(stream_info, encoding_decision, file_name)
    encoding_decision, ffmpeg_command = check_codecs(stream_info, encoding_decision)
    if encoding_decision:
        logger.info(f'{file_name} requires encoding')
        file_located_data['ffmpeg_command'] = ffmpeg_command
        file_located_data['ffmepg_output_file_name'] = ffmepg_output_file_name
        # Reuse the size measured above instead of stat-ing the file a second time.
        file_located_data['old_file_size'] = old_file_size
        logger.debug(json.dumps(file_located_data, indent=4))
        process_ffmpeg.apply_async(kwargs={'file_located_data': file_located_data}, priority=processing_priority)
    else:
        logger.debug('file does not need encoding')
        logger.debug(encoding_decision)
        logger.debug(ffmpeg_command)
    logger.info(f'{file_name} ended requires_encoding')


def get_ffmpeg_processing_priority(old_file_size, stream_info):
    """Compute the RabbitMQ message priority: 10 minus the three adjustments.

    NOTE(review): larger / older-format files receive LARGER adjustments and
    therefore a LOWER numeric priority; with RabbitMQ a higher number is
    consumed first — confirm this matches the 'biggest ROI first' intent.
    """
    priority = 10
    adjustments_for_file_size = adjust_priority_based_on_filesize_f(old_file_size)
    adjustments_for_container = adjustments_for_container_f(stream_info)
    adjustments_for_codec = adjustments_for_codec_f(stream_info)
    priority = priority - adjustments_for_file_size - adjustments_for_container - adjustments_for_codec
    logger.debug(f'Encoding priority determined to be: {priority}')
    return priority


def adjust_priority_based_on_filesize_f(size_kb):
    """Return 0-4: one point per whole GB of input, capped at 4 (bigger files = bigger ROI).

    Parameter renamed from `file_size_kb`, which shadowed the module-level
    function of the same name.
    """
    file_size_gb = size_kb // (1024 * 1024)
    file_size_adjustment = min(file_size_gb, 4)
    # Bug fix: was logging.debug (root logger — messages were silently dropped
    # because only the 'boilest_logs' logger is configured).
    logger.debug(f'Based on file size, priority increasing by: {file_size_adjustment}')
    return file_size_adjustment


def adjustments_for_container_f(stream_info):
    """Return 1 when the container is not already Matroska/WebM, else 0."""
    container_adjustment = 0
    if stream_info['format'].get('format_name') != "matroska,webm":
        container_adjustment = 1
    # Bug fix: was logging.debug instead of logger.debug (see above).
    logger.debug(f'Based on the files container, priority increasing by: {container_adjustment}')
    return container_adjustment


def adjustments_for_codec_f(stream_info):
    """Return 2 when the first stream is h264 (cheap, high-ROI re-encode), else 0.

    NOTE(review): assumes stream 0 is the video stream — ffprobe does not
    guarantee stream ordering; verify against real media.
    """
    codec_adjustment = 0
    # .get avoids a KeyError for streams (e.g. attachments) with no codec_name.
    if stream_info["streams"][0].get("codec_name") == "h264":
        codec_adjustment = 2
    # Bug fix: was logging.debug, and the message said 'container' — this
    # adjustment is about the codec.
    logger.debug(f'Based on the files codec, priority increasing by: {codec_adjustment}')
    return codec_adjustment


def file_size_kb(file_path):
    """Return the on-disk size of file_path in whole KB (0 when the file is missing)."""
    if os.path.isfile(file_path):
        file_size_bytes = os.path.getsize(file_path)
        return round(file_size_bytes / 1024)
    # Consistency fix: return int 0 (was 0.0) to match the rounded-int success path.
    return 0


def ffprobe_function(file_path):
    """Return ffprobe's JSON stream/format metadata for file_path.

    Raises subprocess.CalledProcessError (check=True) when ffprobe fails.
    """
    # Security fix: list-form argv with shell=False so filenames containing
    # quotes or shell metacharacters cannot break or inject into the command.
    ffprobe_command = [
        'ffprobe', '-loglevel', 'quiet',
        '-show_entries', 'format:stream=index,stream,codec_type,codec_name,channel_layout,format=nb_streams',
        '-of', 'json', file_path,
    ]
    result = subprocess.run(ffprobe_command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return json.loads(result.stdout)


def check_container_type(stream_info, encoding_decision, file):
    """Flag non-Matroska containers for remuxing and derive the staging output path."""
    format_name = stream_info['format'].get('format_name')
    logger.debug(f'format is: {format_name}')
    if format_name != 'matroska,webm':
        encoding_decision = True
    # Always derive the output name so it is bound even for mkv-container input
    # (NOTE(review): guards against a likely UnboundLocalError in the original
    # when the container was already matroska,webm).
    encoding_decision, ffmepg_output_file = check_container_extension(file, encoding_decision)
    logger.debug(f'>>>check_container_type<<< Container is: {format_name} so, encoding_decision is: {encoding_decision}')
    return encoding_decision, ffmepg_output_file


def check_container_extension(file, encoding_decision):
    """Swap the file's extension to .mkv and prefix the /boil_hold staging directory."""
    base, ext = os.path.splitext(file)
    if ext.lower() != '.mkv':
        file = base + '.mkv'
        encoding_decision = True
    ffmepg_output_file = '/boil_hold/' + file
    return encoding_decision, ffmepg_output_file


def check_codecs(stream_info, encoding_decision):
    """Walk every stream and accumulate the per-stream ffmpeg map/codec arguments."""
    streams_count = stream_info['format']['nb_streams']
    ffmpeg_command = ''
    logger.debug(f'There are : {streams_count} streams')
    for i in range(streams_count):
        codec_type = stream_info['streams'][i]['codec_type']
        if codec_type == 'video':
            encoding_decision, ffmpeg_command = check_video_stream(encoding_decision, i, stream_info, ffmpeg_command)
        elif codec_type == 'audio':
            encoding_decision, ffmpeg_command = check_audio_stream(encoding_decision, i, stream_info, ffmpeg_command)
        elif codec_type == 'subtitle':
            encoding_decision, ffmpeg_command = check_subtitle_stream(encoding_decision, i, stream_info, ffmpeg_command)
        elif codec_type == 'attachment':
            encoding_decision, ffmpeg_command = check_attachmeent_stream(encoding_decision, i, stream_info, ffmpeg_command)
    return encoding_decision, ffmpeg_command
def check_video_stream(encoding_decision, i, stream_info, ffmpeg_command):
    """Append the video-stream arguments: copy AV1/mjpeg, re-encode everything else to AV1."""
    codec_name = stream_info['streams'][i]['codec_name']
    desired_video_codec = 'av1'
    # Typo fix: log messages said 'Steam'.
    logger.debug('Stream ' + str(i) + ' codec is: ' + codec_name)
    if codec_name in (desired_video_codec, 'mjpeg'):
        # Already AV1, or an embedded cover-art image stream: pass through untouched.
        ffmpeg_command = ffmpeg_command + ' -map 0:' + str(i) + ' -c:v copy'
    else:
        # Dead-code fix: the original if/elif chain was exhaustive, so its
        # trailing 'else: ignoring for now' branch could never run.
        encoding_decision = True
        svt_av1_string = "libsvtav1 -crf 25 -preset 4 -g 240 -pix_fmt yuv420p10le -svtav1-params filmgrain=20:film-grain-denoise=0:tune=0:enable-qm=1:qm-min=0:qm-max=15"
        ffmpeg_command = ffmpeg_command + ' -map 0:' + str(i) + ' -c:v ' + svt_av1_string
    return encoding_decision, ffmpeg_command


def check_audio_stream(encoding_decision, i, stream_info, ffmpeg_command):
    """Audio streams are copied as-is for now (no target audio codec decided yet)."""
    codec_name = stream_info['streams'][i]['codec_name']
    logger.debug('Stream ' + str(i) + ' codec is: ' + codec_name)
    ffmpeg_command = ffmpeg_command + ' -map 0:' + str(i) + ' -c:a copy'
    return encoding_decision, ffmpeg_command


def check_subtitle_stream(encoding_decision, i, stream_info, ffmpeg_command):
    """Subtitle streams are copied as-is for now (no target subtitle codec decided yet)."""
    codec_name = stream_info['streams'][i]['codec_name']
    logger.debug('Stream ' + str(i) + ' codec is: ' + codec_name)
    ffmpeg_command = ffmpeg_command + ' -map 0:' + str(i) + ' -c:s copy'
    return encoding_decision, ffmpeg_command


def check_attachmeent_stream(encoding_decision, i, stream_info, ffmpeg_command):
    """Attachment streams (fonts, cover art) are copied as-is.

    Attachments may not carry a codec_name, so nothing is inspected here.
    (The misspelled function name is kept: check_codecs dispatches on it.)
    """
    ffmpeg_command = ffmpeg_command + ' -map 0:' + str(i) + ' -c:t copy'
    return encoding_decision, ffmpeg_command


# >>>>>>>>>>>>>>>>>>>>>>                    <<<<<<<<<<<<<<<<<<<<<<
# This section starts the actual processing of media files
# >>>>>>>>>>>>>>>>>>>>>>                    <<<<<<<<<<<<<<<<<<<<<<


@app.task()
def process_ffmpeg(file_located_data):
    """Validate, encode, and swap one media file into place, then record the results.

    Pipeline: prelaunch checks -> ffmpeg encode -> postlaunch checks -> move
    the encoded file over the original -> write stats to the DB. Each stage
    gates the next; on any failure the task stops (the failing stage has
    already logged the reason).
    """
    file = file_located_data['file']
    # Guard clauses instead of the original four-level if nesting.
    if ffmpeg_prelaunch_checks(file_located_data) is not True:
        return
    logger.debug(file + ' has passed ffmpeg_prelaunch_checks')
    if run_ffmpeg(file_located_data) is not True:
        return
    logger.debug(file + ' has passed run_ffmpeg')
    if ffmpeg_postlaunch_checks(file_located_data) is not True:
        return
    logger.debug(file + ' has passed ffmpeg_postlaunch_checks')
    if move_media(file_located_data) is not True:
        return
    logger.debug(file + ' has passed move_media')
    logger.info('ffmpeg is done with: ' + file)
    file_path = file_located_data['file_path']
    ffmepg_output_file_name = file_located_data['ffmepg_output_file_name']
    file_located_data['new_file_size'] = get_file_size_kb(destination_file_name_function(file_path, ffmepg_output_file_name))
    write_results(file_located_data)
    logger.debug(json.dumps(file_located_data, indent=4))
################# Pre Launch Checks #################

def ffmpeg_prelaunch_checks(file_located_data):
    """Return True only when the source still exists, is unchanged, and decodes cleanly."""
    pre_launch_file_path = file_located_data['file_path']
    pre_launch_old_file_size = file_located_data['old_file_size']
    # Flattened from the original nested ifs; short-circuits left to right and
    # always yields an explicit bool (the nested version could return None).
    return (prelaunch_file_exists(pre_launch_file_path)
            and prelaunch_hash_match(pre_launch_file_path, pre_launch_old_file_size)
            and prelaunch_file_validation(pre_launch_file_path))


def prelaunch_file_exists(file_path):
    """True when the source file is still on disk."""
    if file_exists(file_path):
        logger.debug(str(file_path) + ' Passed prelaunch_file_exists')
        return True
    logger.error(str(file_path) + ' Failed prelaunch_file_exists')
    return False


def prelaunch_hash_match(file_path, pre_launch_old_file_size):
    """True when the file's current size matches the size recorded at discovery.

    Despite the name, this compares sizes in KB, not hashes — it is a cheap
    'has the file changed since it was queued' check.
    """
    current_file_size = get_file_size_kb(file_path)
    if pre_launch_old_file_size == current_file_size:
        logger.debug(str(file_path) + ' passed prelaunch_hash_match')
        return True
    logger.error(str(file_path) + ' failed prelaunch_hash_match')
    return False


def prelaunch_file_validation(file_path):
    """True when the source decodes without errors (see validate_video)."""
    if validate_video(file_path):
        logger.debug(str(file_path) + ' passed prelaunch_file_validation')
        return True
    logger.error(str(file_path) + ' failed prelaunch_file_validation')
    return False


################# Run FFMPEG #################

def run_ffmpeg(file_located_data):
    """Run ffmpeg for this file; return True only when ffmpeg exits with code 0."""
    ffmpeg_string_settings = 'ffmpeg -hide_banner -loglevel 16 -stats -stats_period 10 -y -i'
    ffmpeg_stringfile_path = file_located_data['file_path']
    ffmpeg_stringffmpeg_command = file_located_data['ffmpeg_command']
    ffmpeg_stringffmepg_output_file_name = file_located_data['ffmepg_output_file_name']
    output_ffmpeg_command = f"{ffmpeg_string_settings} \"{ffmpeg_stringfile_path}\" {ffmpeg_stringffmpeg_command} \"{ffmpeg_stringffmepg_output_file_name}\""
    logger.info('ffmpeg_command is: ' + output_ffmpeg_command)
    logger.info('running ffmpeg now')
    # NOTE(review): shell=True with interpolated paths is injection-prone for
    # filenames containing quotes; kept because ffmpeg_command arrives as a
    # pre-built string — consider switching to list-form argv upstream.
    try:
        process = subprocess.Popen(output_ffmpeg_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
        for line in process.stdout:
            logger.debug(line)
        # Bug fix: the original returned True unconditionally, so failed
        # encodes were treated as successes. Wait and require exit code 0.
        returncode = process.wait()
        if returncode == 0:
            return True
        logger.error('ffmpeg exited with non-zero code: ' + str(returncode))
        return False
    except Exception as e:
        logger.error(f"Error: {e}")
        return False


################# Post Launch Checks #################


def ffmpeg_postlaunch_checks(file_located_data):
    """Return True when both files exist and the encoded output validates."""
    post_launch_original_file = file_located_data['file_path']
    post_launch_encoded_file = file_located_data['ffmepg_output_file_name']
    if post_launch_file_check(post_launch_original_file, post_launch_encoded_file):
        if post_launch_file_validation(post_launch_encoded_file):
            # Bug fix: the original logged 'failed ... failed' on this success path.
            logger.debug(str(post_launch_encoded_file) + ' passed ffmpeg_postlaunch_checks')
            return True
    logger.error(str(post_launch_encoded_file) + ' failed ffmpeg_postlaunch_checks')
    return False


def post_launch_file_check(post_launch_original_file, post_launch_encoded_file):
    """True when both the original and the freshly encoded file exist on disk."""
    if file_exists(post_launch_original_file) and file_exists(post_launch_encoded_file):
        logger.debug(str(post_launch_encoded_file) + ' passed post_launch_file_check')
        return True
    logger.error(str(post_launch_encoded_file) + ' failed post_launch_file_check')
    return False


def post_launch_file_validation(post_launch_encoded_file):
    """True when the encoded output decodes cleanly."""
    logger.debug('Starting post_launch_file_validation')
    if validate_video(post_launch_encoded_file):
        logger.debug(str(post_launch_encoded_file) + ' passed post_launch_file_validation')
        return True
    logger.error(str(post_launch_encoded_file) + ' failed post_launch_file_validation')
    return False
logger.debug('Starting post_launch_file_validation') 393 | if validate_video(post_launch_encoded_file) == True: 394 | logger.debug(str(post_launch_encoded_file) + ' passed post_launch_file_validation') 395 | return True 396 | else: 397 | logger.error(str(post_launch_encoded_file) + ' failed post_launch_file_validation') 398 | return False 399 | 400 | 401 | ################# move_media ################# 402 | 403 | 404 | def move_media(file_located_data): 405 | file_path = file_located_data['file_path'] 406 | renamed_file = renamed_file_function(file_path) 407 | ffmepg_output_file_name = file_located_data['ffmepg_output_file_name'] 408 | destination_file_name = destination_file_name_function(file_path, ffmepg_output_file_name) 409 | if rename_original_file_function(file_path, renamed_file): 410 | if move_encoded_file_function(ffmepg_output_file_name, destination_file_name): 411 | if delete_renamed_original_file_function(renamed_file): 412 | return True 413 | else: 414 | return False 415 | 416 | 417 | def renamed_file_function(file_to_be_renamed): 418 | renamed_directory, renamed_filename = os.path.split(file_to_be_renamed) 419 | rename, reext = os.path.splitext(renamed_filename) 420 | new_filename = f"{rename}-copy{reext}" 421 | new_file_path = os.path.join(renamed_directory, new_filename) 422 | return new_file_path 423 | 424 | 425 | def destination_file_name_function(file_path, ffmepg_output_file_name): 426 | # Quick and silly function for creating the correct filepath to move the encoded file to 427 | destination_file_name = os.path.join(os.path.dirname(file_path), os.path.basename(ffmepg_output_file_name)) 428 | return destination_file_name 429 | 430 | 431 | def rename_original_file_function(file_path, renamed_file): 432 | # This is here incase any of the move opperations mess up 433 | try: 434 | os.rename(file_path, renamed_file) 435 | except Exception as e: 436 | logger.debug(f"An error occurred: {e}") 437 | if file_exists(renamed_file) == True: 438 | 
logger.debug(file_path + ' passed rename_original_file') 439 | return True 440 | else: 441 | logger.debug(file_path + ' filed rename_original_file') 442 | return False 443 | 444 | 445 | def move_encoded_file_function(ffmepg_output_file_name, destination_file_name): 446 | # Function to move the encoded file to the original file's directory 447 | try: 448 | shutil.move(ffmepg_output_file_name, destination_file_name) 449 | except Exception as e: 450 | logger.debug(f"An error occurred: {e}") 451 | if file_exists(destination_file_name) == True: 452 | logger.debug(destination_file_name + ' has passed move_encoded_file') 453 | return True 454 | else: 455 | logger.debug(destination_file_name + ' has failed move_encoded_file') 456 | return False 457 | 458 | 459 | def delete_renamed_original_file_function(renamed_file): 460 | # Function to delete the renamed original file 461 | try: 462 | os.remove(renamed_file) 463 | except Exception as e: 464 | logger.debug(f"An error occurred: {e}") 465 | if file_exists(renamed_file) == True: 466 | logger.debug(renamed_file + ' has failed delete_renamed_original_file_function') 467 | return False 468 | else: 469 | logger.debug(renamed_file + ' has passed delete_renamed_original_file_function') 470 | return True 471 | 472 | ########################## Common Functions ########################## 473 | 474 | def file_exists(filepath): 475 | file_existance = os.path.isfile(filepath) 476 | # Returns true if the file that is about to be touched is in the expected location 477 | logger.debug (filepath + ' : ' + str(file_existance)) 478 | return file_existance 479 | 480 | def get_file_size_kb(filepath_for_size_kb): 481 | logger.debug('filepath is: ' + str(filepath_for_size_kb)) 482 | file_size_bytes = os.path.getsize(filepath_for_size_kb) 483 | file_size_kb = round(file_size_bytes / 1024) 484 | return file_size_kb 485 | 486 | def validate_video(filepath): 487 | # This function determines if a video is valid, or if the video contains errors 488 | # 
Returns: 489 | # Failure if the shell command returns anything; i.e. one of the streams is bad 490 | # Success if the shell command doesn't return anything; i.e. the streams are good 491 | # Error if the shell command fails; this shouldn't happen 492 | try: 493 | command = 'ffmpeg -v error -i "' + filepath + '" -f null -' 494 | result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) 495 | if result.stdout or result.stderr: 496 | logger.debug ('File failed validation') 497 | return False 498 | else: 499 | logger.debug ('File passed validation') 500 | return True 501 | except Exception as e: 502 | return f"Error: {e}" 503 | 504 | 505 | # >>>>>>>>>>>>>>>>>>>>>> <<<<<<<<<<<<<<<<<<<<<< 506 | # This section starts the discovery of the files by searching directories 507 | # >>>>>>>>>>>>>>>>>>>>>> <<<<<<<<<<<<<<<<<<<<<< 508 | 509 | def write_results(file_located_data): 510 | unique_identifier = file_located_data['file'] + str(datetime.now().microsecond) 511 | file_name = file_located_data['file'] 512 | file_path = file_located_data['file_path'] 513 | config_name = 'placeholder' 514 | new_file_size = file_located_data['new_file_size'] 515 | old_file_size = file_located_data['old_file_size'] 516 | new_file_size_difference = old_file_size - new_file_size 517 | watch_folder = file_located_data['directory'] 518 | ffmpeg_encoding_string = file_located_data['ffmpeg_command'] 519 | 520 | if len(ffmpeg_encoding_string) > 999: 521 | # the varchar for ffmpeg_encoding_string is 999 characters. 
This is to keep the db write from failing at 1000 characters 522 | ffmpeg_encoding_string = ffmpeg_encoding_string[:999] 523 | 524 | logger.info('Writing results') 525 | insert_record(unique_identifier, file_name, file_path, config_name, new_file_size, new_file_size_difference, old_file_size, watch_folder, ffmpeg_encoding_string) 526 | logger.info('Writing results complete') 527 | 528 | 529 | def insert_record(unique_identifier, file_name, file_path, config_name, new_file_size, new_file_size_difference, old_file_size, watch_folder, ffmpeg_encoding_string): 530 | try: 531 | # Connection details 532 | connection = mysql.connector.connect( 533 | host='192.168.1.110', 534 | port=32053, # replace with your non-default port 535 | database='boilest', 536 | user='boilest', 537 | password='boilest' 538 | ) 539 | 540 | if connection.is_connected(): 541 | cursor = connection.cursor() 542 | recorded_date = datetime.now() # Current date and time 543 | 544 | insert_query = """ 545 | INSERT INTO ffmpeghistory ( 546 | unique_identifier, recorded_date, file_name, file_path, config_name, 547 | new_file_size, new_file_size_difference, old_file_size, watch_folder, ffmpeg_encoding_string 548 | ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) 549 | """ 550 | 551 | record = ( 552 | unique_identifier, recorded_date, file_name, file_path, config_name, 553 | new_file_size, new_file_size_difference, old_file_size, watch_folder, ffmpeg_encoding_string 554 | ) 555 | 556 | cursor.execute(insert_query, record) 557 | connection.commit() 558 | logger.debug("Record inserted successfully") 559 | 560 | except Error as e: 561 | logger.error(f"Error while connecting to MariaDB: {e}") 562 | 563 | finally: 564 | if connection.is_connected(): 565 | cursor.close() 566 | connection.close() 567 | logger.debug("MariaDB connection is closed") 568 | 569 | --------------------------------------------------------------------------------