├── bbb_dl
    ├── __init__.py
    ├── version.py
    ├── ffmpeg.py
    ├── batch.py
    ├── utils.py
    └── main.py
├── .gitignore
├── bbb-dl
├── bbb-dl-batch
├── pyproject.toml
├── AUTHORS
├── .github
    └── workflows
    │   └── python-publish.yml
├── LICENSE
├── .vscode
    └── settings.json
├── setup.py
└── README.md


/bbb_dl/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/bbb_dl/version.py:
--------------------------------------------------------------------------------
# Single source of the package version; setup.py reads it by exec()-ing this file.
__version__ = '1.0.13'
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /.idea
 2 | /tests
 3 | __pycache__
 4 | bbb_dl.egg-info
 5 | dist
 6 | TODO.md
 7 | /.vscode/launch.json
 8 | /.venv
 9 | /build
10 | 


--------------------------------------------------------------------------------
/bbb-dl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 | 
4 | import bbb_dl.main
5 | 
6 | 
# Start the bbb-dl command line interface only when this file is executed as a script.
if __name__ == "__main__":
    bbb_dl.main.main()
9 | 


--------------------------------------------------------------------------------
/bbb-dl-batch:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 | 
4 | import bbb_dl.batch
5 | 
6 | 
# Start the bbb-dl-batch command line interface only when this file is executed as a script.
if __name__ == "__main__":
    bbb_dl.batch.main()
9 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 120
3 | target-version = ['py38']
4 | skip-string-normalization = true
5 | 
6 | [tool.isort]
7 | profile = "black"


--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
 1 | AGPonomarev
 2 | AliRezaBeigy
 3 | c0d3d3v (Daniel Vogt)
 4 | CreateWebinar.com
 5 | federicotorrielli (Federico Torrielli)
 6 | fer22f (Fernando K)
 7 | Ian Ramirez-España (ianespana)
 8 | JohannesEbke (Johannes Ebke)
 9 | Maik (Angerstoner)
10 | mgudemann (Matthias Güdemann) 
11 | nicholas0g (Nicholas Giordano)
12 | olberger (Olivier Berger)
13 | persuader
14 | rscircus (Roland Siegbert)
15 | SamuelePilleri (Samuele Pilleri)
16 | tilmanmoser (Tilman Moser) 
17 | wallento (Stefan Wallentowitz)


--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
 3 | 
 4 | name: Upload Python Package
 5 | 
 6 | on:
 7 |   release:
 8 |     types: [created]
 9 | 
10 | jobs:
11 |   deploy:
12 | 
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |     - uses: actions/checkout@v2
17 |     - name: Set up Python
18 |       uses: actions/setup-python@v2
19 |       with:
20 |         python-version: '3.x'
21 |     - name: Install dependencies
22 |       run: |
23 |         python -m pip install --upgrade pip
24 |         pip install setuptools wheel twine
25 |     - name: Build and publish
26 |       env:
27 |         TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
28 |         TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
29 |       run: |
30 |         python setup.py sdist bdist_wheel
31 |         twine upload dist/*
32 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 See AUTHORS file for all the authors 
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "editor.formatOnSave": true,
 3 |     "editor.codeActionsOnSave": {
 4 |         "source.organizeImports": "explicit"
 5 |     },
 6 |     "isort.args": [
 7 |         "--profile",
 8 |         "black"
 9 |     ],
10 |     "python.linting.enabled": true,
11 |     "python.linting.lintOnSave": true,
12 |     "python.linting.pylintEnabled": true,
13 |     "python.linting.pylintArgs": [
14 |         "--enable=W0614,W0611",
15 |         "--disable=C0103,C0111,C0114,C0115,C0116,W0223,raise-missing-from,unnecessary-pass",
16 |         "--max-line-length=120"
17 |     ],
18 |     "python.linting.flake8Enabled": true,
19 |     "python.linting.flake8Args": [
20 |         "--max-line-length=120",
21 |         "--ignore=E203,E266,W503",
22 |     ],
23 |     "python.formatting.provider": "black",
24 |     "python.formatting.blackArgs": [
25 |         "--line-length",
26 |         "120",
27 |         "--target-version",
28 |         "py38",
29 |         "--skip-string-normalization"
30 |     ],
31 |     "editor.rulers": [
32 |         120
33 |     ],
34 |     "cSpell.diagnosticLevel": "Hint",
35 |     "workbench.colorCustomizations": {
36 |         "editorHint.foreground": "#ff0000",
37 |     },
38 |     "gitlens.advanced.blame.customArguments": [
39 |         "--ignore-revs-file",
40 |         "${workspaceRoot}/.git-blame-ignore-revs"
41 |     ]
42 | }


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from os import path
 2 | 
 3 | from setuptools import find_packages, setup
 4 | 
 5 | # Get the version from bbb_dl/version.py without importing the package
 6 | exec(compile(open('bbb_dl/version.py', encoding="utf-8").read(), 'bbb_dl/version.py', 'exec'))
 7 | 
 8 | 
 9 | def readme():
10 |     this_directory = path.abspath(path.dirname(__file__))
11 |     with open(path.join(this_directory, 'README.md'), encoding='utf-8') as f:
12 |         return f.read()
13 | 
14 | 
# Package metadata. The license field matches the LICENSE file and the
# 'License :: OSI Approved :: MIT License' classifier below.
setup(
    name='bbb-dl',
    version=__version__,
    description='Big Blue Button Downloader that downloads a BBB lesson as MP4 video',
    long_description=readme(),
    long_description_content_type='text/markdown',
    url='https://github.com/C0D3D3V/bbb-dl',
    author='C0D3D3V',
    license='MIT',
    packages=find_packages(),
    include_package_data=True,
    entry_points={
        'console_scripts': [
            'bbb-dl = bbb_dl.main:main',
            'bbb-dl-batch = bbb_dl.batch:main',
        ],
    },
    python_requires='>=3.7',
    install_requires=[
        'aiofiles>=22.1.0',
        'aiohttp>=3.8.3',
        'certifi>=2020.4.5.2',
        'colorama>=0.4.6',
        'playwright>=1.29.0',
        'python-ffmpeg>=2.0.12',
        'requests>=2.24.0',
    ],
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: End Users/Desktop',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 3 :: Only',
        'Topic :: Education',
        'Topic :: Internet :: WWW/HTTP :: Indexing/Search',
        'Topic :: Multimedia :: Video',
        'Topic :: Multimedia :: Sound/Audio',
        'Topic :: Utilities',
    ],
    zip_safe=False,
)
55 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Big Blue Button (BBB) Downloader
  2 | 
  3 | Downloads a BBB lesson as MP4 video.
  4 | The assembled video includes:
  5 | 
  6 | - shared audio and webcams video
  7 | - presented slides with
  8 |   - whiteboard actions (text and drawings)
  9 |   - cursor movements
 10 |   - zooming
 11 | - screen sharing
 12 | 
 13 | If something does not work, feel free to [contact me](https://github.com/C0D3D3V/bbb-dl/issues). 
 14 | 
 15 | ### Setup
 16 | 1. Install [Python](https://www.python.org/) >=3.7
 17 | 2. Install [ffmpeg](https://github.com/C0D3D3V/Moodle-Downloader-2/wiki/Installing-ffmpeg)
 18 | 3. Run: `pip install --user bbb-dl`
 19 | 4. Run `python -m playwright install chromium`
 20 | 
 21 | 5. Run `bbb-dl --help` to see all options
 22 | 
 23 | If you ever need to update `bbb-dl` run: `pip install -U bbb-dl`
 24 | 
 25 | 
 26 | <details>
 27 |   <summary> For Experts: Click here for an alternative setup using a virtual environment</summary>
 28 | 
 29 | 1. Install [Python](https://www.python.org/) >=3.7 and [git](https://git-scm.com/downloads)
 30 | 2. Install  `virtualenv`: `pip install virtualenv`
 31 | 3. Create a directory where you wish to install bbb-dl. Open a terminal in the desired directory
 32 | 4. Clone this repository into that folder: `git clone https://github.com/C0D3D3V/bbb-dl.git .`
 33 | 5. Run `virtualenv venv` to create the virtual environment
 34 | 6. Run `source venv/bin/activate` to activate the virtual environment (on Windows use `venv\Scripts\activate`)
 35 | 7. Install `bbb-dl`: `pip install .`
 36 | 8. Install [ffmpeg](https://github.com/C0D3D3V/Moodle-Downloader-2/wiki/Installing-ffmpeg)
 37 | 9. Run `playwright install chromium`
 38 | 10. Run `bbb-dl --help` to see all options
 39 | 
 40 | To deactivate the virtual environment run: `deactivate`
 41 | </details>
 42 | 
 43 | ### Usage
 44 | 
 45 | **Temporary files are stored by default in the application data folder**
 46 | 
 47 | - The `--backup` option uses the same location
 48 | - You can change this location with the `--working-dir` option
 49 | - On Windows, the folder is located in `%localappdata%\bbb-dl`
 50 | - On Linux / MacOS, the folder is located in `~/.local/share/bbb-dl/`
 51 | - If you used the `--keep-tmp-files` option and you run the program again with other `--skip-annotations` or `--skip-cursor` options, then you may want to remove the corresponding `frames` folder inside the temporary directory, because existing frames are not overwritten.
 52 | - If ffmpeg has an error and a file has not been finished, it should be deleted from the temporary directory.
 53 | 
 54 | Example call:
 55 | 
 56 | `bbb-dl --skip-cursor https://your.bbb.org/playback/presentation/2.3/playback.html?meetingId=5d9100very_long_id70001800032c-160100033965`
 57 | 
 58 | 
 59 | ```
 60 | usage: bbb-dl [-h] [-ao] [-sw] [-swfd] [-sa] [-sc] [-sz] [-bk] [-kt] [-v] [--ffmpeg-location FFMPEG_LOCATION] [-scv] [-ais] [-uac]
 61 |               [-ftv FORCE_TLS_VERSION] [--version] [--encoder ENCODER] [--audiocodec AUDIOCODEC] [--preset PRESET] [--crf CRF] [-f FILENAME]
 62 |               [-od OUTPUT_DIR] [-wd WORKING_DIR] [-mpc MAX_PARALLEL_CHROMES] [-fw FORCE_WIDTH] [-fh FORCE_HEIGHT]
 63 |               URL
 64 | 
 65 | Big Blue Button Downloader that downloads a BBB lesson as MP4 video
 66 | 
 67 | positional arguments:
 68 |   URL                   URL of a BBB lesson
 69 | 
 70 | options:
 71 |   -h, --help            show this help message and exit
 72 |   -ao, --audio-only     Extract only the audio from the presentation, do not generate video.
 73 |   -sw, --skip-webcam    Skip adding the webcam video as an overlay to the final video. This will reduce the time to generate the final video
 74 |   -swfd, --skip-webcam-freeze-detection
 75 |                         Skip detecting if the webcam video is completely empty. It is assumed the webcam recording is not empty. This will reduce
 76 |                         the time to generate the final video
 77 |   -sa, --skip-annotations
 78 |                         Skip capturing the annotations of the professor. This will reduce the time to generate the final video
 79 |   -sc, --skip-cursor    Skip capturing the cursor of the professor. This will reduce the time to generate the final video
 80 |   -sz, --skip-zoom      Skip zooming into the presentation. All presentation slides are rendered in full size, which may result in sharper output
 81 |                         video. However, consequently also to smaller font.
 82 |   -bk, --backup         Downloads all the content from the server and then stops. After using this option, you can run bbb-dl again to create the
 83 |                         video based on the saved files
 84 |   -kt, --keep-tmp-files
 85 |                         Keep the temporary files after finish. In case of an error bbb-dl will reuse the already generated files
 86 |   -v, --verbose         Print more verbose debug information
 87 |   --ffmpeg-location FFMPEG_LOCATION
 88 |                         Optional path to the directory in that your installed ffmpeg executable is located (Use it if ffmpeg is not located in your
 89 |                         system PATH)
 90 |   -scv, --skip-cert-verify
 91 |                         Suppress HTTPS certificate validation
 92 |   -ais, --allow-insecure-ssl
 93 |                         Allow connections to unpatched servers. Use this option if your server uses a very old SSL version.
 94 |   -uac, --use-all-ciphers
 95 |                         Allow connections to servers that use insecure ciphers. Use this option if your server uses an insecure cipher.
 96 |   -ftv FORCE_TLS_VERSION, --force-tls-version FORCE_TLS_VERSION
 97 |                         Force the client to use a specify tls version. E.g: TLSv1_3
 98 |   --version             Print program version and exit
 99 |   --encoder ENCODER     Optional encoder to pass to ffmpeg (default libx264)
100 |   --audiocodec AUDIOCODEC
101 |                         Optional audiocodec to pass to ffmpeg (default copy the codec from the original source)
102 |   --preset PRESET       Optional preset to pass to ffmpeg (default fast, a preset that can be used with all encoders)
103 |   --crf CRF             Optional crf to pass to ffmpeg (default 23, lower crf (e.g 22) usually means larger file size and better video quality)
104 |   -f FILENAME, --filename FILENAME
105 |                         Optional output filename
106 |   -od OUTPUT_DIR, --output-dir OUTPUT_DIR
107 |                         Optional output directory for final video
108 |   -wd WORKING_DIR, --working-dir WORKING_DIR
109 |                         Optional output directory for all temporary directories/files
110 |   -mpc MAX_PARALLEL_CHROMES, --max-parallel-chromes MAX_PARALLEL_CHROMES
111 |                         Maximum number of chrome browser instances used to generate frames
112 |   -fw FORCE_WIDTH, --force-width FORCE_WIDTH
113 |                         Force width on final output. (e.g. 1280) This can reduce the time to generate the final video
114 |   -fh FORCE_HEIGHT, --force-height FORCE_HEIGHT
115 |                         Force height on final output. (e.g. 720) This can reduce the time to generate the final video
116 | ```
117 |  
118 | ### Batch processing
119 | 
120 |  If you want to do batch processing you can use `bbb-dl-batch`. All passed arguments will be passed to the respective `bbb-dl`. `bbb-dl-batch` itself only needs the path to a text file in which URLs to bbb sessions are specified line by line. See `bbb-dl-batch --help` for more information.
121 | 
122 |  Successfully downloaded URL sessions are added to `successful.txt` in the output folder. Session URLs that could not be successfully downloaded are added to `failed.txt` in the output folder. 
123 | 
124 | ### The video quality is too low, how can I improve the output quality?
125 | 
126 | First of all, you should check if the BBB session you downloaded really looks better in the browser than the video you created. When comparing, make sure that the presentation in the browser has the same resolution as the video. 
127 | 
128 | Among other things, `ffmpeg` offers two options with which you can influence the output quality. You can experiment with them and see if the output improves.
129 | 
130 | - `--preset` is the first of these options, it can take values from -1 to 13 or in words ultrafast, superfast, veryfast, faster, fast (default), medium, slow and veryslow. A slower encoder often delivers better quality, so try `--preset medium` to see if the quality improves.
131 | - `--crf` is the second of these options, it can take values from -1 to 63. A lower crf (e.g 22) usually means larger file size and better video quality, so try `--crf 22` to see if the quality improves.
132 | 
133 | `bbb-dl` tries to estimate a suitable output resolution for the final video, this choice may or may not be good. You can force your own output resolution with the `--force-width` and `--force-height` options.
134 | 
135 | - A high resolution would be e.g. FullHD with 1920x1080. Be warned if the slides themselves are not that large you may get blurry slides.
136 | - A lower resolution for faster rendering would be e.g. HD with 1280x720. It may be that the output looks sharper or less sharp, test it yourself.
137 | 
138 | ### How can I speed up the rendering process?
139 | 
140 | FFmpeg can use different hardware accelerators for encoding videos. You can find more information about this here: https://trac.ffmpeg.org/wiki/HWAccelIntro
141 | 
142 | To use such hardware for encoding you may need to install drivers as indicated on the website and then set the `--encoder` option to the appropriate encoder. 
143 | 
144 | For example, if you have an **Nvidia** graphics card installed on a computer, you can use it with the [NVENC](https://trac.ffmpeg.org/wiki/HWAccelIntro#CUDANVENCNVDEC) encoder. For this, you simply set the option `--encoder h264_nvenc`. You can see on the [Nvidia website which graphics cards support this option](https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new). If your graphics card also supports H.265 (HEVC) you can set the option `--encoder hevc_nvenc` instead, which might be even faster (you have to test this yourself).
145 | 
146 | - For Intel CPUs, you can try the encoder `h264_qsv` (Use the option `--encoder h264_qsv`). Sometimes this encoder is faster than your graphics card encoder.
147 | 
148 | - For AMD CPUs / GPUs, you can try the encoder `h264_amf` (Use the option `--encoder h264_amf`).
149 | 
150 | > You have to test yourself if it is faster to use your hardware encoder or not. In some cases, hardware encoders are slower than using the CPU directly. 
151 | 
152 | 
153 | ### Other downloaders
154 | 
155 | [bbb-video-download](https://github.com/tilmanmoser/bbb-video-download)
156 | - It uses a clever approach written in Node.js that can be easily integrated into a bbb server
157 | - You can use the `--backup` option to feed `bbb-video-download`.
158 | - A multi-threaded port in go-lang can be found here: [bbb-video-converter](https://github.com/cli-ish/bbb-video-converter)
159 | 
160 | [bbb-download](https://github.com/fossasia/bbb-download)
161 | - Takes advantage of the fact that you can use the bbb-player to play the session data offline.
162 | - Instead of creating a video file, this downloader downloads only the necessary files from the server, so you can use the bbb-player to play the session offline. The player is provided to you via shortcut.
163 | 
164 | If someone wants to link another downloader here, which offers e.g. functions that bbb-dl does not offer, feel free to open an issue. 
165 | 
166 | ### License
167 | This project is licensed under the terms of the *MIT License*. For further information, please look [here](LICENSE).
168 | 


--------------------------------------------------------------------------------
/bbb_dl/ffmpeg.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | import subprocess
  4 | from dataclasses import dataclass
  5 | from itertools import cycle
  6 | from subprocess import CalledProcessError
  7 | from typing import List
  8 | 
  9 | from ffmpeg import Progress
 10 | from ffmpeg.asyncio import FFmpeg
 11 | 
 12 | from bbb_dl.utils import Log
 13 | from bbb_dl.utils import PathTools as PT
 14 | from bbb_dl.utils import formatSeconds
 15 | 
 16 | 
@dataclass
class VideoInfo:
    """Basic properties of a video file as collected by `FFMPEG.get_video_infos`."""

    path: str  # path that was handed to ffprobe
    # NOTE(review): get_video_infos passes None here when no stream or no duration is reported and
    # otherwise forwards the raw value from ffprobe's JSON unconverted (presumably a string - confirm),
    # so the `float` annotation is not enforced.
    duration: float
    width: int  # 0 when no video stream / no width was reported
    height: int  # 0 when no video stream / no height was reported
 23 | 
 24 | 
 25 | class FFMPEG:
 26 |     def __init__(self, verbose: bool, ffmpeg_location: str, encoder: str, audiocodec: str, preset: str, crf: int):
 27 |         self.verbose = verbose
 28 |         self.ffmpeg_path = 'ffmpeg'
 29 |         self.ffprobe_path = 'ffprobe'
 30 |         self.spinner = cycle('/|\\-')
 31 | 
 32 |         if ffmpeg_location is not None:
 33 |             found = False
 34 |             for check_name in ['ffmpeg', 'ffmpeg.exe']:
 35 |                 check_path = PT.get_in_dir(ffmpeg_location, check_name)
 36 |                 if os.path.isfile(check_path):
 37 |                     self.ffmpeg_path = check_path
 38 |                     found = True
 39 |             if not found:
 40 |                 Log.error('Error: ffmpeg was not found in your specified --ffmpeg-location path')
 41 |                 exit(-8)
 42 |             found = False
 43 |             for check_name in ['ffprobe', 'ffprobe.exe']:
 44 |                 check_path = PT.get_in_dir(ffmpeg_location, check_name)
 45 |                 if os.path.isfile(check_path):
 46 |                     self.ffprobe_path = check_path
 47 |                     found = True
 48 |             if not found:
 49 |                 Log.error('Error: ffprobe was not found in your specified --ffmpeg-location path')
 50 |                 exit(-9)
 51 | 
 52 |         self.encoder = encoder
 53 |         self.audiocodec = audiocodec
 54 |         self.preset = preset
 55 |         self.crf = crf
 56 |         self.stderr_log = []
 57 | 
 58 |     def on_error(self, code: int):
 59 |         if self.verbose:
 60 |             for line in self.stderr_log:
 61 |                 print(line)
 62 |         else:
 63 |             Log.warning(
 64 |                 'Please run bbb-dl again with the extra option --verbose to get the ffmpeg error message.'
 65 |                 + ' Then add the log output to a new issue on https://github.com/C0D3D3V/bbb-dl/issues'
 66 |             )
 67 |         Log.error(f"Error: FFMpeg failed and returned error code {code}")
 68 |         exit(-10)
 69 | 
 70 |     def on_start(self, arguments: List[str]):
 71 |         self.stderr_log = []
 72 |         if self.verbose:
 73 |             Log.info(f"Running command: {' '.join(arguments)}")
 74 | 
 75 |     def on_progress(self, progress: Progress):
 76 |         print(
 77 |             f'\r\033[KFrame: {progress.frame} FPS: {progress.fps} Size: {progress.size}'
 78 |             + f' Time: {formatSeconds(progress.time.total_seconds())} Bitrate: {progress.bitrate}'
 79 |             + f' Speed: {progress.speed}x {next(self.spinner)}',
 80 |             end='',
 81 |         )
 82 | 
    def on_completed(self):
        """Handler for the ffmpeg "completed" event."""
        # on_progress prints without a trailing newline; end that status line first.
        print()
        Log.info('Command finished')
 86 | 
 87 |     def on_log_stderr(self, line):
 88 |         if self.verbose:
 89 |             if line.find('bitrate=') == -1 and line.find('time=') == -1:
 90 |                 self.stderr_log.append(line)
 91 | 
 92 |     def add_standard_handlers(self, ffmpeg_obj):
 93 |         ffmpeg_obj.on("start", self.on_start)
 94 |         ffmpeg_obj.on("error", self.on_error)
 95 |         ffmpeg_obj.on("progress", self.on_progress)
 96 |         ffmpeg_obj.on("stderr", self.on_log_stderr)
 97 |         ffmpeg_obj.on("completed", self.on_completed)
 98 | 
 99 |     def get_video_infos(self, video_path: str) -> VideoInfo:
100 |         try:
101 |             if self.verbose:
102 |                 Log.info(f'Checking video information of `{video_path}`')
103 |             result = subprocess.run(
104 |                 [
105 |                     self.ffprobe_path,
106 |                     '-v',
107 |                     'error',
108 |                     '-select_streams',
109 |                     "v:0",
110 |                     "-show_entries",
111 |                     "stream=width,height,duration",
112 |                     "-of",
113 |                     "json",
114 |                     video_path,
115 |                 ],
116 |                 capture_output=True,
117 |                 encoding='utf-8',
118 |                 text=True,
119 |                 check=True,
120 |             )
121 |             streams = json.loads(result.stdout).get('streams', [])
122 |             if len(streams) == 0:
123 |                 Log.warning(f"Error: No Stream found in {video_path}")
124 |                 return VideoInfo(video_path, None, 0, 0)
125 |             stream = streams[0]
126 |             return VideoInfo(video_path, stream.get('duration', None), stream.get('width', 0), stream.get('height', 0))
127 |         except CalledProcessError as err:
128 |             print(f"Error: {err}")
129 |             exit(-10)
130 | 
131 |     async def freeze_detect(self, video_path: str) -> bool:
132 |         """
133 |         return true if video is 100% freezed
134 |         """
135 |         ffmpeg = (
136 |             FFmpeg(self.ffmpeg_path)
137 |             .option("hide_banner")
138 |             # .option("nostats")
139 |             .input(video_path)
140 |             .output(
141 |                 '-',
142 |                 vf='freezedetect=n=-60dB:d=2',
143 |                 map='0:v:0',
144 |                 f='null',
145 |             )
146 |         )
147 | 
148 |         freeze_starts = []
149 |         freeze_ends = []
150 | 
151 |         @ffmpeg.on("stderr")
152 |         def on_stderr(line):
153 |             if line.find('lavfi.freezedetect.freeze_end') >= 0:
154 |                 end = float(line.rsplit('lavfi.freezedetect.freeze_end: ', 1)[1])
155 |                 freeze_ends.append(end)
156 |             elif line.find('lavfi.freezedetect.freeze_start') >= 0:
157 |                 start = float(line.rsplit('lavfi.freezedetect.freeze_start: ', 1)[1])
158 |                 freeze_starts.append(start)
159 | 
160 |         self.add_standard_handlers(ffmpeg)
161 | 
162 |         await ffmpeg.execute()
163 |         if len(freeze_ends) == 0 and len(freeze_starts) == 1 and freeze_starts[0] <= 10:
164 |             return True
165 |         return False
166 | 
167 |     async def create_slideshow(self, concat_file_path: str, output_path: str):
168 |         ffmpeg = (
169 |             FFmpeg(self.ffmpeg_path)
170 |             .option("hide_banner")
171 |             .input(
172 |                 concat_file_path,
173 |                 f='concat',
174 |                 # safe='0',
175 |                 # hwaccel="auto",  # In tests it was slower with hwaccel
176 |             )
177 |             .output(
178 |                 output_path,
179 |                 {
180 |                     'c:v': self.encoder,
181 |                     'c:a': self.audiocodec,
182 |                 },
183 |                 framerate='24',
184 |                 r='24',
185 |                 pix_fmt='yuv420p',
186 |                 # g='1',  # activate intra frame codec
187 |                 strict='experimental',
188 |                 crf=self.crf,
189 |                 preset=self.preset,
190 |             )
191 |         )
192 |         self.add_standard_handlers(ffmpeg)
193 | 
194 |         await ffmpeg.execute()
195 | 
196 |     async def resize_deskshare(self, deskshare_path: str, resized_deskshare_path: str, width: int, height: int):
197 |         ffmpeg = (
198 |             FFmpeg(self.ffmpeg_path)
199 |             .option("hide_banner")
200 |             .input(
201 |                 deskshare_path,
202 |                 # hwaccel="auto", # Use encoder to activate hwaccel
203 |             )
204 |             .output(
205 |                 resized_deskshare_path,
206 |                 {
207 |                     'c:v': self.encoder,
208 |                     'c:a': self.audiocodec,
209 |                 },
210 |                 vf=(
211 |                     f'scale=w={width}:h={height}:force_original_aspect_ratio=decrease,'
212 |                     + f'pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:color=white'
213 |                 ),
214 |                 strict='experimental',
215 |                 crf=self.crf,
216 |                 preset=self.preset,
217 |                 framerate='24',
218 |                 r='24',
219 |                 pix_fmt='yuv420p',
220 |                 # g='1',  # activate intra frame codec
221 |             )
222 |         )
223 | 
224 |         self.add_standard_handlers(ffmpeg)
225 | 
226 |         await ffmpeg.execute()
227 | 
228 |     async def add_deskshare_to_slideshow(self, concat_file_path: str, output_path: str):
229 |         ffmpeg = (
230 |             FFmpeg(self.ffmpeg_path)
231 |             .option("hide_banner")
232 |             .input(
233 |                 concat_file_path,
234 |                 f='concat',
235 |                 # safe='0',
236 |                 # hwaccel="auto",   # In tests it was slower with hwaccel
237 |             )
238 |             .output(
239 |                 output_path,
240 |                 {
241 |                     'c:v': self.encoder,
242 |                     'c:a': self.audiocodec,
243 |                 },
244 |                 strict='experimental',
245 |                 crf=self.crf,
246 |                 preset=self.preset,
247 |             )
248 |         )
249 |         self.add_standard_handlers(ffmpeg)
250 | 
251 |         await ffmpeg.execute()
252 | 
253 |     def get_webcam_size(self, slideshow_width, slideshow_height):
254 |         webcam_width = slideshow_width // 5
255 |         webcam_height = webcam_width * 3 // 4
256 | 
257 |         if webcam_height > slideshow_height:
258 |             webcam_height = slideshow_height
259 | 
260 |         if webcam_width % 2:
261 |             webcam_width -= 1
262 |         if webcam_height % 2:
263 |             webcam_height -= 1
264 | 
265 |         return webcam_width, webcam_height
266 | 
267 |     async def add_webcam_to_slideshow(
268 |         self,
269 |         slideshow_path: str,
270 |         webcams_path: str,
271 |         slideshow_width: int,
272 |         slideshow_height: int,
273 |         result_path: str,
274 |     ):
275 |         webcam_width, webcam_height = self.get_webcam_size(slideshow_width, slideshow_height)
276 | 
277 |         ffmpeg = (
278 |             FFmpeg(self.ffmpeg_path)
279 |             .option("hide_banner")
280 |             .input(webcams_path)
281 |             .input(slideshow_path)
282 |             .output(
283 |                 result_path,
284 |                 {
285 |                     'c:v': self.encoder,
286 |                     'c:a': self.audiocodec,
287 |                 },
288 |                 filter_complex=(
289 |                     f'[0:v]scale={webcam_width}:{webcam_height},setpts=PTS-STARTPTS,'
290 |                     + 'format=rgba,colorchannelmixer=aa=0.8'
291 |                     + '[ovrl];[1:v]fps=24,setpts=PTS-STARTPTS[bg];[bg][ovrl]overlay=W-w:H-h:shortest=1'
292 |                 ),
293 |                 strict='experimental',
294 |                 crf=self.crf,
295 |                 preset=self.preset,
296 |             )
297 |         )
298 |         self.add_standard_handlers(ffmpeg)
299 | 
300 |         await ffmpeg.execute()
301 | 
302 |     async def add_audio_to_slideshow(self, slideshow_path: str, webcams_path: str, result_path: str):
303 |         ffmpeg = (
304 |             FFmpeg(self.ffmpeg_path)
305 |             .option("hide_banner")
306 |             .input(webcams_path)
307 |             .input(slideshow_path)
308 |             .output(
309 |                 result_path,
310 |                 {
311 |                     'c:v': self.encoder,
312 |                     'c:a': self.audiocodec,
313 |                 },
314 |                 map=['0:a', '1:v'],
315 |                 strict='experimental',
316 |                 crf=self.crf,
317 |                 preset=self.preset,
318 |                 shortest=None,
319 |             )
320 |         )
321 |         self.add_standard_handlers(ffmpeg)
322 | 
323 |         await ffmpeg.execute()
324 | 
325 |     async def extract_audio(self, webcams_path: str, result_path: str):
326 |         ffmpeg = (
327 |             FFmpeg(self.ffmpeg_path)
328 |             .option("hide_banner")
329 |             .input(webcams_path)
330 |             .output(
331 |                 result_path,
332 |                 {
333 |                     'codec:a': 'libmp3lame',
334 |                     'qscale:a': 2,
335 |                 },
336 |             )
337 |         )
338 |         self.add_standard_handlers(ffmpeg)
339 | 
340 |         await ffmpeg.execute()
341 | 


--------------------------------------------------------------------------------
/bbb_dl/batch.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os
  3 | import subprocess
  4 | from subprocess import CalledProcessError
  5 | from typing import List
  6 | 
  7 | from colorama import just_fix_windows_console
  8 | 
  9 | from bbb_dl.utils import Log
 10 | from bbb_dl.utils import PathTools as PT
 11 | from bbb_dl.utils import Timer, formatSeconds
 12 | from bbb_dl.version import __version__
 13 | 
 14 | 
 15 | class BatchProcessor:
 16 |     def __init__(
 17 |         self,
 18 |         dl_urls_file_path: str,
 19 |         bbb_dl_path: str,
 20 |         output_dir: str,
 21 |         verbose: bool,
 22 |         skip_cert_verify: bool,
 23 |         allow_insecure_ssl: bool,
 24 |         use_all_ciphers: bool,
 25 |         force_tls_version: str,
 26 |         encoder: str,
 27 |         audiocodec: str,
 28 |         audio_only: bool,
 29 |         skip_webcam: bool,
 30 |         skip_webcam_freeze_detection: bool,
 31 |         skip_annotations: bool,
 32 |         skip_cursor: bool,
 33 |         skip_zoom: bool,
 34 |         keep_tmp_files: bool,
 35 |         ffmpeg_location: str,
 36 |         working_dir: str,
 37 |         backup: bool,
 38 |         max_parallel_chromes: int,
 39 |         force_width: int,
 40 |         force_height: int,
 41 |         preset: str,
 42 |         crf: int,
 43 |     ):
 44 |         self.bbb_dl_path = bbb_dl_path
 45 |         option_list = []
 46 |         self.add_bool_option(option_list, '--audio-only', audio_only)
 47 |         self.add_bool_option(option_list, '--skip-webcam', skip_webcam)
 48 |         self.add_bool_option(option_list, '--skip-webcam-freeze-detection', skip_webcam_freeze_detection)
 49 |         self.add_bool_option(option_list, '--skip-annotations', skip_annotations)
 50 |         self.add_bool_option(option_list, '--skip-cursor', skip_cursor)
 51 |         self.add_bool_option(option_list, '--skip-zoom', skip_zoom)
 52 |         self.add_bool_option(option_list, '--backup', backup)
 53 |         self.add_bool_option(option_list, '--verbose', verbose)
 54 |         self.add_bool_option(option_list, '--skip-cert-verify', skip_cert_verify)
 55 |         self.add_bool_option(option_list, '--allow-insecure-ssl', allow_insecure_ssl)
 56 |         self.add_bool_option(option_list, '--use-all-ciphers', use_all_ciphers)
 57 |         self.add_value_option(option_list, '--force-tls-version', force_tls_version)
 58 |         self.add_bool_option(option_list, '--keep-tmp-files', keep_tmp_files)
 59 |         self.add_value_option(option_list, '--ffmpeg-location', ffmpeg_location)
 60 |         self.add_value_option(option_list, '--working-dir', working_dir)
 61 |         self.add_value_option(option_list, '--output-dir', output_dir)
 62 |         self.add_value_option(option_list, '--encoder', encoder)
 63 |         self.add_value_option(option_list, '--audiocodec', audiocodec)
 64 |         self.add_value_option(option_list, '--max-parallel-chromes', max_parallel_chromes)
 65 |         self.add_value_option(option_list, '--force-width', force_width)
 66 |         self.add_value_option(option_list, '--force-height', force_height)
 67 |         self.add_value_option(option_list, '--preset', preset)
 68 |         self.add_value_option(option_list, '--crf', crf)
 69 |         self.default_option_list = option_list
 70 |         self.dl_urls_file_path = dl_urls_file_path
 71 | 
 72 |         self.output_dir_path = self.get_output_dir(output_dir)
 73 | 
 74 |     def get_output_dir(
 75 |         self,
 76 |         output_dir: str,
 77 |     ):
 78 |         return self.check_directory(output_dir, os.getcwd(), 'output', '--output-dir')
 79 | 
 80 |     def check_directory(self, path: str, default_path: str, file_type: str, option_name: str):
 81 |         if path is None:
 82 |             path = default_path
 83 |         else:
 84 |             path = PT.sanitize_path(path)
 85 | 
 86 |         path = PT.get_abs_path(path)
 87 |         try:
 88 |             PT.make_dirs(path)
 89 |         except (OSError, IOError) as err:
 90 |             Log.error(f'Error: Unable to create directory "{path}" for {file_type} files: {str(err)}')
 91 |             Log.warning(
 92 |                 f'You can choose an alternative directory for the {file_type} files with the {option_name} option.'
 93 |             )
 94 |             exit(-2)
 95 | 
 96 |         if not os.access(path, os.R_OK) or not os.access(path, os.W_OK):
 97 |             Log.error(f'Error: Unable to read or write in the directory for {file_type} files {path}')
 98 |             Log.warning(
 99 |                 f'You can choose an alternative directory for the {file_type} files with the {option_name} option.'
100 |             )
101 |             exit(-3)
102 |         return path
103 | 
104 |     def add_value_option(self, option_list, option_name, option):
105 |         if option is not None:
106 |             if option_name not in option_list:
107 |                 option_list.append(option_name)
108 |                 option_list.append(str(option))
109 | 
110 |     def add_bool_option(self, option_list, option_name, option):
111 |         if option:
112 |             if option_name not in option_list:
113 |                 option_list.append(option_name)
114 | 
115 |     def add_url_to_file(self, url: str, file_name: str):
116 |         file_path = PT.get_in_dir(self.output_dir_path, file_name)
117 |         with open(file_path, mode='a+', encoding='utf-8') as fh:
118 |             fh.write(f"{url}\n")
119 | 
120 |     def run(self):
121 |         if not os.path.isfile(self.dl_urls_file_path):
122 |             Log.error(f'Can not find URLs file: {self.dl_urls_file_path}')
123 |             exit(-1)
124 | 
125 |         URL_List = []
126 |         try:
127 |             with open(self.dl_urls_file_path, mode='r', encoding='utf-8') as fh:
128 |                 URL_List = [line.strip() for line in fh.readlines()]
129 |         except OSError as err:
130 |             Log.error(f'Error: {str(err)}')
131 |             exit(-1)
132 | 
133 |         for url in URL_List:
134 |             successful = self.execute_bbb_dl(url)
135 |             if successful:
136 |                 self.add_url_to_file(url, 'successful.txt')
137 |             else:
138 |                 self.add_url_to_file(url, 'failed.txt')
139 | 
140 |     def build_arguments(self, dl_url: str) -> List[str]:
141 |         arguments = [self.bbb_dl_path, dl_url]
142 |         arguments.extend(self.default_option_list)
143 |         return arguments
144 | 
145 |     def execute_bbb_dl(self, url: str) -> bool:
146 |         arguments = self.build_arguments(url)
147 | 
148 |         try:
149 |             result = subprocess.run(
150 |                 arguments,
151 |                 check=True,
152 |             )
153 |         except CalledProcessError as err:
154 |             print(f"BBB-DL exited with Error: {err}")
155 |             return False
156 | 
157 |         if result.returncode == 0:
158 |             Log.success('Completed successfully')
159 |             return True
160 |         else:
161 |             Log.error(f'BBB-DL failed with Error: {result.returncode}')
162 |             return False
163 | 
164 | 
def get_parser():
    """
    Build the argument parser of the batch downloader.

    The parser expects the path of the URL list as positional argument. All other arguments are optional.
    """
    parser = argparse.ArgumentParser(description='Big Blue Button Batch Downloader')
    add = parser.add_argument

    def add_switch(*flags, **settings):
        # Options without a value, they are False unless the flag is given
        add(*flags, action='store_true', **settings)

    add('URLs', type=str, help='Path to a text file containing URLs of BBB lessons, one line per URL')

    add_switch(
        '-ao', '--audio-only', help='Extract only the audio from the presentations, do not generate videos.'
    )

    add(
        '-bp',
        '--bbb-dl-path',
        type=str,
        default='bbb-dl',
        help='Path to bbb-dl. Use it if bbb-dl is not in your system PATH',
    )
    add_switch('-sw', '--skip-webcam', help='Skip adding the webcam video as an overlay to the final videos.')
    add_switch(
        '-swfd',
        '--skip-webcam-freeze-detection',
        help=(
            'Skip detecting if the webcam video is completely empty.'
            ' It is assumed the webcam recordings are not empty.'
        ),
    )
    add_switch('-sa', '--skip-annotations', help='Skip capturing the annotations of the professor')
    add_switch('-sc', '--skip-cursor', help='Skip capturing the cursor of the professor')
    add_switch(
        '-sz',
        '--skip-zoom',
        help=(
            'Skip zooming into the presentations. All presentation slides are rendered in full size,'
            ' which may result in sharper output videos. However, consequently also to smaller font.'
        ),
    )

    add_switch(
        '-bk',
        '--backup',
        help=(
            'Downloads all the content from the server and then stops. After using this option, you can run bbb-dl'
            ' again to create the video based on the saved files'
        ),
    )
    add_switch(
        '-kt',
        '--keep-tmp-files',
        help='Keep the temporary files after finish. In case of an error bbb-dl will reuse the already generated files',
    )

    add_switch('-v', '--verbose', help='Print more verbose debug information')

    add(
        '--ffmpeg-location',
        type=str,
        default=None,
        help=(
            'Optional path to the directory in that your installed ffmpeg executable is located'
            ' (Use it if ffmpeg is not located in your system PATH)'
        ),
    )

    add_switch('-scv', '--skip-cert-verify', help='Suppress HTTPS certificate validation')
    add_switch(
        '-ais',
        '--allow-insecure-ssl',
        dest='allow_insecure_ssl',
        default=False,
        help='Allow connections to unpatched servers. Use this option if your server uses a very old SSL version.',
    )
    add_switch(
        '-uac',
        '--use-all-ciphers',
        dest='use_all_ciphers',
        default=False,
        help=(
            'Allow connections to servers that use insecure ciphers.'
            ' Use this option if your server uses an insecure cipher.'
        ),
    )
    add(
        '-ftv',
        '--force-tls-version',
        type=str,
        help='Force the client to use a specify tls version. E.g: TLSv1_3',
    )

    add('--version', action='version', version='bbb-dl ' + __version__, help='Print program version and exit')

    # Encoding settings that are handed on to ffmpeg
    add(
        '--encoder',
        dest='encoder',
        type=str,
        default='libx264',
        help='Optional encoder to pass to ffmpeg (default libx264)',
    )
    add(
        '--audiocodec',
        dest='audiocodec',
        type=str,
        default='copy',
        help='Optional audiocodec to pass to ffmpeg (default copy the codec from the original source)',
    )
    add(
        '--preset',
        dest='preset',
        type=str,
        default='fast',
        help='Optional preset to pass to ffmpeg (default fast, a preset that can be used with all encoders)',
    )
    add(
        '--crf',
        dest='crf',
        type=int,
        default=23,
        help=(
            'Optional crf to pass to ffmpeg'
            ' (default 23, lower crf (e.g 22) usually means larger file size and better video quality)'
        ),
    )

    add('-od', '--output-dir', type=str, default=None, help='Optional output directory for final videos')

    add(
        '-wd',
        '--working-dir',
        type=str,
        default=None,
        help='Optional output directory for all temporary directories/files',
    )

    add(
        '-mpc',
        '--max-parallel-chromes',
        type=int,
        default=10,
        help='Maximum number of chrome browser instances used to generate frames',
    )

    add('-fw', '--force-width', type=int, default=None, help='Force width on final outputs')

    add('-fh', '--force-height', type=int, default=None, help='Force height on final outputs')

    return parser
373 | 
374 | 
375 | # --- called at the program invocation: -------------------------------------
def main(args=None):
    """
    Entry point of the batch downloader.

    Parses the command line, processes all URLs with a BatchProcessor and logs the total duration.

    :param args: list of arguments that is parsed instead of the command line, None selects the command line
    """
    just_fix_windows_console()
    options = get_parser().parse_args(args)

    with Timer() as final_t:
        processor = BatchProcessor(
            dl_urls_file_path=options.URLs,
            bbb_dl_path=options.bbb_dl_path,
            output_dir=options.output_dir,
            verbose=options.verbose,
            skip_cert_verify=options.skip_cert_verify,
            allow_insecure_ssl=options.allow_insecure_ssl,
            use_all_ciphers=options.use_all_ciphers,
            force_tls_version=options.force_tls_version,
            encoder=options.encoder,
            audiocodec=options.audiocodec,
            audio_only=options.audio_only,
            skip_webcam=options.skip_webcam,
            skip_webcam_freeze_detection=options.skip_webcam_freeze_detection,
            skip_annotations=options.skip_annotations,
            skip_cursor=options.skip_cursor,
            skip_zoom=options.skip_zoom,
            keep_tmp_files=options.keep_tmp_files,
            ffmpeg_location=options.ffmpeg_location,
            working_dir=options.working_dir,
            backup=options.backup,
            max_parallel_chromes=options.max_parallel_chromes,
            force_width=options.force_width,
            force_height=options.force_height,
            preset=options.preset,
            crf=options.crf,
        )
        processor.run()
    Log.info(f'BBB-DL finished and took: {formatSeconds(final_t.duration)}')
410 | 


--------------------------------------------------------------------------------
/bbb_dl/utils.py:
--------------------------------------------------------------------------------
  1 | import collections
  2 | import contextlib
  3 | import functools
  4 | import html
  5 | import http
  6 | import http.cookiejar
  7 | import http.server
  8 | import io
  9 | import itertools
 10 | import math
 11 | import os
 12 | import re
 13 | import socket
 14 | import ssl
 15 | import sys
 16 | import time
 17 | import unicodedata
 18 | from functools import lru_cache
 19 | from pathlib import Path
 20 | 
 21 | import requests
 22 | import urllib3
 23 | from aiohttp.cookiejar import CookieJar
 24 | from requests.utils import DEFAULT_CA_BUNDLE_PATH, extract_zipped_paths
 25 | 
 26 | 
 27 | class QuietRequestHandler(http.server.SimpleHTTPRequestHandler):
 28 |     def log_message(self, *args):
 29 |         pass
 30 | 
 31 | 
 32 | def get_free_port():
 33 |     """
 34 |     Ask the system for a free port.
 35 |     In case of error return error message.
 36 |     :return: {Tuple}
 37 |     """
 38 |     port = None
 39 |     error = {}
 40 |     with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as soc:
 41 |         try:
 42 |             soc.bind(('127.0.0.1', 0))
 43 |             sock_name = soc.getsockname()
 44 |             if isinstance(sock_name, tuple) and len(sock_name) == 2:
 45 |                 port = sock_name[1]
 46 |         except socket.error as e:
 47 |             error = {'errno': e.errno, 'msg': str(e)}
 48 | 
 49 |         return port, error
 50 | 
 51 | 
 52 | def check_verbose() -> bool:
 53 |     """Return if the verbose mode is active"""
 54 |     return '-v' in sys.argv or '--verbose' in sys.argv
 55 | 
 56 | 
 57 | def check_debug() -> bool:
 58 |     """Return if the debugger is currently active"""
 59 |     return 'pydevd' in sys.modules or (hasattr(sys, 'gettrace') and sys.gettrace() is not None)
 60 | 
 61 | 
# Named tuple `Time` with the fields hours, minutes, seconds and milliseconds, it is the return type of
# timetuple_from_msec
_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
 63 | 
 64 | 
 65 | def float_or_none(v, scale=1, invscale=1, default=None):
 66 |     if v is None:
 67 |         return default
 68 |     try:
 69 |         return float(v) * invscale / scale
 70 |     except (ValueError, TypeError):
 71 |         return default
 72 | 
 73 | 
 74 | def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
 75 |     """Formats numbers with decimal sufixes like K, M, etc"""
 76 |     num, factor = float_or_none(num), float(factor)
 77 |     if num is None or num < 0:
 78 |         return None
 79 |     POSSIBLE_SUFFIXES = 'kMGTPEZY'
 80 |     exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
 81 |     suffix = ['', *POSSIBLE_SUFFIXES][exponent]
 82 |     if factor == 1024:
 83 |         suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
 84 |     converted = num / (factor**exponent)
 85 |     return fmt % (converted, suffix)
 86 | 
 87 | 
 88 | def format_bytes(bytes):
 89 |     return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
 90 | 
 91 | 
 92 | def append_get_idx(list_obj, item):
 93 |     idx = len(list_obj)
 94 |     list_obj.append(item)
 95 |     return idx
 96 | 
 97 | 
 98 | def timetuple_from_msec(msec):
 99 |     secs, msec = divmod(msec, 1000)
100 |     mins, secs = divmod(secs, 60)
101 |     hrs, mins = divmod(mins, 60)
102 |     return _timetuple(hrs, mins, secs, msec)
103 | 
104 | 
def formatSeconds(secs, delim=':', msec=False):
    """
    Format a duration given in seconds as `H:MM:SS`, `M:SS` or `S`.

    Leading parts that are zero are left out.

    :param secs: duration in seconds
    :param delim: separator that is placed between hours, minutes and seconds
    :param msec: append the milliseconds with three digits, separated by a dot
    """
    parts = timetuple_from_msec(secs * 1000)

    if parts.hours:
        formatted = '%d%s%02d%s%02d' % (parts.hours, delim, parts.minutes, delim, parts.seconds)
    elif parts.minutes:
        formatted = '%d%s%02d' % (parts.minutes, delim, parts.seconds)
    else:
        formatted = '%d' % parts.seconds

    if not msec:
        return formatted
    return '%s.%03d' % (formatted, parts.milliseconds)
114 | 
115 | 
# List of known file extensions, written in lower case and without the leading dot.
# Despite its name the list is not limited to video and audio formats, it also contains image
# extensions (jpg, png, webp) and text based formats like srt and vtt.
KNOWN_VIDEO_AUDIO_EXTENSIONS = (
    ['avi', 'flv', 'mkv', 'mov', 'mp4', 'webm', '3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v']
    + ['wmv', 'aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav', 'aac', 'ape', 'asf', 'f4a', 'f4b']
    + ['m4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba', 'jpg', 'png', 'webp']
    + ['mhtml', 'srt', 'vtt', 'ass', 'lrc', 'f4f', 'f4m']
)
122 | 
123 | 
def xpath_text(node, xpath):
    """Return the text of the first element that `node.find(xpath)` yields, None if there is no element or no text"""
    match = node.find(xpath)
    return None if match is None else match.text
131 | 
132 | 
def xpath_with_ns(path, ns_map):
    """
    Replace the namespace prefixes of a path with the namespaces of `ns_map`.

    Every `prefix:tag` step of the `/` separated path becomes `{namespace}tag`, steps without a
    prefix are kept as they are.

    :param path: path whose steps may carry a namespace prefix
    :param ns_map: dict that maps the prefixes to namespaces
    """

    def qualify(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(qualify(step) for step in path.split('/'))
143 | 
144 | 
# Variant of xpath_with_ns that resolves the prefix `svg` to the SVG namespace
_s = functools.partial(
    xpath_with_ns,
    ns_map={'svg': 'http://www.w3.org/2000/svg'},
)
# Variant of xpath_with_ns that resolves the prefixes `xml`, `ttml`, `tts` and `xlink`
_x = functools.partial(
    xpath_with_ns,
    ns_map={
        # 'xmlns': 'http://www.w3.org/2000/svg',
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
        'xlink': 'http://www.w3.org/1999/xlink',
    },
)
159 | 
160 | 
# Escape sequences that reset the formatting and that select a bold foreground color by its code
RESET_SEQ = '\033[0m'
COLOR_SEQ = '\033[1;%dm'

# Foreground color codes 30 to 37
BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(30, 38)


class Log:
    """
    Colored console output.

    Every color exists in two variants: the `*_str` function returns the given string wrapped in
    the color escape sequences, the function without suffix prints that string.

    :param logString: the string that should be logged
    """

    @staticmethod
    def _colorize(color_code: int, logString: str):
        # Shared implementation of all *_str functions
        return COLOR_SEQ % color_code + logString + RESET_SEQ

    @staticmethod
    def info_str(logString: str):
        return Log._colorize(WHITE, logString)

    @staticmethod
    def success_str(logString: str):
        return Log._colorize(GREEN, logString)

    @staticmethod
    def warning_str(logString: str):
        return Log._colorize(YELLOW, logString)

    @staticmethod
    def yellow_str(logString: str):
        return Log._colorize(YELLOW, logString)

    @staticmethod
    def error_str(logString: str):
        return Log._colorize(RED, logString)

    @staticmethod
    def debug_str(logString: str):
        return Log._colorize(CYAN, logString)

    @staticmethod
    def blue_str(logString: str):
        return Log._colorize(BLUE, logString)

    @staticmethod
    def magenta_str(logString: str):
        return Log._colorize(MAGENTA, logString)

    @staticmethod
    def info(logString: str):
        print(Log.info_str(logString))

    @staticmethod
    def success(logString: str):
        print(Log.success_str(logString))

    @staticmethod
    def warning(logString: str):
        print(Log.warning_str(logString))

    @staticmethod
    def yellow(logString: str):
        print(Log.yellow_str(logString))

    @staticmethod
    def error(logString: str):
        print(Log.error_str(logString))

    @staticmethod
    def debug(logString: str):
        print(Log.debug_str(logString))

    @staticmethod
    def blue(logString: str):
        print(Log.blue_str(logString))

    @staticmethod
    def magenta(logString: str):
        print(Log.magenta_str(logString))
238 | 
239 | 
def is_path_like(f):
    """Return True if `f` is a str, a bytes or an os.PathLike object"""
    path_types = (str, bytes, os.PathLike)
    return isinstance(f, path_types)
242 | 
243 | 
def str_or_none(v, default=None):
    """Return `str(v)`, or `default` if `v` is None"""
    if v is None:
        return default
    return str(v)
246 | 
247 | 
class SslHelper:
    """
    Helper to create SSL contexts and requests sessions for the TLS related settings
    (skip_cert_verify, allow_insecure_ssl, use_all_ciphers, force_tls_version).
    """

    # Set to True after the warning about a missing CA bundle was logged, so that it is logged only once
    warned_about_certifi = False

    @classmethod
    def load_default_certs(cls, ssl_context: ssl.SSLContext):
        """
        Load the CA certificates into the given SSL context.

        The location is taken from DEFAULT_CA_BUNDLE_PATH of requests. If that location does not
        exist, a warning is logged (once) and the default certificates of the ssl module are loaded.

        :param ssl_context: the context that receives the certificates
        """
        cert_loc = extract_zipped_paths(DEFAULT_CA_BUNDLE_PATH)

        if not cert_loc or not os.path.exists(cert_loc):
            if not cls.warned_about_certifi:
                Log.warning(f"Certifi could not find a suitable TLS CA certificate bundle, invalid path: {cert_loc}")
                cls.warned_about_certifi = True
            ssl_context.load_default_certs()
        else:
            # A file is loaded as CA file, a directory as CA path
            if not os.path.isdir(cert_loc):
                ssl_context.load_verify_locations(cafile=cert_loc)
            else:
                ssl_context.load_verify_locations(capath=cert_loc)

    @classmethod
    @lru_cache(maxsize=16)
    def get_ssl_context(
        cls,
        skip_cert_verify: bool,
        allow_insecure_ssl: bool,
        use_all_ciphers: bool,
        force_tls_version: str,
    ):
        """
        Return an SSL context that is configured according to the given settings.

        The result is cached by lru_cache, calls with the same arguments share the cached context.

        :param skip_cert_verify: create an unverified context, no certificates are loaded
        :param allow_insecure_ssl: set the option bit 0x4 (OP_LEGACY_SERVER_CONNECT) on the context
        :param use_all_ciphers: set the cipher list of the context to 'ALL'
        :param force_tls_version: name of an ssl.TLSVersion attribute that is used as minimum and maximum
                                  version. If ssl.TLSVersion has no such attribute, only a warning is logged.
        """
        if not skip_cert_verify:
            ssl_context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
            cls.load_default_certs(ssl_context)
        else:
            ssl_context = ssl._create_unverified_context()  # pylint: disable=protected-access

        # Activate ALPN extension
        ssl_context.set_alpn_protocols(['http/1.1'])

        if allow_insecure_ssl:
            # This allows connections to legacy insecure servers
            # https://www.openssl.org/docs/manmaster/man3/SSL_CTX_set_options.html#SECURE-RENEGOTIATION
            # Be warned the insecure renegotiation allows an attack, see:
            # https://nvd.nist.gov/vuln/detail/CVE-2009-3555
            ssl_context.options |= 0x4  # set ssl.OP_LEGACY_SERVER_CONNECT bit
        if use_all_ciphers:
            ssl_context.set_ciphers('ALL')
        if force_tls_version:
            if hasattr(ssl.TLSVersion, force_tls_version):
                # Minimum and maximum are set to the same version, so only this version is allowed
                version = getattr(ssl.TLSVersion, force_tls_version)
                ssl_context.minimum_version = version
                ssl_context.maximum_version = version
            else:
                Log.warning(
                    'TLS Version is not forced, please use any of the following strings: '
                    + ', '.join(v for v in dir(ssl.TLSVersion) if v.startswith(('TLS', 'SSL')))
                )

        return ssl_context

    class CustomHttpAdapter(requests.adapters.HTTPAdapter):
        '''
        Transport adapter that allows us to use custom ssl_context.
        The given ssl_context is handed to the urllib3 PoolManager that is created in init_poolmanager.
        See https://stackoverflow.com/a/71646353 for more details.
        '''

        def __init__(self, ssl_context=None, **kwargs):
            # NOTE(review): the context is stored before the base constructor runs, presumably because
            # the base constructor already calls init_poolmanager - confirm against requests
            self.ssl_context = ssl_context
            super().__init__(**kwargs)

        def init_poolmanager(self, connections, maxsize, block=False, **pool_kwargs):
            self.poolmanager = urllib3.poolmanager.PoolManager(
                num_pools=connections, maxsize=maxsize, block=block, ssl_context=self.ssl_context, **pool_kwargs
            )

    @classmethod
    def custom_requests_session(
        cls,
        skip_cert_verify: bool,
        allow_insecure_ssl: bool,
        use_all_ciphers: bool,
        force_tls_version: str,
    ):
        """
        Return a new requests session with custom SSL context

        The SSL context is created by get_ssl_context with the given settings and is used for all
        'https://' URLs of the session. The certificate verification of the session is switched off
        if skip_cert_verify is set.
        """
        session = requests.Session()
        ssl_context = cls.get_ssl_context(skip_cert_verify, allow_insecure_ssl, use_all_ciphers, force_tls_version)
        session.mount('https://', cls.CustomHttpAdapter(ssl_context))
        session.verify = not skip_cert_verify
        return session
336 | 
337 | 
def convert_to_aiohttp_cookie_jar(mozilla_cookie_jar: http.cookiejar.MozillaCookieJar):
    """
    Convert an http.cookiejar.MozillaCookieJar that uses a Netscape HTTP Cookie File to an aiohttp.cookiejar.CookieJar
    Tested with aiohttp v3.8.4

    The cookies are copied directly between the private `_cookies` mappings of both jars.

    :param mozilla_cookie_jar: the cookie jar whose cookies are copied
    :return: a new aiohttp CookieJar (created with unsafe=True) that holds the copied cookies
    """
    # The submodule is imported explicitly. Without this import the function only works if some
    # other module (e.g. aiohttp) has already imported http.cookies.
    import http.cookies

    aiohttp_cookie_jar = CookieJar(unsafe=True)  # unsafe = Allow also cookies for IPs

    # pylint: disable=protected-access
    for cookie_domain, domain_cookies in mozilla_cookie_jar._cookies.items():
        for cookie_path, path_cookies in domain_cookies.items():
            for cookie_name, cookie in path_cookies.items():
                # cookie_name is cookie.name; cookie_path is cookie.path; cookie_domain is cookie.domain
                morsel = http.cookies.Morsel()
                morsel.update(
                    {
                        "expires": cookie.expires,
                        "path": cookie.path,
                        "comment": cookie.comment,
                        "domain": cookie.domain,
                        # "max-age"  : "Max-Age",
                        "secure": cookie.secure,
                        # "httponly": "HttpOnly",
                        "version": cookie.version,
                        # "samesite": "SameSite",
                    }
                )
                # pylint: disable=protected-access
                morsel.set(cookie.name, cookie.value, http.cookies._quote(cookie.value))
                aiohttp_cookie_jar._cookies[(cookie_domain, cookie_path)][cookie_name] = morsel

    return aiohttp_cookie_jar
369 | 
370 | 
class BBBDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    MozillaCookieJar that can also work on already opened text streams and is more
    tolerant when reading Netscape cookie files.

    Taken from yt-dlp: Last update 9. Sep. 2022
    See [1] for cookie file format.

    Differences to the stdlib class that are implemented here:
    - `filename` may be a path-like object or an open text stream,
    - a leading `#HttpOnly_` prefix of an entry is stripped while loading,
    - malformed entries are skipped with a log message instead of aborting the load,
    - cookies with `expires == 0` are treated as session cookies after loading.

    1. https://curl.haxx.se/docs/http-cookies.html
    """

    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab separated fields of one cookie entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by bbb-dl.  Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry', ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value')
    )

    def __init__(self, filename=None, *args, **kwargs):
        """
        @param filename: Path-like object or text stream that `load` / `save` use by default.
        """
        # The parent gets no filename: it would reject anything that is not path-like.
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        """Return the cookie file spelling ('TRUE' / 'FALSE') of a condition."""
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """
        Context manager that yields a text stream for `file`.

        @param file: Path-like object (opened here as UTF-8 and closed on exit) or an
                     already opened text stream (yielded as is and left open).
        @param write: Open / prepare the stream for rewriting it from scratch.
        """
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                # truncate() does not move the stream position. Without rewinding first,
                # the new content would be written at the old offset and the gap in
                # front of it would be filled with NUL characters.
                file.seek(0)
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
        """Write one tab separated line per cookie to the stream `f`."""
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard) or (not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            fields = (
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name,
                value,
            )
            f.write('%s\n' % '\t'.join(fields))

    def save(self, filename=None, *args, **kwargs):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117

        Note that session cookies of this jar are modified: their `expires` is set to 0.

        @param filename: Target path or stream; defaults to the one given to the constructor.
        @raise ValueError: If neither `filename` nor `self.filename` is set.
        """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, *args, **kwargs)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Load cookies from a file.

        @param filename: Source path or stream; defaults to the one given to the constructor.
        @raise ValueError: If neither `filename` nor `self.filename` is set.
        @raise http.cookiejar.LoadError: If a malformed line looks like JSON.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Returns the line in a form the stdlib parser accepts, or raises LoadError.
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX) :]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Collect all acceptable lines in memory and let the stdlib parse that copy.
        cf = io.StringIO()
        with self.open(filename) as input_file:
            for line in input_file:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as cookie_err:
                    # The appended space keeps the index access valid for blank lines.
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See  '
                            'https://github.com/C0D3D3V/Moodle-DL/wiki/Use-cookies-when-downloading'
                        ) from cookie_err
                    Log.info(f'WARNING: Skipping cookie file entry due to {cookie_err}: {line!r}')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
508 | 
509 | 
class Timer:
    '''
    Timing Context Manager
    Can be used for future speed comparisons, like this:

    with Timer() as t:
        Do.stuff()
    print(f'Do.stuff() took:\t {t.duration:.3f} \tseconds.')

    After the with-block has ended, `duration` holds the elapsed time in seconds (float).
    `start` is the raw reading of the performance counter taken on entry; it is only
    meaningful relative to a second reading of the same counter.
    '''

    def __init__(self, nanoseconds=False):
        '''
        @param nanoseconds: Take the readings with the integer nanosecond counter.
                            `duration` is reported in seconds either way.
        '''
        self.start = 0.0
        self.duration = 0.0
        self.nanoseconds = nanoseconds

    def __enter__(self):
        # perf_counter is monotonic. The wall clock (time.time) can be adjusted while the
        # block is running, which could yield wrong or even negative durations.
        self.start = time.perf_counter_ns() if self.nanoseconds else time.perf_counter()
        return self

    def __exit__(self, *args):
        # Nothing is returned, so exceptions raised inside the with-block propagate.
        if self.nanoseconds:
            end = time.perf_counter_ns()
            self.duration = (end - self.start) * 10**-9  # 1 nano-sec = 10^-9 sec
        else:
            end = time.perf_counter()
            self.duration = end - self.start
539 | 
540 | 
# Sentinel for "argument was not passed"; used where None / False are meaningful values.
NO_DEFAULT = object()

# needed for sanitizing filenames in restricted mode
# Maps every accented character of the first string to its ASCII replacement
# (a single letter or a short sequence like 'AE', 'ss').
ACCENT_CHARS = dict(
    zip(
        'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
        itertools.chain(
            'AAAAAA',
            ['AE'],
            'CEEEEIIIIDNOOOOOOO',
            ['OE'],
            'UUUUUY',
            ['TH', 'ss'],
            'aaaaaa',
            ['ae'],
            'ceeeeiiiionooooooo',
            ['oe'],
            'uuuuuy',
            ['th'],
            'y',
        ),
    )
)


class PathTools:
    """A set of methods to create correct paths."""

    # Passed as `restricted` to sanitize_filename by to_valid_name
    restricted_filenames = False

    @staticmethod
    def to_valid_name(name: str) -> str:
        """Filtering invalid characters in filenames and paths.

        HTML entities are unescaped, the string is NFKC normalized, line breaks and tabs
        become spaces, soft hyphens are removed and runs of spaces are collapsed before
        the result is passed through sanitize_filename.

        Args:
            name (str): The string that will go through the filtering

        Returns:
            str: The filtered string, that can be used as a filename.
                None if `name` is None.
        """

        if name is None:
            return None

        name = html.unescape(name)
        name = unicodedata.normalize('NFKC', name)

        for whitespace_char in ('\n', '\r', '\t'):
            name = name.replace(whitespace_char, ' ')
        name = name.replace('\xad', '')
        while '  ' in name:
            name = name.replace('  ', ' ')
        name = PathTools.sanitize_filename(name, PathTools.restricted_filenames)
        # No leading / trailing dots or spaces
        name = name.strip('. ')
        name = name.strip()

        return name

    @staticmethod
    def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
        """Sanitizes a string so it could be used as part of a filename.
        @param restricted   Use a stricter subset of allowed characters
        @param is_id        Whether this is an ID that should be kept unchanged if possible.
                            If unset, yt-dlp's new sanitization rules are in effect
        @return             The sanitized string; '' for an empty input, otherwise never empty
        """
        if s == '':
            return ''

        def replace_insane(char):
            # Replacements that start with '\0' are marked as substitutes, so that they can be
            # de-duplicated and stripped from both ends below.
            if restricted and char in ACCENT_CHARS:
                return ACCENT_CHARS[char]
            elif not restricted and char == '\n':
                return '\0 '
            elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\':
                # Replace with their full-width unicode counterparts
                return {'/': '\u29F8', '\\': '\u29f9'}.get(char, chr(ord(char) + 0xFEE0))
            elif char == '?' or ord(char) < 32 or ord(char) == 127:
                return ''
            elif char == '"':
                return '' if restricted else '\''
            elif char == ':':
                return '\0_\0-' if restricted else '\0 \0-'
            elif char in '\\/|*<>':
                return '\0_'
            if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
                return '\0_'
            return char

        if restricted and is_id is NO_DEFAULT:
            s = unicodedata.normalize('NFKC', s)
        s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
        result = ''.join(map(replace_insane, s))
        if is_id is NO_DEFAULT:
            result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result)  # Remove repeated substitute chars
            STRIP_RE = r'(?:\0.|[ _-])*'
            result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
        result = result.replace('\0', '') or '_'

        # NO_DEFAULT is truthy, so this branch only runs for an explicitly falsy is_id
        if not is_id:
            while '__' in result:
                result = result.replace('__', '_')
            result = result.strip('_')
            # Common case of "Foreign band name - English song title"
            if restricted and result.startswith('-_'):
                result = result[2:]
            if result.startswith('-'):
                result = '_' + result[len('-') :]
            result = result.lstrip('.')
            if not result:
                result = '_'
        return result

    @staticmethod
    def remove_start(s, start):
        """Return `s` without the prefix `start`; `s` is returned unchanged if it is None
        or does not start with `start`."""
        return s[len(start) :] if s is not None and s.startswith(start) else s

    @staticmethod
    def sanitize_path(path: str):
        """
        @param path: A path to sanitize.
        @return: A normalized path where every part was sanitized using to_valid_name.
                 A drive / UNC prefix and the parts '.' and '..' are kept as they are.
        """
        drive_or_unc, _ = os.path.splitdrive(path)
        norm_path = os.path.normpath(PathTools.remove_start(path, drive_or_unc)).split(os.path.sep)

        # A rooted path splits into an empty first part. Joining the parts again drops that
        # empty part, so without special care '/tmp/x' would come back as the relative
        # path 'tmp/x'.
        is_rooted_without_drive = not drive_or_unc and norm_path[0] == ''
        if drive_or_unc:
            norm_path.pop(0)

        sanitized_path = [
            path_part if path_part in ['.', '..'] else PathTools.to_valid_name(path_part) for path_part in norm_path
        ]

        if drive_or_unc:
            sanitized_path.insert(0, drive_or_unc + os.path.sep)
        elif is_rooted_without_drive:
            sanitized_path.insert(0, os.path.sep)
        return os.path.join(*sanitized_path)

    @staticmethod
    def get_abs_path(path: str):
        """Return the resolved, absolute form of `path` as a string."""
        return str(Path(path).resolve())

    @staticmethod
    def get_in_dir(path: str, filename: str):
        """Return the path of `filename` inside of the directory `path` as a string."""
        return str(Path(path) / filename)

    @staticmethod
    def make_base_dir(path_to_file: str):
        """Create the parent directory of `path_to_file` (and missing ancestors)."""
        Path(path_to_file).parent.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def make_dirs(path_to_dir: str):
        """Create the directory `path_to_dir` (and missing ancestors)."""
        Path(path_to_dir).mkdir(parents=True, exist_ok=True)

    @staticmethod
    def get_file_ext(filename: str) -> str:
        """Return the lower-cased text after the last dot of `filename`,
        or None if `filename` contains no dot."""
        _, dot, extension = filename.rpartition('.')
        if dot:
            return extension.lower()
        return None

    @staticmethod
    def get_user_data_directory():
        """Returns a platform-specific root directory for user application data.

        On Windows this is %LOCALAPPDATA%, else %APPDATA%, else None.
        """
        if os.name == "nt":
            return os.getenv("LOCALAPPDATA") or os.getenv("APPDATA") or None
        # On non-windows, use XDG_DATA_HOME if set, else default to ~/.local/share.
        xdg_data_home = os.getenv("XDG_DATA_HOME")
        if xdg_data_home:
            return xdg_data_home
        return os.path.join(os.path.expanduser("~"), ".local/share")

    @staticmethod
    def get_project_data_directory():
        """
        Returns the path (as a string) of the "bbb-dl" directory inside of the user data
        directory. The directory is created if it does not exist yet.

        Raises a TypeError if get_user_data_directory returns None.
        """
        data_dir = Path(PathTools.get_user_data_directory()) / "bbb-dl"
        data_dir.mkdir(parents=True, exist_ok=True)
        return str(data_dir)

    @staticmethod
    def make_path(path: str, *filenames: str):
        """Join `path` and all `filenames` to one path and return it as a string."""
        return str(Path(path).joinpath(*filenames))
733 | 


--------------------------------------------------------------------------------
/bbb_dl/main.py:
--------------------------------------------------------------------------------
# Python script that downloads a lesson video from a published BBB (BigBlueButton) recording.
   2 | 
   3 | import argparse
   4 | import asyncio
   5 | import hashlib
   6 | import math
   7 | import os
   8 | import re
   9 | import shutil
  10 | import traceback
  11 | from dataclasses import dataclass
  12 | from datetime import datetime
  13 | from enum import Enum
  14 | from functools import partial
  15 | from http.server import ThreadingHTTPServer
  16 | from io import StringIO
  17 | from itertools import cycle
  18 | from pathlib import Path
  19 | from threading import Thread
  20 | from typing import Any, Dict, List, Tuple
  21 | from xml.etree import ElementTree as ET
  22 | from xml.etree.ElementTree import Element, ParseError
  23 | 
  24 | import aiofiles
  25 | import aiohttp
  26 | from aiohttp.client_exceptions import ClientError, ClientResponseError
  27 | from colorama import just_fix_windows_console
  28 | from playwright.async_api import async_playwright
  29 | from playwright.async_api._generated import Page
  30 | 
  31 | from bbb_dl.ffmpeg import FFMPEG
  32 | from bbb_dl.utils import KNOWN_VIDEO_AUDIO_EXTENSIONS, BBBDLCookieJar, Log
  33 | from bbb_dl.utils import PathTools as PT
  34 | from bbb_dl.utils import (
  35 |     QuietRequestHandler,
  36 |     SslHelper,
  37 |     Timer,
  38 |     _s,
  39 |     _x,
  40 |     append_get_idx,
  41 |     convert_to_aiohttp_cookie_jar,
  42 |     format_bytes,
  43 |     formatSeconds,
  44 |     get_free_port,
  45 |     xpath_text,
  46 | )
  47 | from bbb_dl.version import __version__
  48 | 
  49 | 
class ActionType(Enum):
    """Kind of an Action; stored in Action.action_type."""

    # NOTE(review): the code that creates and executes actions is outside of this section.
    # Going by the names, the pairs show/hide toggle slide images and annotation drawings,
    # set_view_box changes the visible area (zoom) and move_cursor moves the cursor - confirm.
    show_image = 1
    hide_image = 2
    show_drawing = 3
    hide_drawing = 4
    set_view_box = 5
    move_cursor = 6
  57 | 
  58 | 
@dataclass
class Action:
    """One action of a Frame. Only `action_type` is mandatory, all other fields default to None."""

    action_type: ActionType
    # NOTE(review): presumably the id of the affected image / drawing element - confirm
    element_id: str = None
    value: Any = None
    # For view box (zoom) actions, get_slideshow_size reads width / height (via int())
    # to determine the frame resolution.
    width: int = None
    height: int = None
    # NOTE(review): presumably a position (view box origin or cursor) - confirm
    x: int = None
    y: int = None
  68 | 
  69 | 
@dataclass
class Metadata:
    """Meta information of a recording (as returned by parse_metadata)."""

    # NOTE(review): presumably the start time of the recording as a timestamp - confirm unit
    date: int
    date_formatted: str
    # Compared against deskshare start timestamps, so both use the same time unit
    duration: float
    title: str
    bbb_version: str = None
  77 | 
  78 | 
@dataclass
class Frame:
    """A point in time of the presentation together with the actions that belong to it."""

    timestamp: float
    actions: List[Action]
    # Both stay None until they are assigned by code outside of this section
    capture_filename: str = None
    capture_path: str = None
  85 | 
  86 | 
@dataclass
class Deskshare:
    """One screen sharing interval, built from an <event> element of deskshare.xml."""

    # Taken from the attributes start_timestamp / stop_timestamp
    start_timestamp: float
    stop_timestamp: float
    # Taken from the attributes video_width / video_height
    width: int
    height: int
  93 | 
  94 | 
  95 | class ContentRangeError(ConnectionError):
  96 |     pass
  97 | 
  98 | 
  99 | class BBBDL:
    # Matches the URL of a published recording:
    #   <website>/playback/presentation/<version>/[playback.html?...meetingId=]<id>
    # Named groups: `website` (scheme and host), `version` (digits and dots) and
    # `id` (hex digits and dashes).
    VALID_URL_RE = re.compile(
        r'''(?x)
            (?P<website>https?://[^/]+)/playback/presentation/
            (?P<version>[\d\.]+)/
            (playback.html\?.*?meetingId=)?
            (?P<id>[0-9a-f\-]+)
        '''
    )
    # Matches a run of decimal digits
    NUMBER_RE = re.compile(r'\d+')

    # NOTE(review): presumably the default headers of all HTTP requests - the code that
    # sends them is outside of this section.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en',
        'Accept-Encoding': 'deflate, gzip',
    }
 115 | 
 116 |     def __init__(
 117 |         self,
 118 |         dl_url: str,
 119 |         filename: str,
 120 |         output_dir: str,
 121 |         verbose: bool,
 122 |         skip_cert_verify: bool,
 123 |         allow_insecure_ssl: bool,
 124 |         use_all_ciphers: bool,
 125 |         force_tls_version: str,
 126 |         encoder: str,
 127 |         audiocodec: str,
 128 |         skip_webcam: bool,
 129 |         skip_webcam_freeze_detection: bool,
 130 |         skip_annotations: bool,
 131 |         skip_cursor: bool,
 132 |         skip_zoom: bool,
 133 |         keep_tmp_files: bool,
 134 |         ffmpeg_location: str,
 135 |         working_dir: str,
 136 |         backup: bool,
 137 |         max_parallel_chromes: int,
 138 |         force_width: int,
 139 |         force_height: int,
 140 |         preset: str,
 141 |         crf: str,
 142 |     ):
 143 |         # Rendering options
 144 |         self.skip_webcam_opt = skip_webcam
 145 |         self.skip_webcam_freeze_detection_opt = skip_webcam_freeze_detection
 146 |         self.skip_annotations_opt = skip_annotations
 147 |         self.skip_cursor_opt = skip_cursor
 148 |         self.skip_zoom_opt = skip_zoom
 149 |         # BBB-dl Options
 150 |         self.keep_tmp_files = keep_tmp_files
 151 |         self.backup = backup
 152 |         self.working_dir = self.get_working_dir(working_dir)
 153 |         self.verbose = verbose
 154 |         self.skip_cert_verify = skip_cert_verify
 155 |         self.allow_insecure_ssl = allow_insecure_ssl
 156 |         self.use_all_ciphers = use_all_ciphers
 157 |         self.force_tls_version = force_tls_version
 158 |         self.max_dl_retries = 10
 159 |         self.max_parallel_dl = 5
 160 |         self.max_parallel_chromes = int(max_parallel_chromes)
 161 | 
 162 |         # Job Options
 163 |         self.dl_url = dl_url
 164 |         self.filename = filename
 165 |         self.output_dir = self.get_output_dir(output_dir)
 166 |         self.slideshow_width = int(force_width) if force_width is not None else None
 167 |         self.slideshow_height = int(force_height) if force_height is not None else None
 168 | 
 169 |         self.ffmpeg = FFMPEG(verbose, ffmpeg_location, encoder, audiocodec, preset, crf)
 170 | 
 171 |         self.cookies_path = PT.make_path(self.working_dir, "cookies.txt")
 172 |         self.cookies_text = None
 173 |         if os.path.isfile(self.cookies_path):
 174 |             with open(self.cookies_path, 'r', encoding='utf-8') as cookie_file:
 175 |                 self.cookies_text = cookie_file.read()
 176 | 
 177 |         # Check DL-URL
 178 |         m_obj = re.match(self.VALID_URL_RE, self.dl_url)
 179 | 
 180 |         if m_obj is None:
 181 |             Log.error(
 182 |                 f'Error: Your URL {self.dl_url} does not match the bbb session pattern.'
 183 |                 + ' If you think this URL should work, please open an issue on https://github.com/C0D3D3V/bbb-dl/issues'
 184 |             )
 185 |             exit(-4)
 186 | 
 187 |         self.video_id = m_obj.group('id')
 188 |         self.video_website = m_obj.group('website')
 189 |         self.presentation_base_url = self.video_website + '/presentation/' + self.video_id
 190 |         self.tmp_dir = self.get_tmp_dir(self.video_id)
 191 |         self.frames_dir = self.get_frames_dir()
 192 | 
 193 |     def get_cookie_jar(self) -> aiohttp.CookieJar:
 194 |         if self.cookies_text is not None:
 195 |             cookie_jar = BBBDLCookieJar(StringIO(self.cookies_text))
 196 |             cookie_jar.load(ignore_discard=True, ignore_expires=True)
 197 |             return convert_to_aiohttp_cookie_jar(cookie_jar)
 198 |         return None
 199 | 
 200 |     def run(self):
 201 |         if not self.backup:
 202 |             Log.yellow(f'Output directory for the final video is: {self.output_dir}')
 203 |             Log.yellow(f'Directory for the temporary files is: {self.tmp_dir}')
 204 |         else:
 205 |             Log.yellow(f'Output directory for backup is: {self.tmp_dir}')
 206 | 
 207 |         Log.info("Downloading meta information")
 208 | 
 209 |         dl_jobs = ['metadata.xml', 'shapes.svg']
 210 |         _ = asyncio.run(self.batch_download_from_bbb(dl_jobs))
 211 | 
 212 |         Log.info("Downloading webcams / deskshare")
 213 |         dl_jobs = [
 214 |             'cursor.xml',
 215 |             'panzooms.xml',
 216 |             'captions.json',
 217 |             'deskshare.xml',
 218 |             'events.xml',
 219 |             'presentation_text.json',
 220 |             'slides_new.xml',
 221 |             'notes.html',
 222 |             'polls.json',
 223 |             'external_videos.json',
 224 |         ]
 225 |         cam_webm_idx = append_get_idx(dl_jobs, 'video/webcams.webm')
 226 |         cam_mp4_idx = append_get_idx(dl_jobs, 'video/webcams.mp4')
 227 |         dsk_webm_idx = append_get_idx(dl_jobs, 'deskshare/deskshare.webm')
 228 |         dsk_mp4_idx = append_get_idx(dl_jobs, 'deskshare/deskshare.mp4')
 229 | 
 230 |         dl_results = asyncio.run(self.batch_download_from_bbb(dl_jobs, False))
 231 | 
 232 |         if not dl_results[cam_webm_idx] and not dl_results[cam_mp4_idx]:
 233 |             Log.error('Error: webcams video is essential. Abort! Please try again later!')
 234 |             exit(4)
 235 |         webcams_rel_path = 'video/webcams.webm' if dl_results[cam_webm_idx] else 'video/webcams.mp4'
 236 |         webcams_path = PT.get_in_dir(self.tmp_dir, webcams_rel_path)
 237 | 
 238 |         deskshare_rel_path = (
 239 |             'deskshare/deskshare.webm'
 240 |             if dl_results[dsk_webm_idx]
 241 |             else 'deskshare/deskshare.mp4' if dl_results[dsk_mp4_idx] else None
 242 |         )
 243 |         deskshare_path = PT.get_in_dir(self.tmp_dir, deskshare_rel_path) if deskshare_rel_path is not None else None
 244 | 
 245 |         Log.info("Downloading slides")
 246 |         loaded_shapes = self.load_xml('shapes.svg')
 247 |         dl_jobs = self.get_all_image_urls(loaded_shapes)
 248 |         _ = asyncio.run(self.batch_download_from_bbb(dl_jobs))
 249 | 
 250 |         metadata = self.parse_metadata()
 251 |         deskshare_events = self.parse_deskshare_data(metadata.duration)
 252 |         if deskshare_path is None and len(deskshare_events) == 0:
 253 |             Log.yellow('No desk was shared in this session')
 254 |         elif deskshare_path is None and len(deskshare_events) > 0:
 255 |             Log.error(
 256 |                 'Error: deskshare video is essential, because a desk was shared in this session.'
 257 |                 + ' Abort! Please try again later!'
 258 |             )
 259 |             exit(5)
 260 | 
 261 |         if self.backup:
 262 |             Log.success("Backup Finished")
 263 |             Log.info("You can run bbb-dl again to generate the video based on the backed up files!")
 264 |             Log.yellow(f"Backup is located in: {self.tmp_dir}")
 265 |             return
 266 | 
 267 |         frames, only_zooms, partitions = self.parse_slides_data(loaded_shapes, metadata)
 268 | 
 269 |         if self.slideshow_width is None and self.slideshow_height is None:
 270 |             guessed_slideshow_width, guessed_slideshow_height = self.get_slideshow_size(
 271 |                 only_zooms, deskshare_path, loaded_shapes
 272 |             )
 273 |             if self.slideshow_width is None:
 274 |                 self.slideshow_width = guessed_slideshow_width
 275 |             if self.slideshow_height is None:
 276 |                 self.slideshow_height = guessed_slideshow_height
 277 | 
 278 |         self.create_frames(frames, only_zooms, partitions)
 279 | 
 280 |         slideshow_path = self.create_slideshow(frames)
 281 |         slideshow_path = self.add_deskshare_to_slideshow(slideshow_path, deskshare_path, deskshare_events, metadata)
 282 | 
 283 |         result_path = self.final_mux(slideshow_path, webcams_path, webcams_rel_path, metadata)
 284 | 
 285 |         if not self.keep_tmp_files:
 286 |             self.remove_tmp_dir()
 287 |         else:
 288 |             Log.warning(f'Temporary directory will not be deleted: {self.tmp_dir}')
 289 |         Log.success(f'All done! Final video: {result_path}')
 290 | 
 291 |     def run_audio_only(self):
 292 |         if not self.backup:
 293 |             Log.yellow(f'Output directory for the final audio is: {self.output_dir}')
 294 |             Log.yellow(f'Directory for the temporary files is: {self.tmp_dir}')
 295 |         else:
 296 |             Log.error('Please use the backup option only without the audio only mode')
 297 |             exit(-11)
 298 | 
 299 |         Log.info("Downloading meta information")
 300 | 
 301 |         dl_jobs = ['metadata.xml']
 302 |         _ = asyncio.run(self.batch_download_from_bbb(dl_jobs))
 303 | 
 304 |         Log.info("Downloading webcams file")
 305 |         dl_jobs = []
 306 |         cam_webm_idx = append_get_idx(dl_jobs, 'video/webcams.webm')
 307 |         cam_mp4_idx = append_get_idx(dl_jobs, 'video/webcams.mp4')
 308 | 
 309 |         dl_results = asyncio.run(self.batch_download_from_bbb(dl_jobs, False))
 310 | 
 311 |         if not dl_results[cam_webm_idx] and not dl_results[cam_mp4_idx]:
 312 |             Log.error('Error: webcams video is essential. Abort! Please try again later!')
 313 |             exit(4)
 314 |         webcams_rel_path = 'video/webcams.webm' if dl_results[cam_webm_idx] else 'video/webcams.mp4'
 315 |         webcams_path = PT.get_in_dir(self.tmp_dir, webcams_rel_path)
 316 | 
 317 |         metadata = self.parse_metadata()
 318 |         result_path = self.extract_audio(webcams_path, metadata)
 319 | 
 320 |         if not self.keep_tmp_files:
 321 |             self.remove_tmp_dir()
 322 |         else:
 323 |             Log.warning(f'Temporary directory will not be deleted: {self.tmp_dir}')
 324 |         Log.success(f'All done! Final audio: {result_path}')
 325 | 
 326 |     def parse_deskshare_data(self, recording_duration) -> List[Deskshare]:
 327 |         result_list = []
 328 |         loaded_deskshare = self.load_xml('deskshare.xml', False)
 329 |         if loaded_deskshare is None:
 330 |             return result_list
 331 |         deskshares = loaded_deskshare.findall("./event[@start_timestamp]")
 332 |         for deskshare in deskshares:
 333 |             deskshare_in = float(deskshare.get('start_timestamp'))
 334 |             deskshare_out = float(deskshare.get('stop_timestamp'))
 335 |             deskshare_width = int(deskshare.get('video_width'))
 336 |             deskshare_height = int(deskshare.get('video_height'))
 337 |             if deskshare_in < recording_duration:
 338 |                 result_list.append(
 339 |                     Deskshare(
 340 |                         start_timestamp=deskshare_in,
 341 |                         stop_timestamp=deskshare_out,
 342 |                         width=deskshare_width,
 343 |                         height=deskshare_height,
 344 |                     )
 345 |                 )
 346 |         result_list = sorted(result_list, key=lambda item: item.start_timestamp)
 347 |         return result_list
 348 | 
 349 |     def get_slideshow_size(self, only_zooms: Dict[float, Frame], deskshare_path: str, loaded_shapes: Element):
 350 |         widths = []
 351 |         heights = []
 352 |         if deskshare_path is not None:
 353 |             video_info = self.ffmpeg.get_video_infos(deskshare_path)
 354 |             widths.append(video_info.width)
 355 |             heights.append(video_info.height)
 356 | 
 357 |         if self.skip_zoom_opt:
 358 |             # Use slides sizes as frame resolution
 359 |             slides_widths, slides_heights = self.get_all_slide_sizes(loaded_shapes)
 360 |             widths.extend(slides_widths)
 361 |             heights.extend(slides_heights)
 362 |         else:
 363 |             # Use zoom view box size as frame resolution
 364 |             for _, frame in only_zooms.items():
 365 |                 action = frame.actions[0]
 366 |                 widths.append(int(action.width))
 367 |                 heights.append(int(action.height))
 368 | 
 369 |         if len(widths) == 0 or len(heights) == 0:
 370 |             return
 371 | 
 372 |         max_width = max(widths)
 373 |         max_height = max(heights)
 374 | 
 375 |         if max_width % 2:
 376 |             max_width += 1
 377 |         if max_height % 2:
 378 |             max_height += 1
 379 | 
 380 |         return max_width, max_height
 381 | 
 382 |     def create_frames(self, frames: Dict[float, Frame], only_zooms: Dict[float, Frame], partitions: List[Tuple]):
 383 |         Log.info('Start capturing frames...')
 384 |         Log.info(f'Output directory for frames is: {self.frames_dir}')
 385 |         Log.info('Initialization takes a few seconds...')
 386 |         # Setup a server for Chrome browser to access
 387 |         port, port_error = get_free_port()
 388 |         if port is None:
 389 |             Log.error(f'Error: Could not open a port for a local http server: {port_error}')
 390 |             Log.warning(
 391 |                 'Please check your Antivirus, to allow bbb-dl to open a local port.'
 392 |                 + ' This is needed so we can use chrome browser to generate the presentation frames.'
 393 |             )
 394 |             exit(3)
 395 | 
 396 |         simple_handler = partial(QuietRequestHandler, directory=self.tmp_dir)
 397 |         server = ThreadingHTTPServer(('127.0.0.1', port), simple_handler)
 398 |         thread = Thread(target=server.serve_forever, daemon=True)
 399 |         thread.start()
 400 | 
 401 |         with Timer() as t:
 402 |             _ = asyncio.run(self.multi_capture_frames(f'http://localhost:{port}', frames, only_zooms, partitions))
 403 | 
 404 |         print()
 405 |         Log.info(f'Frames capturing is finished and took: {formatSeconds(t.duration)}.')
 406 | 
 407 |         server.shutdown()
 408 |         thread.join(timeout=10)
 409 | 
 410 |     async def display_capture_status(self, status_dict: Dict):
 411 |         spinner = cycle('/|\\-')
 412 |         print()
 413 |         while (status_dict.get('done', 0)) < status_dict.get('total', 0):
 414 |             print(
 415 |                 "\r\033[KDone:"
 416 |                 + f" {status_dict.get('done', 0):05} / {status_dict.get('total', 0):05} Frames"
 417 |                 + f" | {status_dict.get('done_partitions', 0):03} / {status_dict.get('total_partitions', 0):03} Parts"
 418 |                 + f" {next(spinner)}",
 419 |                 end='',
 420 |             )
 421 |             await asyncio.sleep(1)
 422 | 
 423 |     async def _real_multi_capture_frames(
 424 |         self,
 425 |         server_url: str,
 426 |         frames: Dict[float, Frame],
 427 |         only_zooms: Dict[float, Frame],
 428 |         partitions: List[Tuple],
 429 |         status_dict: Dict,
 430 |     ):
 431 |         semaphore = asyncio.Semaphore(self.max_parallel_chromes)
 432 |         gather_jobs = asyncio.gather(
 433 |             *[
 434 |                 self.capture_frames(server_url, frames, only_zooms, partition, semaphore, status_dict)
 435 |                 for partition in partitions
 436 |             ]
 437 |         )
 438 |         try:
 439 |             await gather_jobs
 440 |         except Exception:
 441 |             traceback.print_exc()
 442 |             gather_jobs.cancel()
 443 |             Log.error(
 444 |                 'Unexpected Error! Press Ctr+C to exit.'
 445 |                 + ' Please try to set a low number of threads with `--max-parallel-chromes`.'
 446 |                 + ' You can contact bbb-dl support.'
 447 |             )
 448 |             exit(-1)
 449 | 
 450 |     async def multi_capture_frames(
 451 |         self,
 452 |         server_url: str,
 453 |         frames: Dict[float, Frame],
 454 |         only_zooms: Dict[float, Frame],
 455 |         partitions: List[Tuple],
 456 |     ):
 457 |         status_dict = {
 458 |             'done': 0,
 459 |             'total': len(frames),
 460 |             'done_partitions': 0,
 461 |             'total_partitions': len(partitions),
 462 |         }
 463 |         await asyncio.wait(
 464 |             [
 465 |                 asyncio.create_task(
 466 |                     self._real_multi_capture_frames(server_url, frames, only_zooms, partitions, status_dict)
 467 |                 ),
 468 |                 asyncio.create_task(self.display_capture_status(status_dict)),
 469 |             ],
 470 |         )
 471 | 
    async def capture_frames(
        self,
        server_url: str,
        frames: Dict[float, Frame],
        only_zooms: Dict[float, Frame],
        partition: Tuple,
        semaphore: asyncio.Semaphore,
        status_dict: Dict,
    ):
        """
        Replay all frames of one partition in a Chromium page and save a screenshot per frame.

        If the screenshots of all frames of the partition already exist, no browser is started.

        @param server_url: Base URL under which shapes.svg is served
        @param frames: All frames of the recording; only those with a timestamp inside the partition
                       are handled. The loops stop at the first later timestamp, so ascending
                       order is expected (parse_slides_data sorts the frames that way).
        @param only_zooms: View box frames, used to find the initial view box of the partition
        @param partition: (first_timestamp, last_timestamp), both bounds are inclusive
        @param semaphore: Limits how many partitions are handled at the same time
        @param status_dict: Shared progress counters; 'done' and 'done_partitions' are updated in place
        """
        async with semaphore, async_playwright() as p:
            first_timestamp = partition[0]
            last_timestamp = partition[1]

            # Check if partition is already done
            partition_already_done = True
            total_frames_in_partition = 0
            for timestamp, frame in frames.items():
                if timestamp > last_timestamp:
                    break
                if timestamp < first_timestamp:
                    continue
                if not os.path.isfile(frame.capture_path):
                    # A single missing screenshot is enough to redo the partition
                    partition_already_done = False
                    break
                total_frames_in_partition += 1

            if partition_already_done:
                status_dict['done'] += total_frames_in_partition
                print()
                status_dict['done_partitions'] += 1
                Log.info(
                    f'{status_dict["done_partitions"]}/{status_dict["total_partitions"]}'
                    + f' Partition already finished: {formatSeconds(partition[0])} to {formatSeconds(partition[1])}'
                )
                return

            browser = await p.chromium.launch()
            page = await browser.new_page()

            await page.set_viewport_size({"width": int(self.slideshow_width), "height": int(self.slideshow_height)})
            await page.goto(server_url + '/shapes.svg')
            await page.wait_for_selector('#svgfile')
            # add cursor (a hidden red circle, placed at 9999/9999 until it is moved)
            await page.evaluate(
                """() => { 
                let el = document.querySelector('#svgfile')
                el.innerHTML = el.innerHTML + '<circle id="cursor" cx="9999" cy="9999" r="5" stroke="red" stroke-width="3" fill="red" style="visibility:hidden" />'
            }"""
            )
            current_view_box = None
            # Set initial view box for this partition
            # only_zooms is sorted by descending timestamp in parse_slides_data, so the first
            # entry that is not after the partition start is the latest view box before it
            for timestamp, frame in only_zooms.items():
                if timestamp > first_timestamp:
                    continue
                # We only set one initial ViewBox, the first we find before the partition
                current_view_box = frame.actions[0]
                if not self.skip_zoom_opt:
                    # Use this view box only if we want to zoom
                    await self.set_view_box(page, current_view_box)
                break
            # Replay the actions of every frame of the partition, then take its screenshot
            for timestamp, frame in frames.items():
                if timestamp > last_timestamp:
                    break
                if timestamp < first_timestamp:
                    continue
                for action in frame.actions:
                    if action.action_type == ActionType.show_image:
                        await self.show_image(page, action)
                        await self.show_cursor(page)
                        if self.skip_zoom_opt:
                            # Use custom view box if we do not want to zoom
                            # (the whole slide, based on the size stored in the show_image action)
                            zoom_action = Action(
                                action_type=ActionType.set_view_box,
                                value=f"0 0 {action.width} {action.height}",
                                x=0,
                                y=0,
                                width=action.width,
                                height=action.height,
                            )
                            await self.set_view_box(page, zoom_action)
                    elif action.action_type == ActionType.hide_image:
                        await self.hide_image(page, action)
                        await self.hide_cursor(page)
                    elif action.action_type == ActionType.show_drawing:
                        await self.show_drawing(page, action)
                    elif action.action_type == ActionType.hide_drawing:
                        await self.hide_drawing(page, action)
                    elif action.action_type == ActionType.set_view_box:
                        # Always remember the view box, the cursor position is calculated from it
                        current_view_box = action
                        if not self.skip_zoom_opt:
                            # Use this view box only if we want to zoom
                            await self.set_view_box(page, action)
                    elif action.action_type == ActionType.move_cursor:
                        if current_view_box is None:
                            Log.warning('No ViewBox, cursor position unclear!')
                            await self.move_cursor(page, -1, -1)
                        if current_view_box is not None:
                            if action.x == -1 and action.y == -1:
                                # (-1, -1) is passed on unchanged - presumably it marks an absent cursor
                                await self.move_cursor(page, -1, -1)
                            else:
                                # The cursor position is scaled by the view box size
                                # and shifted by the view box origin
                                await self.move_cursor(
                                    page,
                                    current_view_box.x + (action.x * current_view_box.width),
                                    current_view_box.y + (action.y * current_view_box.height),
                                )

                # An existing screenshot is kept, the frame counts as done either way
                if not os.path.isfile(frame.capture_path):
                    await page.screenshot(path=frame.capture_path)
                status_dict['done'] += 1

            await browser.close()
            print()
            status_dict['done_partitions'] += 1
            Log.info(
                f'{status_dict["done_partitions"]}/{status_dict["total_partitions"]}'
                + f' Partition finished: {formatSeconds(partition[0])} to {formatSeconds(partition[1])}'
            )
 589 | 
 590 |     async def show_image(self, page: Page, action: Action):
 591 |         await page.evaluate(
 592 |             """([id, canvas_num]) => {
 593 |                 document.querySelector('#' + id).style.visibility = 'visible'
 594 |                 const canvas = document.querySelector('#canvas' + canvas_num)
 595 |                 if (canvas) canvas.setAttribute('display', 'block')
 596 |             }""",
 597 |             [action.element_id, action.value],
 598 |         )
 599 | 
 600 |     async def hide_image(self, page: Page, action: Action):
 601 |         await page.evaluate(
 602 |             """([id, canvas_num]) => {
 603 |                 document.querySelector('#' + id).style.visibility = 'hidden'
 604 |                 const canvas = document.querySelector('#canvas' + canvas_num)
 605 |                 if (canvas) canvas.setAttribute('display', 'none')
 606 |             }""",
 607 |             [action.element_id, action.value],
 608 |         )
 609 | 
 610 |     async def show_drawing(self, page: Page, action: Action):
 611 |         await page.evaluate(
 612 |             """([id, shape_id]) => {
 613 |                 document.querySelectorAll('[shape=' + shape_id + ']').forEach( element => {
 614 |                     element.style.visibility = 'hidden'
 615 |                 })
 616 |                 document.querySelector('#' + id).style.visibility = 'visible'
 617 |             }""",
 618 |             [action.element_id, action.value],
 619 |         )
 620 | 
 621 |     async def hide_drawing(self, page: Page, action: Action):
 622 |         await page.evaluate(
 623 |             """(id) => {
 624 |                 document.querySelector('#' + id).style.display = 'none'
 625 |             }""",
 626 |             action.element_id,
 627 |         )  # Maybe use visibility?
 628 | 
 629 |     async def set_view_box(self, page: Page, action: Action):
 630 |         # First try to use whole slideshow width
 631 |         aspect_ratio = action.width / action.height
 632 |         width = self.slideshow_width
 633 |         height = int(math.trunc(width / aspect_ratio / 2) * 2)
 634 | 
 635 |         if height > self.slideshow_height:
 636 |             # Try to use whole slideshow height
 637 |             aspect_ratio = action.height / action.width
 638 |             height = self.slideshow_height
 639 |             width = int(math.trunc(height / aspect_ratio / 2) * 2)
 640 | 
 641 |         # Center the slide on the screen
 642 |         pos_x = int((self.slideshow_width - width) / 2)
 643 |         pos_y = int((self.slideshow_height - height) / 2)
 644 |         await page.evaluate(
 645 |             """([viewBox, width, height, pos_x, pos_y]) => {
 646 |                 let el = document.querySelector('#svgfile')
 647 |                 el.style.position = 'absolute'
 648 |                 el.style.width = width + 'px'
 649 |                 el.style.height = height + 'px'
 650 |                 el.style.left = pos_x + 'px'
 651 |                 el.style.top = pos_y + 'px'
 652 |                 el.setAttribute('viewBox', viewBox)
 653 |             }""",
 654 |             [action.value, width, height, pos_x, pos_y],
 655 |         )
 656 | 
 657 |     async def show_cursor(self, page: Page):
 658 |         await page.evaluate(
 659 |             """() => {
 660 |                 document.querySelector('#cursor').style.visibility = 'visible'
 661 |             }""",
 662 |         )
 663 | 
 664 |     async def hide_cursor(self, page: Page):
 665 |         await page.evaluate(
 666 |             """() => {
 667 |                 document.querySelector('#cursor').style.visibility = 'hidden'
 668 |             }""",
 669 |         )
 670 | 
 671 |     async def move_cursor(self, page: Page, x: float, y: float):
 672 |         await page.evaluate(
 673 |             """([x,y]) => {
 674 |                 document.querySelector('#cursor').setAttribute('cx', x)
 675 |                 document.querySelector('#cursor').setAttribute('cy', y)
 676 |             }""",
 677 |             [x, y],
 678 |         )
 679 | 
 680 |     def get_all_image_urls(self, loaded_shapes: Element) -> (List[str], List[Tuple[int]]):
 681 |         image_urls = []
 682 |         shapes_images = loaded_shapes.findall(_s(".//svg:image"))
 683 |         for image in shapes_images:
 684 |             image_rel_path = image.get(_x('xlink:href'))
 685 |             if image_rel_path not in image_urls:
 686 |                 image_urls.append(image_rel_path)
 687 |         return image_urls
 688 | 
 689 |     def get_all_slide_sizes(self, loaded_shapes: Element) -> (List[int], List[int]):
 690 |         widths = []
 691 |         heights = []
 692 |         slides = loaded_shapes.findall(_s("./svg:image[@class='slide']"))
 693 |         for image in slides:
 694 |             image_width = int(float(image.get('width')))
 695 |             image_height = int(float(image.get('height')))
 696 |             widths.append(image_width)
 697 |             heights.append(image_height)
 698 |         return widths, heights
 699 | 
 700 |     def parse_metadata(self) -> Metadata:
 701 |         loaded_metadata = self.load_xml('metadata.xml')
 702 | 
 703 |         date = xpath_text(loaded_metadata, 'start_time')  # date on that the recording took place
 704 |         date_formatted = datetime.fromtimestamp(int(date) / 1000).strftime('%Y-%m-%dT%H-%M-%S')
 705 |         duration = float(xpath_text(loaded_metadata, './playback/duration')) / 1000.0  # in seconds
 706 |         title = xpath_text(loaded_metadata, './meta/meetingName')
 707 | 
 708 |         Log.info(f"Recording title: {title}")
 709 |         Log.info(f"Recording date: {date_formatted}")
 710 |         Log.info(f"Recording duration: {formatSeconds(duration)}")
 711 | 
 712 |         bbb_version = None
 713 |         if self.verbose:
 714 |             try:
 715 |                 bbb_origin_version = xpath_text(loaded_metadata, './meta/bbb-origin-version')
 716 |                 if bbb_origin_version is not None:
 717 |                     bbb_version = bbb_origin_version.split(' ')[0]
 718 |                     Log.info(f"BBB version: {bbb_version}")
 719 |             except IndexError:
 720 |                 pass
 721 | 
 722 |         return Metadata(date, date_formatted, duration, title, bbb_version)
 723 | 
 724 |     def parse_slides_data(self, loaded_shapes: Element, metadata: Metadata) -> Dict[float, Frame]:
 725 |         frames = {}
 726 | 
 727 |         partitions = self.parse_slide_partitions(loaded_shapes, metadata.duration)
 728 |         self.parse_images(loaded_shapes, frames, metadata.duration)
 729 |         if not self.skip_annotations_opt:
 730 |             self.parse_drawings(loaded_shapes, frames, metadata.duration)
 731 | 
 732 |         only_zooms = {}
 733 |         loaded_zooms = self.load_xml('panzooms.xml', False)
 734 |         if loaded_zooms is not None:
 735 |             self.parse_zooms(loaded_zooms, frames, only_zooms, metadata.duration)
 736 | 
 737 |         if not self.skip_cursor_opt:
 738 |             loaded_cursors = self.load_xml('cursor.xml', False)
 739 |             if loaded_cursors is not None:
 740 |                 self.parse_cursors(loaded_cursors, frames, metadata.duration)
 741 | 
 742 |         frames = dict(sorted(frames.items(), key=lambda item: item[0]))
 743 |         only_zooms = dict(sorted(only_zooms.items(), key=lambda item: item[0], reverse=True))
 744 | 
 745 |         return frames, only_zooms, partitions
 746 | 
 747 |     def get_frame_by_timestamp(self, frames: Dict[float, Frame], timestamp: float):
 748 |         if timestamp not in frames:
 749 |             capture_filename = f'{timestamp}.png'
 750 |             capture_path = PT.get_in_dir(self.frames_dir, capture_filename)
 751 |             frames[timestamp] = Frame(timestamp, [], capture_filename, capture_path)
 752 |         return frames[timestamp]
 753 | 
 754 |     def parse_slide_partitions(self, loaded_shapes: Element, recording_duration: float) -> List[Tuple]:
 755 |         partitions = []
 756 |         slides = loaded_shapes.findall(_s("./svg:image[@class='slide']"))
 757 |         for image in slides:
 758 |             image_in = float(image.get('in'))
 759 |             image_out = float(image.get('out'))
 760 |             partitions.append((image_in, image_out))
 761 |         return partitions
 762 | 
 763 |     def parse_images(self, loaded_shapes: Element, frames: Dict[float, Frame], recording_duration: float):
 764 |         slides = loaded_shapes.findall(_s("./svg:image[@class='slide']"))
 765 |         for image in slides:
 766 |             image_id = image.get('id')
 767 |             image_id_value = self.NUMBER_RE.search(image_id).group()
 768 |             image_in = float(image.get('in'))
 769 |             image_out = float(image.get('out'))
 770 |             image_width = int(float(image.get('width')))
 771 |             image_height = int(float(image.get('height')))
 772 |             if image_in < recording_duration:
 773 |                 self.get_frame_by_timestamp(frames, image_in).actions.append(
 774 |                     Action(
 775 |                         action_type=ActionType.show_image,
 776 |                         element_id=image_id,
 777 |                         value=image_id_value,
 778 |                         width=image_width,
 779 |                         height=image_height,
 780 |                     )
 781 |                 )
 782 |                 self.get_frame_by_timestamp(frames, min(recording_duration, image_out)).actions.append(
 783 |                     Action(
 784 |                         action_type=ActionType.hide_image,
 785 |                         element_id=image_id,
 786 |                         value=image_id_value,
 787 |                     )
 788 |                 )
 789 | 
 790 |     def parse_drawings(self, loaded_shapes: Element, frames: Dict[float, Frame], recording_duration: float):
 791 |         drawings = loaded_shapes.findall(_s(".//svg:g[@timestamp]"))
 792 |         for drawing in drawings:
 793 |             drawing_id = drawing.get('id')
 794 |             drawing_shape_value = drawing.get('shape')
 795 |             drawing_in = float(drawing.get('timestamp'))
 796 |             drawing_out = float(drawing.get('undo'))
 797 |             if drawing_in < recording_duration:
 798 |                 self.get_frame_by_timestamp(frames, drawing_in).actions.append(
 799 |                     Action(
 800 |                         action_type=ActionType.show_drawing,
 801 |                         element_id=drawing_id,
 802 |                         value=drawing_shape_value,
 803 |                     )
 804 |                 )
 805 |                 if drawing_out != -1:
 806 |                     self.get_frame_by_timestamp(frames, min(recording_duration, drawing_out)).actions.append(
 807 |                         Action(
 808 |                             action_type=ActionType.hide_drawing,
 809 |                             element_id=drawing_id,
 810 |                         )
 811 |                     )
 812 | 
 813 |     def parse_zooms(
 814 |         self,
 815 |         loaded_zooms: Element,
 816 |         frames: Dict[float, Frame],
 817 |         only_zooms: Dict[float, Frame],
 818 |         recording_duration: float,
 819 |     ):
 820 |         zooms = loaded_zooms.findall("./event[@timestamp]")
 821 |         for zoom in zooms:
 822 |             zoom_in = float(zoom.get('timestamp'))
 823 |             zoom_value = zoom.find('viewBox').text
 824 |             zoom_value_split = zoom_value.split(' ')  # min-x min-y width height
 825 |             zoom_x = float(zoom_value_split[0])
 826 |             zoom_y = float(zoom_value_split[1])
 827 |             zoom_width = float(zoom_value_split[2])
 828 |             zoom_height = float(zoom_value_split[3])
 829 |             if zoom_in < recording_duration and zoom_width > 0 and zoom_height > 0:
 830 |                 zoom_action = Action(
 831 |                     action_type=ActionType.set_view_box,
 832 |                     value=zoom_value,
 833 |                     x=zoom_x,
 834 |                     y=zoom_y,
 835 |                     width=zoom_width,
 836 |                     height=zoom_height,
 837 |                 )
 838 |                 self.get_frame_by_timestamp(frames, zoom_in).actions.append(zoom_action)
 839 |                 self.get_frame_by_timestamp(only_zooms, zoom_in).actions.append(zoom_action)
 840 | 
 841 |     def parse_cursors(self, loaded_cursors: Element, frames: Dict[float, Frame], recording_duration: float):
 842 |         cursors = loaded_cursors.findall("./event[@timestamp]")
 843 |         for cursor in cursors:
 844 |             cursor_in = float(cursor.get('timestamp'))
 845 |             cursor_value_text = cursor.find('cursor').text.split(' ')
 846 |             cursor_x = float(cursor_value_text[0])
 847 |             cursor_y = float(cursor_value_text[1])
 848 |             cursor_value = (float(cursor_value_text[0]), float(cursor_value_text[1]))
 849 |             if cursor_in < recording_duration:
 850 |                 self.get_frame_by_timestamp(frames, cursor_in).actions.append(
 851 |                     Action(
 852 |                         action_type=ActionType.move_cursor,
 853 |                         x=cursor_x,
 854 |                         y=cursor_y,
 855 |                         value=cursor_value,
 856 |                     )
 857 |                 )
 858 | 
 859 |     def get_output_file_path(self, metadata: Metadata):
 860 |         if self.filename is not None:
 861 |             file_ext = PT.get_file_ext(self.filename)
 862 |             if file_ext is None or file_ext not in KNOWN_VIDEO_AUDIO_EXTENSIONS:
 863 |                 self.filename += '.mp4'
 864 |             return str(Path(self.output_dir) / PT.to_valid_name(self.filename))
 865 |         else:
 866 |             return str(
 867 |                 Path(self.output_dir) / PT.to_valid_name(metadata.date_formatted + '_' + metadata.title + '.mp4')
 868 |             )
 869 | 
 870 |     def get_output_audio_file_path(self, metadata: Metadata):
 871 |         if self.filename is not None:
 872 |             file_ext = PT.get_file_ext(self.filename)
 873 |             if file_ext is None or file_ext not in KNOWN_VIDEO_AUDIO_EXTENSIONS:
 874 |                 self.filename += '.mp3'
 875 |             return str(Path(self.output_dir) / PT.to_valid_name(self.filename))
 876 |         else:
 877 |             return str(
 878 |                 Path(self.output_dir) / PT.to_valid_name(metadata.date_formatted + '_' + metadata.title + '.mp3')
 879 |             )
 880 | 
 881 |     @classmethod
 882 |     def get_output_dir(
 883 |         cls,
 884 |         output_dir: str,
 885 |     ):
 886 |         return cls.check_directory(output_dir, os.getcwd(), 'output', '--output-dir')
 887 | 
 888 |     @classmethod
 889 |     def get_working_dir(
 890 |         cls,
 891 |         working_dir: str,
 892 |     ):
 893 |         return cls.check_directory(working_dir, PT.get_project_data_directory(), 'temporary', '--working-dir')
 894 | 
 895 |     @staticmethod
 896 |     def check_directory(path: str, default_path: str, file_type: str, option_name: str):
 897 |         if path is None:
 898 |             path = default_path
 899 | 
 900 |         path = PT.get_abs_path(path)
 901 |         try:
 902 |             PT.make_dirs(path)
 903 |         except (OSError, IOError) as err:
 904 |             Log.error(f'Error: Unable to create directory "{path}" for {file_type} files: {str(err)}')
 905 |             Log.warning(
 906 |                 f'You can choose an alternative directory for the {file_type} files with the {option_name} option.'
 907 |             )
 908 |             exit(-2)
 909 | 
 910 |         if not os.access(path, os.R_OK) or not os.access(path, os.W_OK):
 911 |             Log.error(f'Error: Unable to read or write in the directory for {file_type} files {path}')
 912 |             Log.warning(
 913 |                 f'You can choose an alternative directory for the {file_type} files with the {option_name} option.'
 914 |             )
 915 |             exit(-3)
 916 |         return path
 917 | 
 918 |     def get_frames_dir(self):
 919 |         frames_dir = PT.get_in_dir(self.tmp_dir, 'frames')
 920 |         try:
 921 |             PT.make_dirs(frames_dir)
 922 |         except (OSError, IOError) as err:
 923 |             Log.error(f'Error: Unable to create directory "{frames_dir}" for generated frames: {str(err)}')
 924 |             exit(-7)
 925 | 
 926 |         return frames_dir
 927 | 
 928 |     def get_tmp_dir(self, video_id):
 929 |         # We use a shorted version of the video id as name for the temporary directory
 930 |         short_video_id = hashlib.md5(video_id.encode(encoding='utf-8')).hexdigest()
 931 | 
 932 |         tmp_dir = PT.get_in_dir(self.working_dir, short_video_id)
 933 |         try:
 934 |             PT.make_dirs(tmp_dir)
 935 |         except (OSError, IOError) as err:
 936 |             Log.error(f'Error: Unable to create directory "{tmp_dir}" for temporary files: {str(err)}')
 937 |             exit(-5)
 938 | 
 939 |         return tmp_dir
 940 | 
 941 |     def remove_tmp_dir(self):
 942 |         Log.info("Cleanup")
 943 |         try:
 944 |             if os.path.exists(self.tmp_dir):
 945 |                 shutil.rmtree(self.tmp_dir)
 946 |         except (OSError, IOError) as err:
 947 |             Log.error(f'Error: Unable to remove directory "{self.tmp_dir}" for temporary files: {str(err)}')
 948 |             exit(-6)
 949 | 
 950 |     def get_bbb_link(self, rel_file_path: str):
 951 |         assert not rel_file_path.startswith('/') and not rel_file_path.startswith('\\')
 952 |         return self.presentation_base_url + '/' + rel_file_path
 953 | 
 954 |     async def get_can_continue_on_fail(self, url, session):
 955 |         try:
 956 |             headers = self.headers.copy()
 957 |             headers['Range'] = 'bytes=0-4'
 958 |             ssl_context = SslHelper.get_ssl_context(
 959 |                 skip_cert_verify=self.skip_cert_verify,
 960 |                 allow_insecure_ssl=self.allow_insecure_ssl,
 961 |                 use_all_ciphers=self.use_all_ciphers,
 962 |                 force_tls_version=self.force_tls_version,
 963 |             )
 964 |             resp = await session.get(url, headers=headers, ssl=ssl_context)
 965 |             return resp.headers.get('Content-Range') is not None and resp.status == 206
 966 |         except Exception as err:
 967 |             if self.verbose:
 968 |                 Log.debug(f"Failed to check if download can be continued on fail: {err}")
 969 |         return False
 970 | 
 971 |     async def batch_download_from_bbb(self, dl_jobs: List[str], is_essential: bool = True) -> List[bool]:
 972 |         """
 973 |         @param dl_jobs: List of rel_file_path
 974 |         @param is_essential: Applied to all jobs
 975 |         """
 976 | 
 977 |         semaphore = asyncio.Semaphore(self.max_parallel_dl)
 978 |         dl_results = await asyncio.gather(
 979 |             *[self.download_from_bbb(dl_job, semaphore) for dl_job in dl_jobs]
 980 |         )
 981 |         if is_essential:
 982 |             for idx, downloaded in enumerate(dl_results):
 983 |                 if not downloaded:
 984 |                     Log.error(f'Error: {dl_jobs[idx]} is essential. Abort! Please try again later!')
 985 |                     exit(1)
 986 |         return dl_results
 987 | 
 988 |     async def download_from_bbb(
 989 |         self,
 990 |         rel_file_path: str,
 991 |         semaphore: asyncio.Semaphore,
 992 |         conn_timeout: int = 10,
 993 |         read_timeout: int = 1800,
 994 |     ) -> bool:
 995 |         """Returns True if the file was successfully downloaded or exists"""
 996 |         local_path = PT.get_in_dir(self.tmp_dir, rel_file_path)
 997 |         if os.path.exists(local_path):
 998 |             # Warning: We do not check if the file is complete
 999 |             Log.info(f'{rel_file_path} is already present')
1000 |             return True
1001 |         else:
1002 |             PT.make_base_dir(local_path)
1003 |             dl_url = self.get_bbb_link(rel_file_path)
1004 |             if self.verbose:
1005 |                 Log.info(f'Downloading {rel_file_path} from: {dl_url}')
1006 |             else:
1007 |                 Log.info(f'Downloading {rel_file_path}...')
1008 | 
1009 |             received = 0
1010 |             total = 0
1011 |             tries_num = 0
1012 |             file_obj = None
1013 |             can_continue_on_fail = False
1014 |             headers = self.headers.copy()
1015 |             finished_successfully = False
1016 |             async with semaphore, aiohttp.ClientSession(
1017 |                 cookie_jar=self.get_cookie_jar(), conn_timeout=conn_timeout, read_timeout=read_timeout
1018 |             ) as session:
1019 |                 while tries_num < self.max_dl_retries:
1020 |                     try:
1021 |                         if tries_num > 0 and can_continue_on_fail:
1022 |                             headers["Range"] = f"bytes={received}-"
1023 |                         elif not can_continue_on_fail and 'Range' in headers:
1024 |                             del headers['Range']
1025 |                         ssl_context = SslHelper.get_ssl_context(
1026 |                             skip_cert_verify=self.skip_cert_verify,
1027 |                             allow_insecure_ssl=self.allow_insecure_ssl,
1028 |                             use_all_ciphers=self.use_all_ciphers,
1029 |                             force_tls_version=self.force_tls_version,
1030 |                         )
1031 |                         async with session.get(dl_url, headers=headers, raise_for_status=True, ssl=ssl_context) as resp:
1032 |                             # Download the file.
1033 |                             total = int(resp.headers.get("Content-Length", 0))
1034 |                             content_range = resp.headers.get("Content-Range", "")  # Example: bytes 200-1000/67589
1035 | 
1036 |                             if resp.status not in [200, 206]:
1037 |                                 if self.verbose:
1038 |                                     Log.debug(f"Warning {rel_file_path} got status {resp.status}")
1039 | 
1040 |                             if tries_num > 0 and can_continue_on_fail and not content_range and resp.status != 206:
1041 |                                 raise ContentRangeError(
1042 |                                     f"Server did not response for {rel_file_path} with requested range data"
1043 |                                 )
1044 |                             file_obj = file_obj or await aiofiles.open(local_path, "wb")
1045 |                             chunk = await resp.content.read(1024 * 10)
1046 |                             chunk_idx = 0
1047 |                             while chunk:
1048 |                                 received += len(chunk)
1049 |                                 if chunk_idx % 100 == 0:
1050 |                                     Log.info(f"{rel_file_path} got {format_bytes(received)} / {format_bytes(total)}")
1051 |                                 await file_obj.write(chunk)
1052 |                                 chunk = await resp.content.read(1024 * 10)
1053 |                                 chunk_idx += 1
1054 | 
1055 |                         if self.verbose:
1056 |                             Log.success(f'Downloaded {rel_file_path} to: {local_path}')
1057 |                         else:
1058 |                             Log.success(f'Successfully downloaded {rel_file_path}')
1059 | 
1060 |                         finished_successfully = True
1061 |                         break
1062 | 
1063 |                     except (ClientError, OSError, ValueError, ContentRangeError) as err:
1064 |                         if tries_num == 0:
1065 |                             can_continue_on_fail = await self.get_can_continue_on_fail(dl_url, session)
1066 |                         if (not can_continue_on_fail and received > 0) or isinstance(err, ContentRangeError):
1067 |                             can_continue_on_fail = False
1068 |                             # Clean up failed file because we can not recover
1069 |                             if file_obj is not None:
1070 |                                 await file_obj.close()
1071 |                                 file_obj = None
1072 |                             if os.path.exists(local_path):
1073 |                                 os.unlink(local_path)
1074 |                             received = 0
1075 | 
1076 |                         if isinstance(err, ClientResponseError):
1077 |                             if err.status in [408, 409, 429]:  # pylint: disable=no-member
1078 |                                 # 408 (timeout) or 409 (conflict) and 429 (too many requests)
1079 |                                 # Retry after 1 sec
1080 |                                 await asyncio.sleep(1)
1081 |                             else:
1082 |                                 Log.info(f'{rel_file_path} could not be downloaded: {err.status} {err.message}')
1083 |                                 if self.verbose:
1084 |                                     Log.info(f'Error: {str(err)}')
1085 |                                 break
1086 | 
1087 |                         if self.verbose:
1088 |                             Log.warning(
1089 |                                 f'(Try {tries_num} of {self.max_dl_retries})'
1090 |                                 + f' Unable to download "{rel_file_path}": {str(err)}'
1091 |                             )
1092 |                         tries_num += 1
1093 | 
1094 |             if file_obj is not None:
1095 |                 await file_obj.close()
1096 |             if not finished_successfully:
1097 |                 if os.path.exists(local_path):
1098 |                     os.unlink(local_path)
1099 |                 return False
1100 |             return True
1101 | 
1102 |     def load_xml(self, rel_file_path: str, is_essential: bool = True):
1103 |         local_path = PT.get_in_dir(self.tmp_dir, rel_file_path)
1104 |         if os.path.exists(local_path):
1105 |             try:
1106 |                 tree_root = ET.parse(local_path).getroot()
1107 |                 return tree_root
1108 |             except ParseError as err:
1109 |                 Log.error(f'Unable to parse XML file "{local_path}": {str(err)}')
1110 |                 if is_essential:
1111 |                     Log.error('Error: This XML file is essential. Abort! Please try again later!')
1112 |                     exit(2)
1113 |                 else:
1114 |                     return None
1115 |         else:
1116 |             if is_essential:
1117 |                 Log.error(f'Error: Can not find {local_path}. This XML file is essential. Please try again later!')
1118 |                 exit(2)
1119 |             else:
1120 |                 return None
1121 | 
1122 |     def final_mux(
1123 |         self,
1124 |         slideshow_path: str,
1125 |         webcams_path: str,
1126 |         webcams_rel_path: str,
1127 |         metadata: Metadata,
1128 |     ):
1129 |         webcam_is_empty = False
1130 |         if not self.skip_webcam_opt and not self.skip_webcam_freeze_detection_opt:
1131 |             Log.info(f'Try to detect freeze in {webcams_rel_path}...')
1132 |             with Timer() as t:
1133 |                 webcam_is_empty = asyncio.run(self.ffmpeg.freeze_detect(webcams_path))
1134 | 
1135 |             Log.info(f'Detection of freeze finished and took: {formatSeconds(t.duration)}')
1136 |             if webcam_is_empty:
1137 |                 Log.yellow('Webcam is empty, webcam will not be added to the final presentation')
1138 | 
1139 |         Log.info("Mux final slideshow")
1140 |         result_path = self.get_output_file_path(metadata)
1141 |         if os.path.isfile(result_path):
1142 |             Log.warning("Final Slideshow already exists. Abort!")
1143 |             exit(0)
1144 | 
1145 |         with Timer() as t:
1146 |             if self.skip_webcam_opt or webcam_is_empty:
1147 |                 asyncio.run(
1148 |                     self.ffmpeg.add_audio_to_slideshow(
1149 |                         slideshow_path,
1150 |                         webcams_path,
1151 |                         result_path,
1152 |                     )
1153 |                 )
1154 |             else:
1155 |                 asyncio.run(
1156 |                     self.ffmpeg.add_webcam_to_slideshow(
1157 |                         slideshow_path,
1158 |                         webcams_path,
1159 |                         self.slideshow_width,
1160 |                         self.slideshow_height,
1161 |                         result_path,
1162 |                     )
1163 |                 )
1164 | 
1165 |         Log.info(f'Mux final slideshow finished and took: {formatSeconds(t.duration)}')
1166 |         return result_path
1167 | 
1168 |     def add_deskshare_to_slideshow(
1169 |         self,
1170 |         slideshow_path: str,
1171 |         deskshare_path: str,
1172 |         deskshare_events: List[Deskshare],
1173 |         metadata: Metadata,
1174 |     ):
1175 |         if deskshare_path is None or len(deskshare_events) == 0:
1176 |             return slideshow_path
1177 | 
1178 |         presentation_path = PT.get_in_dir(self.tmp_dir, 'presentation.mp4')
1179 |         if os.path.isfile(presentation_path):
1180 |             Log.warning('Slideshow with deskshare does already exist! Skipping rendering!')
1181 |             return presentation_path
1182 | 
1183 |         Log.info('Resizing screen share...')
1184 |         resized_deskshare_path = PT.get_in_dir(self.tmp_dir, 'deskshare.mp4')
1185 |         if os.path.isfile(resized_deskshare_path):
1186 |             Log.warning('Resized screen share does already exist! Skipping rendering!')
1187 |         else:
1188 |             with Timer() as t:
1189 |                 asyncio.run(
1190 |                     self.ffmpeg.resize_deskshare(
1191 |                         deskshare_path,
1192 |                         resized_deskshare_path,
1193 |                         self.slideshow_width,
1194 |                         self.slideshow_height,
1195 |                     )
1196 |                 )
1197 |             Log.info(f'Resizing screen share finished and took: {formatSeconds(t.duration)}')
1198 | 
1199 |         Log.info('Start adding screen share to slideshow...')
1200 |         deskshare_txt_path = PT.get_in_dir(self.tmp_dir, 'deskshare.txt')
1201 |         with open(deskshare_txt_path, 'w', encoding="utf-8") as concat_file:
1202 |             for idx, event in enumerate(deskshare_events):
1203 |                 if idx == 0 and event.start_timestamp > 0:
1204 |                     # Adding beginning
1205 |                     duration = math.floor(10 * (event.start_timestamp) + 0.5) / 10
1206 |                     concat_file.write("file 'slideshow.mp4'\n")
1207 |                     concat_file.write("inpoint 0.0\n")
1208 |                     concat_file.write(f"outpoint {formatSeconds(event.start_timestamp, msec=True)}\n")
1209 |                     concat_file.write(f"duration {formatSeconds(duration, msec=True)}\n")
1210 |                 elif idx > 0:
1211 |                     # Adding part between deskshare
1212 |                     duration = (
1213 |                         math.floor(10 * (event.start_timestamp - deskshare_events[idx - 1].stop_timestamp) + 0.5) / 10
1214 |                     )
1215 |                     concat_file.write("file 'slideshow.mp4'\n")
1216 |                     concat_file.write(f"inpoint {formatSeconds(deskshare_events[idx - 1].stop_timestamp, msec=True)}\n")
1217 |                     concat_file.write(f"outpoint {formatSeconds(event.start_timestamp, msec=True)}\n")
1218 |                     concat_file.write(f"duration {formatSeconds(duration, msec=True)}\n")
1219 | 
1220 |                 # Adding deskshare
1221 |                 duration = math.floor(10 * (event.stop_timestamp - event.start_timestamp) + 0.5) / 10
1222 |                 concat_file.write("file 'deskshare.mp4'\n")
1223 |                 concat_file.write(f"inpoint {formatSeconds(event.start_timestamp, msec=True)}\n")
1224 |                 concat_file.write(f"outpoint {formatSeconds(event.stop_timestamp, msec=True)}\n")
1225 |                 concat_file.write(f"duration {formatSeconds(duration, msec=True)}\n")
1226 | 
1227 |                 if idx == (len(deskshare_events) - 1) and event.stop_timestamp < metadata.duration:
1228 |                     # Adding finish
1229 |                     duration = math.floor(10 * (metadata.duration - event.stop_timestamp) + 0.5) / 10
1230 |                     concat_file.write("file 'slideshow.mp4'\n")
1231 |                     concat_file.write(f"inpoint {formatSeconds(event.stop_timestamp, msec=True)}\n")
1232 |                     concat_file.write(f"outpoint {formatSeconds(metadata.duration, msec=True)}\n")
1233 |                     concat_file.write(f"duration {formatSeconds(duration, msec=True)}\n")
1234 | 
1235 |         with Timer() as t:
1236 |             asyncio.run(self.ffmpeg.add_deskshare_to_slideshow(deskshare_txt_path, presentation_path))
1237 |         Log.info(f'Adding screen share to slideshow finished and took: {formatSeconds(t.duration)}')
1238 |         return presentation_path
1239 | 
1240 |     def create_slideshow(self, frames: Dict[float, Frame]):
1241 |         Log.info('Start creating slideshow...')
1242 |         slideshow_path = PT.get_in_dir(self.tmp_dir, 'slideshow.mp4')
1243 |         if os.path.isfile(slideshow_path):
1244 |             Log.warning('Slideshow does already exist! Skipping rendering!')
1245 |             return slideshow_path
1246 | 
1247 |         slideshow_txt_path = PT.get_in_dir(self.frames_dir, 'slideshow.txt')
1248 |         with open(slideshow_txt_path, 'w', encoding="utf-8") as concat_file:
1249 |             timestamps = list(frames.keys())
1250 |             for idx in range(len(timestamps) - 1):
1251 |                 duration = math.floor(10 * (timestamps[idx + 1] - timestamps[idx]) + 0.5) / 10
1252 |                 concat_file.write(f"file '{frames[timestamps[idx]].capture_filename}'\n")
1253 |                 concat_file.write(f"duration {formatSeconds(duration, msec=True)}\n")
1254 | 
1255 |             # We use the second to last frame again, because the last frame is always empty.
1256 |             # concat_file.write(f"file {frames[timestamps[-2]].capture_filename}\n")
1257 | 
1258 |         with Timer() as t:
1259 |             asyncio.run(self.ffmpeg.create_slideshow(slideshow_txt_path, slideshow_path))
1260 |         Log.info(f'Creating slideshow finished and took: {formatSeconds(t.duration)}')
1261 |         return slideshow_path
1262 | 
1263 |     def extract_audio(
1264 |         self,
1265 |         webcams_path: str,
1266 |         metadata: Metadata,
1267 |     ):
1268 |         Log.info('Start extracting audio...')
1269 |         result_path = self.get_output_audio_file_path(metadata)
1270 |         if os.path.isfile(result_path):
1271 |             Log.warning('Final Audio already exists. Abort!')
1272 |             return result_path
1273 |         with Timer() as t:
1274 |             asyncio.run(self.ffmpeg.extract_audio(webcams_path, result_path))
1275 |         Log.info(f'Extracting audio finished and took: {formatSeconds(t.duration)}')
1276 |         return result_path
1277 | 
1278 | 
def get_parser():
    """
    Build the command line interface of bbb-dl.

    Returns:
        An `argparse.ArgumentParser` with the positional `URL` argument and all
        optional switches and settings registered.
    """
    parser = argparse.ArgumentParser(
        description='Big Blue Button Downloader that downloads a BBB lesson as MP4 video'
    )
    add_option = parser.add_argument

    def add_switch(*flags, **settings):
        # Boolean option: False by default, True when the flag is given.
        add_option(*flags, action='store_true', **settings)

    add_option('URL', type=str, help='URL of a BBB lesson')

    # What to produce and which rendering steps to skip
    add_switch(
        '-ao',
        '--audio-only',
        help='Extract only the audio from the presentation, do not generate video.',
    )
    add_switch(
        '-sw',
        '--skip-webcam',
        help=(
            'Skip adding the webcam video as an overlay to the final video.'
            ' This will reduce the time to generate the final video'
        ),
    )
    add_switch(
        '-swfd',
        '--skip-webcam-freeze-detection',
        help=(
            'Skip detecting if the webcam video is completely empty.'
            ' It is assumed the webcam recording is not empty.'
            ' This will reduce the time to generate the final video'
        ),
    )
    add_switch(
        '-sa',
        '--skip-annotations',
        help='Skip capturing the annotations of the professor. This will reduce the time to generate the final video',
    )
    add_switch(
        '-sc',
        '--skip-cursor',
        help='Skip capturing the cursor of the professor. This will reduce the time to generate the final video',
    )
    add_switch(
        '-sz',
        '--skip-zoom',
        help=(
            'Skip zooming into the presentation. All presentation slides are rendered in full size,'
            ' which may result in sharper output video. However, consequently also to smaller font.'
        ),
    )

    # Handling of downloaded and temporary files
    add_switch(
        '-bk',
        '--backup',
        help=(
            'Downloads all the content from the server and then stops. After using this option, you can run bbb-dl'
            ' again to create the video based on the saved files'
        ),
    )
    add_switch(
        '-kt',
        '--keep-tmp-files',
        help='Keep the temporary files after finish. In case of an error bbb-dl will reuse the already generated files',
    )

    add_switch('-v', '--verbose', help='Print more verbose debug information')

    add_option(
        '--ffmpeg-location',
        type=str,
        default=None,
        help=(
            'Optional path to the directory in that your installed ffmpeg executable is located'
            ' (Use it if ffmpeg is not located in your system PATH)'
        ),
    )

    # TLS / certificate handling
    add_switch('-scv', '--skip-cert-verify', help='Suppress HTTPS certificate validation')
    add_switch(
        '-ais',
        '--allow-insecure-ssl',
        dest='allow_insecure_ssl',
        default=False,
        help='Allow connections to unpatched servers. Use this option if your server uses a very old SSL version.',
    )
    add_switch(
        '-uac',
        '--use-all-ciphers',
        dest='use_all_ciphers',
        default=False,
        help=(
            'Allow connections to servers that use insecure ciphers.'
            ' Use this option if your server uses an insecure cipher.'
        ),
    )
    add_option(
        '-ftv',
        '--force-tls-version',
        type=str,
        help='Force the client to use a specify tls version. E.g: TLSv1_3',
    )

    add_option(
        '--version',
        action='version',
        version='bbb-dl ' + __version__,
        help='Print program version and exit',
    )

    # Settings passed on to ffmpeg
    add_option(
        '--encoder',
        dest='encoder',
        type=str,
        default='libx264',
        help='Optional encoder to pass to ffmpeg (default libx264)',
    )
    add_option(
        '--audiocodec',
        dest='audiocodec',
        type=str,
        default='copy',
        help='Optional audiocodec to pass to ffmpeg (default copy the codec from the original source)',
    )
    add_option(
        '--preset',
        dest='preset',
        type=str,
        default='fast',
        help='Optional preset to pass to ffmpeg (default fast, a preset that can be used with all encoders)',
    )
    add_option(
        '--crf',
        dest='crf',
        type=int,
        default=23,
        help=(
            'Optional crf to pass to ffmpeg'
            ' (default 23, lower crf (e.g 22) usually means larger file size and better video quality)'
        ),
    )

    # Output and working locations
    add_option('-f', '--filename', type=str, default=None, help='Optional output filename')
    add_option('-od', '--output-dir', type=str, default=None, help='Optional output directory for final video')
    add_option(
        '-wd',
        '--working-dir',
        type=str,
        default=None,
        help='Optional output directory for all temporary directories/files',
    )

    add_option(
        '-mpc',
        '--max-parallel-chromes',
        type=int,
        default=10,
        help='Maximum number of chrome browser instances used to generate frames',
    )

    # Forced output dimensions
    add_option(
        '-fw',
        '--force-width',
        type=int,
        default=None,
        help='Force width on final output. (e.g. 1280) This can reduce the time to generate the final video',
    )
    add_option(
        '-fh',
        '--force-height',
        type=int,
        default=None,
        help='Force height on final output. (e.g. 720) This can reduce the time to generate the final video',
    )

    return parser
1484 | 
1485 | 
1486 | # --- called at the program invocation: -------------------------------------
def main(args=None):
    """
    Command line entry point: parse the arguments, run the download and report the total time.

    Args:
        args: Optional list of argument strings handed to `parse_args`; None is passed
            through unchanged.
    """
    just_fix_windows_console()
    options = get_parser().parse_args(args)

    with Timer() as total_timer:
        # BBBDL is called with positional arguments only, so the order of this tuple
        # has to match the constructor's parameter order.
        constructor_args = (
            options.URL,
            options.filename,
            options.output_dir,
            options.verbose,
            options.skip_cert_verify,
            options.allow_insecure_ssl,
            options.use_all_ciphers,
            options.force_tls_version,
            options.encoder,
            options.audiocodec,
            options.skip_webcam,
            options.skip_webcam_freeze_detection,
            options.skip_annotations,
            options.skip_cursor,
            options.skip_zoom,
            options.keep_tmp_files,
            options.ffmpeg_location,
            options.working_dir,
            options.backup,
            options.max_parallel_chromes,
            options.force_width,
            options.force_height,
            options.preset,
            options.crf,
        )
        downloader = BBBDL(*constructor_args)
        job = downloader.run_audio_only if options.audio_only else downloader.run
        job()
    Log.info(f'BBB-DL finished and took: {formatSeconds(total_timer.duration)}')
1523 | 


--------------------------------------------------------------------------------