├── .github ├── CODEOWNERS └── workflows │ └── ci.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── LICENSE.md ├── MANIFEST.in ├── README.md ├── pyproject.toml ├── setup.cfg ├── src └── corpus_replicator │ ├── __init__.py │ ├── __main__.py │ ├── common.py │ ├── core.py │ ├── generate_corpus.py │ ├── generate_template.py │ ├── recipes │ ├── animation_apng_png_ffmpeg.yml │ ├── animation_gif_gif_ffmpeg.yml │ ├── animation_webp_webp_libwebp.yml │ ├── audio_aac_mp4_ffmpeg.yml │ ├── audio_flac_flac_libflac.yml │ ├── audio_mp3_mp3_libmp3lame.yml │ ├── audio_mp3_mp3_shine.yml │ ├── audio_opus_opus_libopus.yml │ ├── audio_pcm_wav_ffmpeg.yml │ ├── audio_vorbis_ogg_libvorbis.yml │ ├── image_avif_avif_imagemagick.yml │ ├── image_bmp_bmp_ffmpeg.yml │ ├── image_gif_gif_ffmpeg.yml │ ├── image_heic_heic_imagemagick.yml │ ├── image_ico_ico_ffmpeg.yml │ ├── image_jpg_jpg_ffmpeg.yml │ ├── image_png_png_ffmpeg.yml │ ├── image_webp_webp_libwebp.yml │ ├── schema.json │ ├── video_av1_webm_libaom.yml │ ├── video_h264_264_libx264.yml │ ├── video_h264_mp4_libx264.yml │ ├── video_h265_mp4_libx265.yml │ ├── video_theora_ogg_libtheora.yml │ ├── video_vp8_webm_libvpx.yml │ ├── video_vp9_webm_libvpx.yml │ └── video_xvid_mp4_libxvid.yml │ ├── test_common.py │ ├── test_generate_corpus.py │ ├── test_generate_template.py │ ├── test_replicator.py │ └── tools │ ├── __init__.py │ ├── ffmpeg.py │ ├── imagemagick.py │ ├── test_ffmpeg.py │ └── test_imagemagick.py └── tox.ini /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @MozillaSecurity/fuzzing-team-reviewers 2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Python CI 2 | 3 | on: 4 | pull_request: 5 | branches: [main] 6 | push: 7 | branches: [main] 8 | release: 9 | types: [released] 10 | 11 | jobs: 12 | test: 13 | name: 
Python ${{ matrix.python-version }} (${{ matrix.platform }}) 14 | runs-on: ${{ matrix.platform }} 15 | 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | include: 20 | - python-version: "3.9" 21 | platform: ubuntu-latest 22 | toxenv: py39 23 | - python-version: "3.10" 24 | platform: ubuntu-latest 25 | toxenv: py310 26 | - python-version: "3.11" 27 | platform: ubuntu-latest 28 | toxenv: py311 29 | - python-version: "3.12" 30 | platform: ubuntu-latest 31 | toxenv: py312 32 | - python-version: "3.13" 33 | platform: ubuntu-latest 34 | toxenv: py313 35 | - python-version: "3.12" 36 | platform: windows-latest 37 | toxenv: py312 38 | 39 | steps: 40 | - uses: actions/checkout@v4 41 | 42 | - name: Set up Python ${{ matrix.python-version }} 43 | uses: actions/setup-python@v5 44 | with: 45 | python-version: ${{ matrix.python-version }} 46 | 47 | - name: Install tox 48 | run: python -m pip install --upgrade tox 49 | 50 | - name: Run lint 51 | run: tox -e lint 52 | 53 | - name: Run tests 54 | run: tox -e ${{ matrix.toxenv }} 55 | 56 | - name: Run Codecov 57 | env: 58 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 59 | run: tox -e codecov 60 | 61 | publish: 62 | name: Build & Publish to PyPI 63 | if: github.event_name == 'release' 64 | needs: test 65 | runs-on: ubuntu-latest 66 | 67 | steps: 68 | - uses: actions/checkout@v4 69 | 70 | - name: Set up Python 71 | uses: actions/setup-python@v5 72 | with: 73 | python-version: "3.12" 74 | 75 | - name: Install tox 76 | run: python -m pip install --upgrade tox 77 | 78 | - name: Publish to PyPI 79 | env: 80 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 81 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 82 | run: tox -e pypi 83 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore - List of filenames git should ignore 2 | 3 | ############################### 4 | # Generated by other programs # 5 | 
############################### 6 | 7 | *~ 8 | *.pyc 9 | .DS_Store 10 | .gdb_history 11 | 12 | # Code coverage 13 | htmlcov/ 14 | .coverage 15 | 16 | # Linting via pytest 17 | /.cache/ 18 | .pytest_cache/ 19 | .tox/ 20 | 21 | # Setuptools folders. 22 | /build/ 23 | /dist/ 24 | 25 | # Python egg metadata, regenerated from source files by setuptools. 26 | *.egg-info 27 | .eggs/ 28 | 29 | # mypy 30 | .mypy_cache/ 31 | 32 | # profiling data 33 | *.prof 34 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | rev: v0.9.3 4 | hooks: 5 | - id: ruff 6 | args: [--fix] 7 | - id: ruff-format 8 | - repo: https://github.com/pre-commit/pre-commit-hooks 9 | rev: v5.0.0 10 | hooks: 11 | - id: check-added-large-files 12 | - id: check-ast 13 | - id: check-case-conflict 14 | - id: check-docstring-first 15 | - id: check-executables-have-shebangs 16 | - id: check-merge-conflict 17 | - id: check-symlinks 18 | - id: check-json 19 | - id: check-toml 20 | - id: check-yaml 21 | - id: debug-statements 22 | - id: end-of-file-fixer 23 | - id: mixed-line-ending 24 | - id: name-tests-test 25 | args: ["--django"] 26 | - id: requirements-txt-fixer 27 | - id: trailing-whitespace 28 | - repo: https://github.com/codespell-project/codespell 29 | rev: v2.4.0 30 | hooks: 31 | - id: codespell 32 | exclude_types: [json] 33 | - repo: https://github.com/python-jsonschema/check-jsonschema 34 | rev: 0.31.0 35 | hooks: 36 | - id: check-jsonschema 37 | name: "Check recipe schema" 38 | files: ^src/corpus_replicator/recipes/ 39 | types: [yaml] 40 | args: ["--schemafile", "./src/corpus_replicator/recipes/schema.json"] 41 | - repo: meta 42 | hooks: 43 | - id: check-useless-excludes 44 | - repo: https://github.com/jorisroovers/gitlint 45 | rev: v0.19.1 46 | hooks: 47 | - id: gitlint 48 | args: 
[--contrib=contrib-title-conventional-commits, --ignore=body-is-missing, --msg-filename] 49 | stages: [commit-msg] 50 | - repo: local 51 | hooks: 52 | - id: mypy 53 | name: mypy 54 | entry: tox -e mypy -- 55 | language: system 56 | require_serial: true 57 | types: [python] 58 | - id: pylint 59 | name: pylint 60 | entry: tox -e pylint -- 61 | language: system 62 | require_serial: true 63 | types: [python] 64 | 65 | default_language_version: 66 | python: python3 67 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Community Participation Guidelines 2 | 3 | This repository is governed by Mozilla's code of conduct and etiquette guidelines. 4 | For more details, please read the 5 | [Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/). 6 | 7 | ## How to Report 8 | For more information on how to report violations of the Community Participation Guidelines, please read our '[How to Report](https://www.mozilla.org/about/governance/policies/participation/reporting/)' page. 9 | 10 | 16 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Mozilla Public License Version 2.0 2 | ================================== 3 | 4 | ### 1. Definitions 5 | 6 | **1.1. “Contributor”** 7 | means each individual or legal entity that creates, contributes to 8 | the creation of, or owns Covered Software. 9 | 10 | **1.2. “Contributor Version”** 11 | means the combination of the Contributions of others (if any) used 12 | by a Contributor and that particular Contributor's Contribution. 13 | 14 | **1.3. “Contribution”** 15 | means Covered Software of a particular Contributor. 16 | 17 | **1.4. 
“Covered Software”** 18 | means Source Code Form to which the initial Contributor has attached 19 | the notice in Exhibit A, the Executable Form of such Source Code 20 | Form, and Modifications of such Source Code Form, in each case 21 | including portions thereof. 22 | 23 | **1.5. “Incompatible With Secondary Licenses”** 24 | means 25 | 26 | * **(a)** that the initial Contributor has attached the notice described 27 | in Exhibit B to the Covered Software; or 28 | * **(b)** that the Covered Software was made available under the terms of 29 | version 1.1 or earlier of the License, but not also under the 30 | terms of a Secondary License. 31 | 32 | **1.6. “Executable Form”** 33 | means any form of the work other than Source Code Form. 34 | 35 | **1.7. “Larger Work”** 36 | means a work that combines Covered Software with other material, in 37 | a separate file or files, that is not Covered Software. 38 | 39 | **1.8. “License”** 40 | means this document. 41 | 42 | **1.9. “Licensable”** 43 | means having the right to grant, to the maximum extent possible, 44 | whether at the time of the initial grant or subsequently, any and 45 | all of the rights conveyed by this License. 46 | 47 | **1.10. “Modifications”** 48 | means any of the following: 49 | 50 | * **(a)** any file in Source Code Form that results from an addition to, 51 | deletion from, or modification of the contents of Covered 52 | Software; or 53 | * **(b)** any new file in Source Code Form that contains any Covered 54 | Software. 55 | 56 | **1.11. “Patent Claims” of a Contributor** 57 | means any patent claim(s), including without limitation, method, 58 | process, and apparatus claims, in any patent Licensable by such 59 | Contributor that would be infringed, but for the grant of the 60 | License, by the making, using, selling, offering for sale, having 61 | made, import, or transfer of either its Contributions or its 62 | Contributor Version. 63 | 64 | **1.12. 
“Secondary License”** 65 | means either the GNU General Public License, Version 2.0, the GNU 66 | Lesser General Public License, Version 2.1, the GNU Affero General 67 | Public License, Version 3.0, or any later versions of those 68 | licenses. 69 | 70 | **1.13. “Source Code Form”** 71 | means the form of the work preferred for making modifications. 72 | 73 | **1.14. “You” (or “Your”)** 74 | means an individual or a legal entity exercising rights under this 75 | License. For legal entities, “You” includes any entity that 76 | controls, is controlled by, or is under common control with You. For 77 | purposes of this definition, “control” means **(a)** the power, direct 78 | or indirect, to cause the direction or management of such entity, 79 | whether by contract or otherwise, or **(b)** ownership of more than 80 | fifty percent (50%) of the outstanding shares or beneficial 81 | ownership of such entity. 82 | 83 | 84 | ### 2. License Grants and Conditions 85 | 86 | #### 2.1. Grants 87 | 88 | Each Contributor hereby grants You a world-wide, royalty-free, 89 | non-exclusive license: 90 | 91 | * **(a)** under intellectual property rights (other than patent or trademark) 92 | Licensable by such Contributor to use, reproduce, make available, 93 | modify, display, perform, distribute, and otherwise exploit its 94 | Contributions, either on an unmodified basis, with Modifications, or 95 | as part of a Larger Work; and 96 | * **(b)** under Patent Claims of such Contributor to make, use, sell, offer 97 | for sale, have made, import, and otherwise transfer either its 98 | Contributions or its Contributor Version. 99 | 100 | #### 2.2. Effective Date 101 | 102 | The licenses granted in Section 2.1 with respect to any Contribution 103 | become effective for each Contribution on the date the Contributor first 104 | distributes such Contribution. 105 | 106 | #### 2.3. 
Limitations on Grant Scope 107 | 108 | The licenses granted in this Section 2 are the only rights granted under 109 | this License. No additional rights or licenses will be implied from the 110 | distribution or licensing of Covered Software under this License. 111 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 112 | Contributor: 113 | 114 | * **(a)** for any code that a Contributor has removed from Covered Software; 115 | or 116 | * **(b)** for infringements caused by: **(i)** Your and any other third party's 117 | modifications of Covered Software, or **(ii)** the combination of its 118 | Contributions with other software (except as part of its Contributor 119 | Version); or 120 | * **(c)** under Patent Claims infringed by Covered Software in the absence of 121 | its Contributions. 122 | 123 | This License does not grant any rights in the trademarks, service marks, 124 | or logos of any Contributor (except as may be necessary to comply with 125 | the notice requirements in Section 3.4). 126 | 127 | #### 2.4. Subsequent Licenses 128 | 129 | No Contributor makes additional grants as a result of Your choice to 130 | distribute the Covered Software under a subsequent version of this 131 | License (see Section 10.2) or under the terms of a Secondary License (if 132 | permitted under the terms of Section 3.3). 133 | 134 | #### 2.5. Representation 135 | 136 | Each Contributor represents that the Contributor believes its 137 | Contributions are its original creation(s) or it has sufficient rights 138 | to grant the rights to its Contributions conveyed by this License. 139 | 140 | #### 2.6. Fair Use 141 | 142 | This License is not intended to limit any rights You have under 143 | applicable copyright doctrines of fair use, fair dealing, or other 144 | equivalents. 145 | 146 | #### 2.7. Conditions 147 | 148 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 149 | in Section 2.1. 150 | 151 | 152 | ### 3. 
Responsibilities 153 | 154 | #### 3.1. Distribution of Source Form 155 | 156 | All distribution of Covered Software in Source Code Form, including any 157 | Modifications that You create or to which You contribute, must be under 158 | the terms of this License. You must inform recipients that the Source 159 | Code Form of the Covered Software is governed by the terms of this 160 | License, and how they can obtain a copy of this License. You may not 161 | attempt to alter or restrict the recipients' rights in the Source Code 162 | Form. 163 | 164 | #### 3.2. Distribution of Executable Form 165 | 166 | If You distribute Covered Software in Executable Form then: 167 | 168 | * **(a)** such Covered Software must also be made available in Source Code 169 | Form, as described in Section 3.1, and You must inform recipients of 170 | the Executable Form how they can obtain a copy of such Source Code 171 | Form by reasonable means in a timely manner, at a charge no more 172 | than the cost of distribution to the recipient; and 173 | 174 | * **(b)** You may distribute such Executable Form under the terms of this 175 | License, or sublicense it under different terms, provided that the 176 | license for the Executable Form does not attempt to limit or alter 177 | the recipients' rights in the Source Code Form under this License. 178 | 179 | #### 3.3. Distribution of a Larger Work 180 | 181 | You may create and distribute a Larger Work under terms of Your choice, 182 | provided that You also comply with the requirements of this License for 183 | the Covered Software. 
If the Larger Work is a combination of Covered 184 | Software with a work governed by one or more Secondary Licenses, and the 185 | Covered Software is not Incompatible With Secondary Licenses, this 186 | License permits You to additionally distribute such Covered Software 187 | under the terms of such Secondary License(s), so that the recipient of 188 | the Larger Work may, at their option, further distribute the Covered 189 | Software under the terms of either this License or such Secondary 190 | License(s). 191 | 192 | #### 3.4. Notices 193 | 194 | You may not remove or alter the substance of any license notices 195 | (including copyright notices, patent notices, disclaimers of warranty, 196 | or limitations of liability) contained within the Source Code Form of 197 | the Covered Software, except that You may alter any license notices to 198 | the extent required to remedy known factual inaccuracies. 199 | 200 | #### 3.5. Application of Additional Terms 201 | 202 | You may choose to offer, and to charge a fee for, warranty, support, 203 | indemnity or liability obligations to one or more recipients of Covered 204 | Software. However, You may do so only on Your own behalf, and not on 205 | behalf of any Contributor. You must make it absolutely clear that any 206 | such warranty, support, indemnity, or liability obligation is offered by 207 | You alone, and You hereby agree to indemnify every Contributor for any 208 | liability incurred by such Contributor as a result of warranty, support, 209 | indemnity or liability terms You offer. You may include additional 210 | disclaimers of warranty and limitations of liability specific to any 211 | jurisdiction. 212 | 213 | 214 | ### 4. 
Inability to Comply Due to Statute or Regulation 215 | 216 | If it is impossible for You to comply with any of the terms of this 217 | License with respect to some or all of the Covered Software due to 218 | statute, judicial order, or regulation then You must: **(a)** comply with 219 | the terms of this License to the maximum extent possible; and **(b)** 220 | describe the limitations and the code they affect. Such description must 221 | be placed in a text file included with all distributions of the Covered 222 | Software under this License. Except to the extent prohibited by statute 223 | or regulation, such description must be sufficiently detailed for a 224 | recipient of ordinary skill to be able to understand it. 225 | 226 | 227 | ### 5. Termination 228 | 229 | **5.1.** The rights granted under this License will terminate automatically 230 | if You fail to comply with any of its terms. However, if You become 231 | compliant, then the rights granted under this License from a particular 232 | Contributor are reinstated **(a)** provisionally, unless and until such 233 | Contributor explicitly and finally terminates Your grants, and **(b)** on an 234 | ongoing basis, if such Contributor fails to notify You of the 235 | non-compliance by some reasonable means prior to 60 days after You have 236 | come back into compliance. Moreover, Your grants from a particular 237 | Contributor are reinstated on an ongoing basis if such Contributor 238 | notifies You of the non-compliance by some reasonable means, this is the 239 | first time You have received notice of non-compliance with this License 240 | from such Contributor, and You become compliant prior to 30 days after 241 | Your receipt of the notice. 
242 | 243 | **5.2.** If You initiate litigation against any entity by asserting a patent 244 | infringement claim (excluding declaratory judgment actions, 245 | counter-claims, and cross-claims) alleging that a Contributor Version 246 | directly or indirectly infringes any patent, then the rights granted to 247 | You by any and all Contributors for the Covered Software under Section 248 | 2.1 of this License shall terminate. 249 | 250 | **5.3.** In the event of termination under Sections 5.1 or 5.2 above, all 251 | end user license agreements (excluding distributors and resellers) which 252 | have been validly granted by You or Your distributors under this License 253 | prior to termination shall survive termination. 254 | 255 | 256 | ### 6. Disclaimer of Warranty 257 | 258 | > Covered Software is provided under this License on an “as is” 259 | > basis, without warranty of any kind, either expressed, implied, or 260 | > statutory, including, without limitation, warranties that the 261 | > Covered Software is free of defects, merchantable, fit for a 262 | > particular purpose or non-infringing. The entire risk as to the 263 | > quality and performance of the Covered Software is with You. 264 | > Should any Covered Software prove defective in any respect, You 265 | > (not any Contributor) assume the cost of any necessary servicing, 266 | > repair, or correction. This disclaimer of warranty constitutes an 267 | > essential part of this License. No use of any Covered Software is 268 | > authorized under this License except under this disclaimer. 269 | 270 | ### 7. 
Limitation of Liability 271 | 272 | > Under no circumstances and under no legal theory, whether tort 273 | > (including negligence), contract, or otherwise, shall any 274 | > Contributor, or anyone who distributes Covered Software as 275 | > permitted above, be liable to You for any direct, indirect, 276 | > special, incidental, or consequential damages of any character 277 | > including, without limitation, damages for lost profits, loss of 278 | > goodwill, work stoppage, computer failure or malfunction, or any 279 | > and all other commercial damages or losses, even if such party 280 | > shall have been informed of the possibility of such damages. This 281 | > limitation of liability shall not apply to liability for death or 282 | > personal injury resulting from such party's negligence to the 283 | > extent applicable law prohibits such limitation. Some 284 | > jurisdictions do not allow the exclusion or limitation of 285 | > incidental or consequential damages, so this exclusion and 286 | > limitation may not apply to You. 287 | 288 | 289 | ### 8. Litigation 290 | 291 | Any litigation relating to this License may be brought only in the 292 | courts of a jurisdiction where the defendant maintains its principal 293 | place of business and such litigation shall be governed by laws of that 294 | jurisdiction, without reference to its conflict-of-law provisions. 295 | Nothing in this Section shall prevent a party's ability to bring 296 | cross-claims or counter-claims. 297 | 298 | 299 | ### 9. Miscellaneous 300 | 301 | This License represents the complete agreement concerning the subject 302 | matter hereof. If any provision of this License is held to be 303 | unenforceable, such provision shall be reformed only to the extent 304 | necessary to make it enforceable. Any law or regulation which provides 305 | that the language of a contract shall be construed against the drafter 306 | shall not be used to construe this License against a Contributor. 
307 | 308 | 309 | ### 10. Versions of the License 310 | 311 | #### 10.1. New Versions 312 | 313 | Mozilla Foundation is the license steward. Except as provided in Section 314 | 10.3, no one other than the license steward has the right to modify or 315 | publish new versions of this License. Each version will be given a 316 | distinguishing version number. 317 | 318 | #### 10.2. Effect of New Versions 319 | 320 | You may distribute the Covered Software under the terms of the version 321 | of the License under which You originally received the Covered Software, 322 | or under the terms of any subsequent version published by the license 323 | steward. 324 | 325 | #### 10.3. Modified Versions 326 | 327 | If you create software not governed by this License, and you want to 328 | create a new license for such software, you may create and use a 329 | modified version of this License if you rename the license and remove 330 | any references to the name of the license steward (except to note that 331 | such modified license differs from this License). 332 | 333 | #### 10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses 334 | 335 | If You choose to distribute Source Code Form that is Incompatible With 336 | Secondary Licenses under the terms of this version of the License, the 337 | notice described in Exhibit B of this License must be attached. 338 | 339 | ## Exhibit A - Source Code Form License Notice 340 | 341 | This Source Code Form is subject to the terms of the Mozilla Public 342 | License, v. 2.0. If a copy of the MPL was not distributed with this 343 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 344 | 345 | If it is not possible or desirable to put the notice in a particular 346 | file, then You may include the notice in a location (such as a LICENSE 347 | file in a relevant directory) where a recipient would be likely to look 348 | for such a notice. 349 | 350 | You may add additional accurate notices of copyright ownership. 
351 | 352 | ## Exhibit B - “Incompatible With Secondary Licenses” Notice 353 | 354 | This Source Code Form is "Incompatible With Secondary Licenses", as 355 | defined by the Mozilla Public License, v. 2.0. 356 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include src/corpus_replicator/recipes/*.yml 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Corpus Replicator 2 | ================= 3 | [![CI](https://github.com/MozillaSecurity/corpus-replicator/actions/workflows/ci.yml/badge.svg)](https://github.com/MozillaSecurity/corpus-replicator/actions/workflows/ci.yml) 4 | [![codecov](https://codecov.io/gh/MozillaSecurity/corpus-replicator/branch/main/graph/badge.svg)](https://codecov.io/gh/MozillaSecurity/corpus-replicator) 5 | [![Matrix](https://img.shields.io/badge/chat-%23fuzzing-green?logo=matrix)](https://matrix.to/#/#fuzzing:mozilla.org) 6 | [![PyPI](https://img.shields.io/pypi/v/corpus-replicator)](https://pypi.org/project/corpus-replicator) 7 | 8 | Corpus Replicator is a corpus generation tool that enables the creation of multiple 9 | unique output files based on templates. The primary intended use case is the 10 | creation of a seed corpus that can be used by fuzzers. Support for additional output 11 | formats can be added via the creation of `Recipes`. If a desired format is unsupported, 12 | support can be added via the creation of a `CorpusGenerator`. 13 | 14 | The goal is to create an efficient corpus that maximizes code coverage and minimizes 15 | file size. Small unique files that execute quickly are preferred. 16 | 17 | Currently four media types can be generated: `animation`, `audio`, `image` and 18 | `video`.
19 | 20 | Requirements 21 | ------------ 22 | 23 | Corpus Replicator relies on [FFmpeg](https://ffmpeg.org/). Recipes that use the `imagemagick` tool also require [ImageMagick](https://imagemagick.org/). 24 | 25 | Installation 26 | ------------ 27 | ``` 28 | pip install corpus-replicator 29 | ``` 30 | 31 | Example 32 | ------- 33 | 34 | This is an example `recipe` file. 35 | 36 | ```yaml 37 | # "base" contains required entries and default flags 38 | base: 39 | codec: "h264" # name of the codec 40 | container: "mp4" # container/file extension 41 | library: "libx264" # name of library 42 | medium: "video" # supported medium 43 | tool: "ffmpeg" # name of supported tool 44 | default_flags: 45 | encoder: # "encoder" flag group 46 | ["-c:v", "libx264"] 47 | resolution: # "resolution" flag group 48 | ["-s", "320x240"] 49 | 50 | # variations allow flags to be added and overwritten 51 | # one file will be generated for each entry in a flag group 52 | variation: 53 | resolution: # flag group - overwrites default flag group in "base" 54 | - ["-s", "640x480"] 55 | - ["-s", "32x18"] 56 | - ["-s", "64x64"] 57 | monochrome: # flag group - adds new flag group 58 | - ["-vf", "hue=s=0"] 59 | ``` 60 | 61 | Running the recipe will generate a corpus: 62 | ``` 63 | $ corpus-replicator example.yml video -t test 64 | Generating templates... 65 | 1 recipe(s) will be used with 1 template(s) to create 4 file(s). 66 | Generating 4 'video/libx264/h264/mp4' file(s) using template 'test'... 67 | Optimizing corpus, checking for duplicates... 68 | Done. 69 | ``` 70 | 71 | Resulting corpus: 72 | ``` 73 | $ ls generated-corpus/ 74 | video-h264-libx264-test-monochrome-00.mp4 75 | video-h264-libx264-test-resolution-01.mp4 76 | video-h264-libx264-test-resolution-00.mp4 77 | video-h264-libx264-test-resolution-02.mp4 78 | ``` 79 | 80 | A more complex corpus can be generated by using multiple `Recipes` and `Templates` at 81 | once. 82 | 83 | Recipes are stored in [src/corpus_replicator/recipes](/src/corpus_replicator/recipes/).
84 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools >= 43", "wheel", "setuptools_scm[toml] >= 3.4"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.coverage.run] 6 | omit = [ 7 | "*/setup.py", 8 | "*/__main__.py", 9 | "*/test_*", 10 | "*/build/*", 11 | "*/dist/*", 12 | "*/.tox/*", 13 | "*/.egg/*", 14 | ] 15 | 16 | [tool.coverage.report] 17 | exclude_lines = [ 18 | "if __name__ == .__main__.:", 19 | "if TYPE_CHECKING:", 20 | "pragma: no cover", 21 | ] 22 | 23 | [tool.mypy] 24 | ignore_missing_imports = true 25 | strict = true 26 | show_error_codes = true 27 | warn_unused_ignores = false 28 | 29 | [tool.pylint.format] 30 | max-line-length = 88 31 | 32 | [tool.pylint.messages_control] 33 | disable = [ 34 | "duplicate-code", 35 | "fixme", 36 | "missing-module-docstring", 37 | "too-few-public-methods", 38 | "too-many-arguments", 39 | "too-many-positional-arguments", 40 | ] 41 | 42 | [tool.pylint.typecheck] 43 | ignored-modules = ["pytest"] 44 | 45 | [tool.pytest.ini_options] 46 | log_level = "DEBUG" 47 | 48 | [tool.ruff] 49 | fix = true 50 | target-version = "py39" 51 | 52 | [tool.ruff.lint] 53 | select = [ 54 | # flake8-comprehensions 55 | "C4", 56 | # pycodestyle 57 | "E", 58 | # Pyflakes 59 | "F", 60 | # Flynt 61 | "FLY", 62 | # isort 63 | "I", 64 | # Perflint 65 | "PERF", 66 | # Ruff-specific rules 67 | "RUF", 68 | # flake8-simplify 69 | "SIM", 70 | # flake8-type-checking 71 | "TCH", 72 | # pyupgrade 73 | "UP", 74 | # pycodestyle 75 | "W", 76 | ] 77 | 78 | [tool.setuptools_scm] 79 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | author = Tyson Smith 3 | author_email = twsmith@mozilla.com 4 | classifiers = 5 | Intended Audience :: 
Developers 6 | License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0) 7 | Programming Language :: Python :: 3 8 | Topic :: Software Development :: Testing 9 | description = A corpus generation tool 10 | keywords = automation corpus fuzz fuzzing security test testing 11 | license = MPL 2.0 12 | long_description = file: README.md 13 | long_description_content_type = text/markdown 14 | maintainer = Mozilla Fuzzing Team 15 | maintainer_email = fuzzing@mozilla.com 16 | name = corpus-replicator 17 | url = https://github.com/MozillaSecurity/corpus-replicator 18 | 19 | [options] 20 | include_package_data = True 21 | install_requires = 22 | PyYAML 23 | package_dir = 24 | = src 25 | packages = 26 | corpus_replicator 27 | python_requires = >=3.9 28 | zip_safe = False 29 | 30 | [options.entry_points] 31 | console_scripts = 32 | corpus-replicator = corpus_replicator.core:main 33 | 34 | [options.extras_require] 35 | dev = 36 | pre-commit 37 | tox 38 | 39 | [codespell] 40 | ignore-words-list = 41 | alls 42 | -------------------------------------------------------------------------------- /src/corpus_replicator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MozillaSecurity/corpus-replicator/d6921d423e4c3ede4f9ac40b98af3fa1cc21cd15/src/corpus_replicator/__init__.py -------------------------------------------------------------------------------- /src/corpus_replicator/__main__.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
4 | from .core import main 5 | 6 | main() 7 | -------------------------------------------------------------------------------- /src/corpus_replicator/common.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | from __future__ import annotations 5 | 6 | from abc import ABC, abstractmethod 7 | from contextlib import suppress 8 | from logging import DEBUG, basicConfig, getLogger 9 | from pathlib import Path 10 | from re import match 11 | from subprocess import run 12 | from typing import TYPE_CHECKING, Any 13 | 14 | from yaml import YAMLError, safe_load 15 | 16 | if TYPE_CHECKING: 17 | from collections.abc import Iterator 18 | 19 | LOG = getLogger(__name__) 20 | SUPPORTED_MEDIUM = ("animation", "audio", "image", "video") 21 | SUPPORTED_TOOLS = ("ffmpeg", "imagemagick") 22 | TOOL_LOG: str = "replicator-tool-log.txt" 23 | 24 | 25 | class RecipeError(Exception): 26 | """Recipe related errors.""" 27 | 28 | 29 | class ToolError(Exception): 30 | """Tool related errors.""" 31 | 32 | 33 | class Recipe: 34 | """A Recipe contains details about how to generate content using tools. 
35 | It stores flags and variations that used to create a corpus based on a template.""" 36 | 37 | __slots__ = ( 38 | "_flags", 39 | "_variations", 40 | "codec", 41 | "container", 42 | "library", 43 | "medium", 44 | "tool", 45 | ) 46 | 47 | def __init__(self, file: Path) -> None: # pylint: disable=too-many-branches 48 | LOG.debug("loading recipe '%s'", file) 49 | # load data from yml file 50 | try: 51 | data: dict[str, Any] = safe_load(file.read_text()) or {} 52 | except (UnicodeDecodeError, YAMLError): 53 | raise RecipeError("Invalid YAML file") from None 54 | 55 | try: 56 | self._flags: dict[str, Any] = data["base"]["default_flags"] or {} 57 | self._variations: dict[str, Any] = data["variation"] or {} 58 | # codec 59 | self.codec: str = data["base"]["codec"] 60 | # container type 61 | self.container: str = data["base"]["container"] 62 | # encoder library 63 | self.library: str = data["base"]["library"] 64 | # content medium 65 | self.medium: str = data["base"]["medium"] 66 | # binary tool 67 | self.tool: str = data["base"]["tool"] 68 | except KeyError as exc: 69 | raise RecipeError(f"Recipe missing entry {exc}") from None 70 | 71 | # validate "base" entries 72 | for key, entry in data["base"].items(): 73 | if key == "default_flags": 74 | if not isinstance(entry, dict): 75 | raise RecipeError("Recipe 'default_flags' is invalid") 76 | for group, flags in entry.items(): 77 | if not flags: 78 | raise RecipeError( 79 | f"Recipe 'default_flags' '{group}' is incomplete" 80 | ) 81 | # all flags must be strings 82 | if not all(isinstance(x, str) for x in flags): 83 | raise RecipeError( 84 | f"Recipe 'default_flags' '{group}' has invalid flags" 85 | ) 86 | # check required properties are strings (must be filesystem safe) 87 | elif not isinstance(entry, str) or not match(r"^[a-zA-Z0-9-]+$", entry): 88 | raise RecipeError(f"Recipe '{key}' entry is invalid") 89 | 90 | # validate variations 91 | if not self._variations: 92 | raise RecipeError("Recipe missing variations") 93 | 
for key, entry in self._variations.items(): 94 | # validate "variation" flag group names (must be filesystem safe) 95 | if not match(r"^[a-zA-Z0-9-]+$", key): 96 | raise RecipeError(f"Recipe variation name '{key}' is invalid") 97 | # each "variation" entry must have a flag group with entries 98 | if not entry: 99 | raise RecipeError(f"Recipe variation '{key}' is incomplete") 100 | if not isinstance(entry, list): 101 | raise RecipeError(f"Recipe variation '{key}' is invalid") 102 | for flags in entry: 103 | if not flags: 104 | raise RecipeError(f"Recipe variation '{key}' is incomplete") 105 | if not isinstance(flags, list): 106 | raise RecipeError(f"Recipe variation '{key}' is invalid") 107 | # all flags must be strings 108 | if not all(isinstance(x, str) for x in flags): 109 | raise RecipeError(f"Recipe variation '{key}' has invalid flags") 110 | 111 | if self.medium not in SUPPORTED_MEDIUM: 112 | raise RecipeError(f"Recipe medium '{self.medium}' unsupported") 113 | 114 | if self.tool not in SUPPORTED_TOOLS: 115 | raise RecipeError(f"Recipe tool '{self.tool}' unsupported") 116 | 117 | def __iter__(self) -> Iterator[tuple[str, int, list[str]]]: 118 | for flag_group, variations in self._variations.items(): 119 | # add default flags 120 | base_flags = [] 121 | for default_group, default_flags in self._flags.items(): 122 | if default_group != flag_group: 123 | base_flags.extend(default_flags) 124 | # iterate over variations and build commands 125 | for idx, flags in enumerate(variations): 126 | yield flag_group, idx, base_flags + flags 127 | 128 | def __len__(self) -> int: 129 | return sum(True for _ in self) 130 | 131 | 132 | class Template: 133 | """A Template contains input data details.""" 134 | 135 | __slots__ = ("file", "name") 136 | 137 | def __init__(self, name: str, file: Path) -> None: 138 | assert file 139 | assert name 140 | self.file = file 141 | self.name = name 142 | 143 | def unlink(self) -> None: 144 | """Remove template file from filesystem. 
145 | 146 | Args: 147 | None 148 | 149 | Returns: 150 | None 151 | """ 152 | LOG.debug("removing template '%s'", self.file) 153 | self.file.unlink(missing_ok=True) 154 | 155 | 156 | class CorpusGenerator(ABC): 157 | """Tool wrapper base class.""" 158 | 159 | __slots__ = ("_dest", "_recipe", "_templates") 160 | 161 | def __init__(self, recipe: Recipe, dest: Path) -> None: 162 | self._dest = dest 163 | self._recipe = recipe 164 | self._templates: list[Template] = [] 165 | 166 | def add_template(self, template: Template) -> None: 167 | """Add a Template to the Generator. 168 | 169 | Args: 170 | template: A template object. 171 | 172 | Returns: 173 | None 174 | """ 175 | self._templates.append(template) 176 | 177 | @abstractmethod 178 | def generate(self) -> Iterator[Path]: 179 | """Generate corpus files.""" 180 | 181 | @property 182 | def description(self) -> str: 183 | """Create a description based on the recipe. 184 | 185 | Args: 186 | None 187 | 188 | Returns: 189 | Descriptive string. 190 | """ 191 | return ( 192 | f"{self._recipe.medium}/{self._recipe.library}/" 193 | f"{self._recipe.codec}/{self._recipe.container}" 194 | ) 195 | 196 | 197 | def init_logging(level: int) -> None: 198 | """Initialize logging 199 | 200 | Arguments: 201 | level: logging verbosity level 202 | 203 | Returns: 204 | None 205 | """ 206 | if level == DEBUG: 207 | date_fmt = None 208 | log_fmt = "%(asctime)s %(levelname).1s %(name)s | %(message)s" 209 | else: 210 | date_fmt = "%H:%M:%S" 211 | log_fmt = "[%(asctime)s] %(message)s" 212 | basicConfig(format=log_fmt, datefmt=date_fmt, level=level) 213 | 214 | 215 | def is_resolution(in_res: str) -> bool: 216 | """Determine whether provided string is a valid resolution. 217 | 218 | Arguments: 219 | in_res: string to evaluate. 220 | 221 | Returns: 222 | True is provided string is a valid resolution otherwise False. 
223 | """ 224 | with suppress(ValueError): 225 | x_res, y_res = in_res.lower().split("x") 226 | if int(x_res) > 0 and int(y_res) > 0: 227 | return True 228 | return False 229 | 230 | 231 | def list_recipes() -> Iterator[Path]: 232 | """List built-in Recipe files. 233 | 234 | Args: 235 | None 236 | 237 | Yields: 238 | Recipes files. 239 | """ 240 | path = Path(__file__).parent.resolve() / "recipes" 241 | if path.is_dir(): 242 | for recipe in path.iterdir(): 243 | if recipe.suffix.lower().endswith(".yml"): 244 | yield recipe 245 | 246 | 247 | def run_tool(cmd: list[str]) -> None: 248 | """Wrapper for subprocess.run. 249 | 250 | Arguments: 251 | cmd: command to pass to run. 252 | 253 | Returns: 254 | None 255 | """ 256 | log_file = Path(TOOL_LOG) 257 | with log_file.open("wb") as log_fp: 258 | LOG.debug("running '%s'", " ".join(cmd)) 259 | # use a timeout in case (frame or time) limit flags are forgotten 260 | # typically this should finish in a few seconds 261 | run(cmd, check=True, stderr=log_fp, stdout=log_fp, timeout=600) 262 | # on success we don't need the log so remove it 263 | log_file.unlink() 264 | -------------------------------------------------------------------------------- /src/corpus_replicator/core.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
4 | from __future__ import annotations 5 | 6 | from argparse import ArgumentParser, Namespace 7 | from filecmp import cmp 8 | from importlib.metadata import PackageNotFoundError, version 9 | from itertools import product 10 | from logging import DEBUG, INFO, getLogger 11 | from pathlib import Path 12 | from typing import TYPE_CHECKING 13 | 14 | from .common import ( 15 | SUPPORTED_MEDIUM, 16 | TOOL_LOG, 17 | Recipe, 18 | RecipeError, 19 | Template, 20 | init_logging, 21 | is_resolution, 22 | list_recipes, 23 | ) 24 | from .generate_corpus import load_generator 25 | from .generate_template import TEMPLATES, generate_audio, generate_image, generate_video 26 | from .tools.ffmpeg import ffmpeg_available 27 | 28 | if TYPE_CHECKING: 29 | from collections.abc import Iterable 30 | 31 | try: 32 | __version__ = version("corpus-replicator") 33 | except PackageNotFoundError: # pragma: no cover 34 | # package is not installed 35 | __version__ = "unknown" 36 | 37 | LOG = getLogger(__name__) 38 | 39 | 40 | class Replicator: 41 | """Replicator can generate a corpus from recipes and templates.""" 42 | 43 | __slots__ = ("dest", "medium", "recipes", "templates") 44 | 45 | def __init__(self, medium: str, dest: Path, recipes: Iterable[Path]) -> None: 46 | assert medium in SUPPORTED_MEDIUM 47 | self.dest = dest 48 | self.medium = medium 49 | self.recipes: list[Recipe] = [] 50 | self.templates: list[Template] = [] 51 | 52 | # load recipe files 53 | for recipe_file in set(recipes): 54 | recipe = Recipe(recipe_file) 55 | if self.medium == recipe.medium: 56 | self.recipes.append(recipe) 57 | else: 58 | LOG.warning( 59 | "'%s' is incompatible with recipe '%s'", self.medium, recipe_file 60 | ) 61 | LOG.info("Skipping '%s'", recipe_file) 62 | 63 | def __len__(self) -> int: 64 | return sum(len(x) for x in self.recipes) * len(self.templates) 65 | 66 | def generate_corpus(self) -> None: 67 | """Generate a corpus from recipes and templates. 
68 | 69 | Args: 70 | None 71 | 72 | Returns: 73 | None 74 | """ 75 | for recipe, template in product(self.recipes, self.templates): 76 | generator = load_generator(recipe, self.dest) 77 | assert generator is not None 78 | generator.add_template(template) 79 | 80 | LOG.info( 81 | "Generating %d '%s' file(s) using template '%s'...", 82 | len(recipe), 83 | generator.description, 84 | template.name, 85 | ) 86 | all(generator.generate()) 87 | 88 | def generate_templates( 89 | self, 90 | template_names: Iterable[str], 91 | duration: float = 1.0, 92 | frames: int = 0, 93 | resolution: str = "1280x768", 94 | ) -> None: 95 | """Generate template files. 96 | 97 | Args: 98 | template_names: Name of template. 99 | duration: Runtime of generated content. 100 | resolution: Resolution of generated content. 101 | 102 | Returns: 103 | None 104 | """ 105 | # TODO: add crop or scale option for templates 106 | unique_templates = set(template_names) 107 | self.dest.mkdir(parents=True, exist_ok=True) 108 | LOG.debug( 109 | "generating %d '%s' template(s)...", len(unique_templates), self.medium 110 | ) 111 | for template in unique_templates: 112 | if self.medium == "audio": 113 | generated = generate_audio(template, self.dest, duration=duration) 114 | elif self.medium == "image": 115 | generated = generate_image(template, self.dest, resolution=resolution) 116 | elif self.medium in ("animation", "video"): 117 | generated = generate_video( 118 | template, 119 | self.dest, 120 | duration=duration, 121 | frames=frames, 122 | resolution=resolution, 123 | ) 124 | else: 125 | raise ValueError(f"Unknown medium '{self.medium}'") 126 | self.templates.append(generated) 127 | LOG.debug( 128 | "generated template(s): %s", ", ".join(str(x.file) for x in self.templates) 129 | ) 130 | 131 | def remove_duplicates(self) -> None: 132 | """Remove duplicate generated corpus files. 
133 | 134 | Args: 135 | None 136 | 137 | Returns: 138 | None 139 | """ 140 | removed = set() 141 | for file_1, file_2 in product(sorted(self.dest.iterdir()), self.dest.iterdir()): 142 | if file_1 in removed or file_2 in removed or file_1.samefile(file_2): 143 | continue 144 | if cmp(file_1, file_2, shallow=False): 145 | LOG.debug("'%s' matches '%s'", file_1.name, file_2.name) 146 | file_2.unlink() 147 | removed.add(file_2) 148 | LOG.debug("removed %d duplicate(s)", len(removed)) 149 | 150 | def remove_templates(self) -> None: 151 | """Remove template files. 152 | 153 | Args: 154 | None 155 | 156 | Returns: 157 | None 158 | """ 159 | for template in self.templates: 160 | template.unlink() 161 | 162 | 163 | def parse_args(argv: list[str] | None = None) -> Namespace: 164 | """Argument parsing""" 165 | parser = ArgumentParser(description="Generate a corpus.", prog="corpus-replicator") 166 | recipes = {x.name: x for x in list_recipes()} 167 | # common args 168 | parser.add_argument( 169 | "recipes", 170 | nargs="+", 171 | type=Path, 172 | help=f"Recipe files to use. Built-in recipes: {', '.join(sorted(recipes))}", 173 | ) 174 | parser.add_argument( 175 | "--log-level", 176 | choices=sorted({"INFO": INFO, "DEBUG": DEBUG}), 177 | default="INFO", 178 | help="Configure console logging (default: %(default)s).", 179 | ) 180 | parser.add_argument( 181 | "-o", 182 | "--output", 183 | default=Path.cwd() / "generated-corpus", 184 | type=Path, 185 | help="Output destination (default: '%(default)s').", 186 | ) 187 | parser.add_argument( 188 | "--version", 189 | "-V", 190 | action="version", 191 | version=f"%(prog)s {__version__}", 192 | help="Show version number.", 193 | ) 194 | subparsers = parser.add_subparsers( 195 | dest="medium", required=True, help="Type of files to generate." 
196 | ) 197 | 198 | # animation args 199 | animation = subparsers.add_parser("animation") 200 | animation.set_defaults(duration=0) 201 | animation.add_argument( 202 | "--frames", 203 | default=5, 204 | type=int, 205 | help="Number of frames (default: %(default)s).", 206 | ) 207 | animation.add_argument( 208 | "-r", 209 | "--resolution", 210 | default="1280x768", 211 | help="Resolution (default: '%(default)s').", 212 | ) 213 | animation.add_argument( 214 | "-t", 215 | "--templates", 216 | default=["all"], 217 | choices=["all", *list(TEMPLATES["video"])], 218 | nargs="+", 219 | help="Template to use (default: all).", 220 | ) 221 | # audio args 222 | audio = subparsers.add_parser("audio") 223 | audio.set_defaults(frames=0, resolution=None) 224 | audio.add_argument( 225 | "-d", 226 | "--duration", 227 | default=1.0, 228 | type=float, 229 | help="Runtime in seconds (default: %(default)ss).", 230 | ) 231 | audio.add_argument( 232 | "-t", 233 | "--templates", 234 | default=["all"], 235 | choices=["all", *list(TEMPLATES["audio"])], 236 | nargs="+", 237 | help="Template to use (default: all).", 238 | ) 239 | # image args 240 | image = subparsers.add_parser("image") 241 | image.set_defaults(frames=0, duration=None) 242 | image.add_argument( 243 | "-r", 244 | "--resolution", 245 | default="1280x768", 246 | help="Resolution (default: '%(default)s').", 247 | ) 248 | image.add_argument( 249 | "-t", 250 | "--templates", 251 | default=["all"], 252 | choices=["all", *list(TEMPLATES["image"])], 253 | nargs="+", 254 | help="Template to use (default: all).", 255 | ) 256 | # video args 257 | video = subparsers.add_parser("video") 258 | video.set_defaults(frames=None) 259 | video.add_argument( 260 | "-d", 261 | "--duration", 262 | default=1.0, 263 | type=float, 264 | help="Runtime in seconds (default: %(default)ss).", 265 | ) 266 | video.add_argument( 267 | "--frames", 268 | default=0, 269 | type=int, 270 | help="Number of frames. Use 0 for no limit. 
(default: %(default)s).", 271 | ) 272 | video.add_argument( 273 | "-r", 274 | "--resolution", 275 | default="1280x768", 276 | help="Resolution (default: '%(default)s').", 277 | ) 278 | video.add_argument( 279 | "-t", 280 | "--templates", 281 | default=["all"], 282 | choices=["all", *list(TEMPLATES["video"])], 283 | nargs="+", 284 | help="Template to use (default: all).", 285 | ) 286 | 287 | if not ffmpeg_available(): 288 | parser.error("Please install FFmpeg.") 289 | 290 | args = parser.parse_args(argv) 291 | 292 | # look up built-in recipes 293 | checked_recipes = [] 294 | for recipe in args.recipes: 295 | if str(recipe) in recipes: 296 | checked_recipes.append(recipes[str(recipe)]) 297 | elif not recipe.is_file(): 298 | parser.error(f"Recipe file does not exist: '{recipe}'") 299 | else: 300 | checked_recipes.append(recipe) 301 | args.recipes = checked_recipes 302 | 303 | if args.resolution and not is_resolution(args.resolution): 304 | parser.error(f"argument -r/--resolution: invalid value: {args.resolution!r}") 305 | 306 | # handle 'all' in templates 307 | if "all" in args.templates: 308 | # animation shares templates with video 309 | args.templates = TEMPLATES[ 310 | "video" if args.medium == "animation" else args.medium 311 | ] 312 | 313 | return args 314 | 315 | 316 | def main(argv: list[str] | None = None) -> None: 317 | """Main function""" 318 | args = parse_args(argv) 319 | init_logging(args.log_level) 320 | 321 | try: 322 | replicator = Replicator(args.medium, args.output, args.recipes) 323 | except RecipeError as exc: 324 | LOG.error("Error: %s.", exc) 325 | return 326 | 327 | try: 328 | LOG.info("Generating templates...") 329 | replicator.generate_templates( 330 | args.templates, 331 | duration=args.duration, 332 | resolution=args.resolution, 333 | frames=args.frames, 334 | ) 335 | LOG.info( 336 | "%d recipe(s) will be used with %d template(s) to create %d file(s).", 337 | len(replicator.recipes), 338 | len(replicator.templates), 339 | len(replicator), 
340 | ) 341 | replicator.generate_corpus() 342 | replicator.remove_templates() 343 | 344 | LOG.info("Optimizing corpus, checking for duplicates...") 345 | replicator.remove_duplicates() 346 | 347 | except KeyboardInterrupt: 348 | LOG.warning("Aborting...") 349 | 350 | finally: 351 | tool_log = Path(TOOL_LOG) 352 | if tool_log.is_file(): 353 | LOG.warning("A tool log is available '%s'.", tool_log.resolve()) 354 | replicator.remove_templates() 355 | 356 | LOG.info("Done.") 357 | -------------------------------------------------------------------------------- /src/corpus_replicator/generate_corpus.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | from __future__ import annotations 5 | 6 | from argparse import ArgumentParser, Namespace 7 | from logging import DEBUG, INFO, getLogger 8 | from pathlib import Path 9 | 10 | from .common import CorpusGenerator, Recipe, Template, ToolError, init_logging 11 | from .tools.ffmpeg import FFmpegGenerator, ffmpeg_available 12 | from .tools.imagemagick import ImageMagickGenerator, imagemagick_available 13 | 14 | LOG = getLogger(__name__) 15 | 16 | 17 | def load_generator(recipe: Recipe, dest: Path) -> CorpusGenerator: 18 | """Load a specific generator to use to create the corpus. 19 | 20 | Args: 21 | recipe: Recipe to use to generate corpus. 22 | dest: Location to place generated corpus. 23 | 24 | Returns: 25 | A corpus generator. 
26 | """ 27 | generator: CorpusGenerator | None = None 28 | if recipe.tool == "ffmpeg": 29 | if not ffmpeg_available(): 30 | raise ToolError("FFmpeg is not available") 31 | generator = FFmpegGenerator(recipe, dest) 32 | elif recipe.tool == "imagemagick": 33 | if not imagemagick_available(): 34 | raise ToolError("ImageMagick is not available") 35 | generator = ImageMagickGenerator(recipe, dest) 36 | else: 37 | raise ToolError(f"Unsupported tool {recipe.tool!r}") 38 | assert generator is not None 39 | return generator 40 | 41 | 42 | def main(argv: list[str] | None = None) -> None: 43 | """Main function""" 44 | args = parse_args(argv) 45 | init_logging(args.log_level) 46 | 47 | generator = load_generator(Recipe(args.recipe), args.output) 48 | generator.add_template(Template(args.template_name, args.template_file)) 49 | args.output.mkdir(parents=True, exist_ok=True) 50 | generator.generate() 51 | 52 | 53 | def parse_args(argv: list[str] | None = None) -> Namespace: 54 | """Argument parsing""" 55 | parser = ArgumentParser( 56 | description="Generate a corpus from a recipe and template file." 
57 | ) 58 | parser.add_argument("recipe", type=Path, help="Recipe file.") 59 | parser.add_argument("template_file", type=Path, help="File to use as template.") 60 | parser.add_argument( 61 | "--log-level", 62 | choices=sorted({"INFO": INFO, "DEBUG": DEBUG}), 63 | default="INFO", 64 | help="Configure console logging (default: %(default)s).", 65 | ) 66 | parser.add_argument( 67 | "-o", 68 | "--output", 69 | default=Path.cwd() / "generated-corpus", 70 | type=Path, 71 | help="Output destination (default: %(default)s).", 72 | ) 73 | parser.add_argument( 74 | "-n", 75 | "--template-name", 76 | default="custom", 77 | help="Template name used when naming generated files (default: %(default)s).", 78 | ) 79 | 80 | args = parser.parse_args(argv) 81 | 82 | if not args.recipe.is_file(): 83 | parser.error(f"Recipe file does not exist: '{args.recipe}'") 84 | 85 | if not args.template_file.is_file(): 86 | parser.error(f"Template file does not exist: '{args.template_file}'") 87 | 88 | return args 89 | 90 | 91 | if __name__ == "__main__": 92 | main() 93 | -------------------------------------------------------------------------------- /src/corpus_replicator/generate_template.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
4 | from __future__ import annotations 5 | 6 | from argparse import ArgumentParser, Namespace 7 | from logging import DEBUG, INFO, getLogger 8 | from pathlib import Path 9 | 10 | from .common import Template, init_logging, is_resolution, run_tool 11 | from .tools.ffmpeg import FFMPEG_BIN, ffmpeg_available 12 | 13 | LOG = getLogger(__name__) 14 | 15 | TEMPLATES = { 16 | "audio": ( 17 | "noise", 18 | "silence", 19 | "sine", 20 | "test", 21 | ), 22 | "image": ( 23 | "noise", 24 | "solid", 25 | "test", 26 | ), 27 | "video": ( 28 | "noise", 29 | "solid", 30 | "test", 31 | ), 32 | } 33 | 34 | 35 | def generate_audio(template: str, dest: Path, duration: float = 3.0) -> Template: 36 | """Generate audio template file. 37 | 38 | Args: 39 | template: Content to generate. 40 | dest: Location to create file. 41 | duration: Target playback duration. 42 | 43 | Returns: 44 | Template containing generated content information. 45 | """ 46 | assert duration > 0 47 | assert template in TEMPLATES["audio"] 48 | cmd = [FFMPEG_BIN, "-y", "-f", "lavfi", "-i"] 49 | if template == "noise": 50 | cmd.append("anoisesrc=a=0.1:c=white") 51 | elif template == "silence": 52 | cmd.append("anullsrc") 53 | elif template == "sine": 54 | cmd.append("sine=frequency=880") 55 | elif template == "test": 56 | cmd.append("aevalsrc=sin(2*PI*(360-2.5/2)*t)|sin(2*PI*(360+2.5/2)*t)") 57 | cmd.extend(["-c:a", "pcm_s16le"]) 58 | cmd.extend(["-t", f"{duration:0.0f}"]) 59 | dst: Path = dest / f"template-audio-{template}.wav" 60 | cmd.append(str(dst)) 61 | run_tool(cmd) 62 | return Template(template, dst) 63 | 64 | 65 | def generate_image(template: str, dest: Path, resolution: str = "1280x768") -> Template: 66 | """Generate image template file. 67 | 68 | Args: 69 | template: Content to generate. 70 | dest: Location to create file. 71 | resolution: Target content resolution. 72 | 73 | Returns: 74 | Template containing generated content information. 
75 | """ 76 | assert template in TEMPLATES["image"] 77 | cmd = [FFMPEG_BIN, "-y", "-f", "lavfi", "-i"] 78 | if template == "noise": 79 | cmd.append(f"color=c=gray:s={resolution}, noise=alls=100:allf=t") 80 | elif template == "solid": 81 | cmd.append("color=c=red") 82 | elif template == "test": 83 | cmd.append(f"testsrc=s={resolution}") 84 | cmd.extend(["-frames", "1"]) 85 | dst: Path = dest / f"template-image-{template}-{resolution}.png" 86 | cmd.append(str(dst)) 87 | run_tool(cmd) 88 | return Template(template, dst) 89 | 90 | 91 | def generate_video( 92 | template: str, 93 | dest: Path, 94 | duration: float = 2.0, 95 | frames: int = 0, 96 | resolution: str = "1280x768", 97 | ) -> Template: 98 | """Generate video template file. 99 | 100 | Args: 101 | template: Content to generate. 102 | dest: Location to create file. 103 | duration: Target playback duration. 104 | frames: Number of frames to generate. 105 | resolution: Target content resolution. 106 | 107 | Returns: 108 | Template containing generated content information. 
109 | """ 110 | assert duration > 0 or frames > 0 111 | assert template in TEMPLATES["video"] 112 | cmd = [FFMPEG_BIN, "-y", "-f", "lavfi", "-i"] 113 | if template == "noise": 114 | cmd.append(f"color=c=gray:s={resolution}, noise=alls=100:allf=t") 115 | elif template == "solid": 116 | cmd.append("color=c=red") 117 | elif template == "test": 118 | cmd.append(f"testsrc2=s={resolution}") 119 | cmd.extend(["-pix_fmt", "yuv420p"]) 120 | cmd.extend(["-c:v", "libx264"]) 121 | if frames > 0: 122 | cmd.extend(["-frames", str(frames)]) 123 | else: 124 | cmd.extend(["-t", f"{duration:0.0f}"]) 125 | cmd.extend(["-crf", "17"]) 126 | dst: Path = dest / f"template-video-{template}-{resolution}.mp4" 127 | cmd.append(str(dst)) 128 | run_tool(cmd) 129 | return Template(template, dst) 130 | 131 | 132 | def main(argv: list[str] | None = None) -> None: 133 | """Main function""" 134 | args = parse_args(argv) 135 | init_logging(args.log_level) 136 | 137 | args.output.mkdir(parents=True, exist_ok=True) 138 | if args.medium == "audio": 139 | output = generate_audio(args.template, args.output, duration=args.duration) 140 | elif args.medium == "image": 141 | output = generate_image(args.template, args.output, resolution=args.resolution) 142 | elif args.medium == "video": 143 | output = generate_video( 144 | args.template, 145 | args.output, 146 | duration=args.duration, 147 | frames=args.frames, 148 | resolution=args.resolution, 149 | ) 150 | else: # pragma: no cover 151 | # this should be handle by parse_args() 152 | raise ValueError(f"Unknown medium '{args.medium}'") 153 | 154 | LOG.info("Created '%s'", output.file) 155 | 156 | 157 | def parse_args(argv: list[str] | None = None) -> Namespace: 158 | """Argument parsing""" 159 | parser = ArgumentParser(description="Generate a template file.") 160 | # common args 161 | parser.add_argument( 162 | "-o", 163 | "--output", 164 | default=Path.cwd(), 165 | type=Path, 166 | help="Output destination.", 167 | ) 168 | parser.add_argument( 169 | 
"--log-level", 170 | choices=sorted({"INFO": INFO, "DEBUG": DEBUG}), 171 | default="INFO", 172 | help="Configure console logging (default: %(default)s).", 173 | ) 174 | subparsers = parser.add_subparsers(dest="medium", required=True) 175 | # audio args 176 | audio = subparsers.add_parser("audio") 177 | audio.set_defaults(frames=0, resolution=None) 178 | audio.add_argument("template", choices=TEMPLATES["audio"]) 179 | audio.add_argument( 180 | "-d", "--duration", default=2.0, type=float, help="Runtime in seconds." 181 | ) 182 | # image args 183 | image = subparsers.add_parser("image") 184 | image.set_defaults(frames=0, duration=None) 185 | image.add_argument("template", choices=TEMPLATES["image"]) 186 | image.add_argument("-r", "--resolution", default="1280x768") 187 | # video args 188 | video = subparsers.add_parser("video") 189 | video.add_argument("template", choices=TEMPLATES["video"]) 190 | video.add_argument("--frames", type=int, default=0) 191 | video.add_argument( 192 | "-d", "--duration", default=2.0, type=float, help="Runtime in seconds." 
193 | ) 194 | video.add_argument("-r", "--resolution", default="1280x768") 195 | 196 | if not ffmpeg_available(): 197 | parser.error("Please install FFmpeg.") 198 | 199 | args = parser.parse_args(argv) 200 | 201 | if args.resolution and not is_resolution(args.resolution): 202 | parser.error(f"argument -r/--resolution: invalid value: {args.resolution!r}") 203 | 204 | return args 205 | 206 | 207 | if __name__ == "__main__": 208 | main() 209 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/animation_apng_png_ffmpeg.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "apng" 3 | container: "png" 4 | library: "ffmpeg" 5 | medium: "animation" 6 | tool: "ffmpeg" 7 | default_flags: 8 | encoder: 9 | ["-f", "apng"] 10 | resolution: 11 | ["-s", "320x240"] 12 | 13 | variation: 14 | loop: 15 | - ["-plays", "0"] 16 | - ["-plays", "1"] 17 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/animation_gif_gif_ffmpeg.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "gif" 3 | container: "gif" 4 | library: "ffmpeg" 5 | medium: "animation" 6 | tool: "ffmpeg" 7 | default_flags: 8 | resolution: 9 | ["-s", "320x240"] 10 | 11 | variation: 12 | loop: 13 | - ["-plays", "0"] 14 | - ["-plays", "1"] 15 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/animation_webp_webp_libwebp.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "webp" 3 | container: "webp" 4 | library: "libwebp" 5 | medium: "animation" 6 | tool: "ffmpeg" 7 | default_flags: 8 | resolution: 9 | ["-s", "320x240"] 10 | 11 | variation: 12 | loop: 13 | - ["-loop", "0"] 14 | - ["-loop", "1"] 15 | -------------------------------------------------------------------------------- 
/src/corpus_replicator/recipes/audio_aac_mp4_ffmpeg.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "aac" 3 | container: "mp4" 4 | library: "ffmpeg" 5 | medium: "audio" 6 | tool: "ffmpeg" 7 | default_flags: 8 | encoder: 9 | ["-c:a", "aac"] 10 | 11 | variation: 12 | vbr: 13 | - ["-vbr", "1"] 14 | - ["-vbr", "3"] 15 | - ["-vbr", "5"] 16 | cbr: 17 | - ["-b:a", "64k"] 18 | channels: 19 | - ["-ac", "1"] 20 | - ["-ac", "2"] 21 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/audio_flac_flac_libflac.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "flac" 3 | container: "flac" 4 | library: "ffmpeg" 5 | medium: "audio" 6 | tool: "ffmpeg" 7 | default_flags: {} 8 | 9 | variation: 10 | bit-depth: 11 | - ["-sample_fmt", "s32"] 12 | channels: 13 | - ["-ac", "1"] 14 | - ["-ac", "2"] 15 | compression: 16 | - ["-compression_level", "0"] 17 | - ["-compression_level", "3"] 18 | - ["-compression_level", "6"] 19 | - ["-compression_level", "9"] 20 | - ["-compression_level", "12"] 21 | metadata: 22 | - ["-write_id3v2", "1", "-metadata", "artist=foo"] 23 | sample-rate: 24 | - ["-ar", "8000"] 25 | - ["-ar", "11025"] 26 | - ["-ar", "12000"] 27 | - ["-ar", "16000"] 28 | - ["-ar", "22050"] 29 | - ["-ar", "24000"] 30 | - ["-ar", "32000"] 31 | - ["-ar", "44100"] 32 | - ["-ar", "48000"] 33 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/audio_mp3_mp3_libmp3lame.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "mp3" 3 | container: "mp3" 4 | library: "libmp3lame" 5 | medium: "audio" 6 | tool: "ffmpeg" 7 | default_flags: 8 | encoder: 9 | ["-c:a", "libmp3lame"] 10 | 11 | variation: 12 | abr: 13 | - ["-abr", "1"] 14 | vbr: 15 | - ["-q:a", "0"] 16 | - ["-q:a", "5"] 17 | - ["-q:a", "9"] 18 | cbr: 19 | 
- ["-b:a", "32k"] 20 | - ["-b:a", "64k"] 21 | channels: 22 | # mono 23 | - ["-ac", "1"] 24 | # disable joint stereo 25 | - ["-ac", "2", "-joint_stereo", "0"] 26 | # with joint stereo 27 | - ["-ac", "2", "-joint_stereo", "1"] 28 | id3: 29 | - ["-write_id3v2", "0"] 30 | - ["-write_id3v2", "1", "-id3v2_version", "3"] 31 | - ["-write_id3v2", "1", "-id3v2_version", "4"] 32 | metadata: 33 | - ["-metadata", "artist='Someone'"] 34 | # strip meta data 35 | - ["-flags:a", "+bitexact"] 36 | sample-rate: 37 | - ["-ar", "8000"] 38 | - ["-ar", "11025"] 39 | - ["-ar", "12000"] 40 | - ["-ar", "16000"] 41 | - ["-ar", "22050"] 42 | - ["-ar", "24000"] 43 | - ["-ar", "32000"] 44 | - ["-ar", "44100"] 45 | - ["-ar", "48000"] 46 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/audio_mp3_mp3_shine.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "mp3" 3 | container: "mp3" 4 | library: "libshine" 5 | medium: "audio" 6 | tool: "ffmpeg" 7 | default_flags: 8 | encoder: 9 | ["-c:a", "libshine"] 10 | 11 | variation: 12 | cbr: 13 | - ["-b:a", "32k"] 14 | - ["-b:a", "64k"] 15 | channels: 16 | - ["-ac", "1"] 17 | - ["-ac", "2"] 18 | sample-rate: 19 | - ["-ar", "32000"] 20 | - ["-ar", "44100"] 21 | - ["-ar", "48000"] 22 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/audio_opus_opus_libopus.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "opus" 3 | container: "opus" 4 | library: "libopus" 5 | medium: "audio" 6 | tool: "ffmpeg" 7 | default_flags: 8 | encoder: 9 | ["-c:a", "libopus"] 10 | 11 | variation: 12 | application: 13 | - ["-application", "lowdelay"] 14 | - ["-application", "voip"] 15 | vbr: 16 | - ["-vbr", "on", "-b:a", "500"] 17 | - ["-vbr", "on", "-b:a", "16k"] 18 | - ["-vbr", "on", "-b:a", "64k"] 19 | - ["-vbr", "on", "-b:a", "128k"] 20 | - 
["-vbr", "on", "-b:a", "256k"] 21 | - ["-vbr", "constrained", "-b:a", "64k"] 22 | cbr: 23 | - ["-vbr", "off", "-b:a", "16k"] 24 | - ["-vbr", "off", "-b:a", "32k"] 25 | - ["-vbr", "off", "-b:a", "64k"] 26 | channels: 27 | - ["-ac", "1"] 28 | - ["-ac", "2"] 29 | - ["-ac", "6"] 30 | compression-level: 31 | - ["-compression_level", "0"] 32 | - ["-compression_level", "3"] 33 | - ["-compression_level", "6"] 34 | - ["-compression_level", "10"] 35 | frame-duration: 36 | - ["-frame_duration", "2.5"] 37 | - ["-frame_duration", "5"] 38 | - ["-frame_duration", "10"] 39 | - ["-frame_duration", "20"] 40 | - ["-frame_duration", "40"] 41 | - ["-frame_duration", "60"] 42 | sample-rate: 43 | - ["-ar", "8000"] 44 | - ["-ar", "12000"] 45 | - ["-ar", "16000"] 46 | - ["-ar", "24000"] 47 | - ["-ar", "48000"] 48 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/audio_pcm_wav_ffmpeg.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "pcm" 3 | container: "wav" 4 | library: "ffmpeg" 5 | medium: "audio" 6 | tool: "ffmpeg" 7 | default_flags: 8 | encoder: 9 | ["-c:a", "pcm_s16le"] 10 | 11 | variation: 12 | encoder: 13 | - ["-c:a", "pcm_u8", "-ar", "1600"] 14 | - ["-c:a", "pcm_s24le"] 15 | - ["-c:a", "pcm_s32le"] 16 | channels: 17 | - ["-ac", "1"] 18 | - ["-ac", "2"] 19 | sample-rate: 20 | - ["-ar", "8000"] 21 | - ["-ar", "11025"] 22 | - ["-ar", "12000"] 23 | - ["-ar", "16000"] 24 | - ["-ar", "22050"] 25 | - ["-ar", "24000"] 26 | - ["-ar", "32000"] 27 | - ["-ar", "44100"] 28 | - ["-ar", "48000"] 29 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/audio_vorbis_ogg_libvorbis.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "vorbis" 3 | container: "ogg" 4 | library: "libvorbis" 5 | medium: "audio" 6 | tool: "ffmpeg" 7 | default_flags: 8 | encoder: 9 | ["-c:a", 
"libvorbis"] 10 | 11 | variation: 12 | vbr: 13 | - ["-q:a", "0"] 14 | - ["-q:a", "5"] 15 | - ["-q:a", "9"] 16 | channels: 17 | - ["-ac", "1"] 18 | - ["-ac", "2"] 19 | sample-rate: 20 | - ["-ar", "8000"] 21 | - ["-ar", "11025"] 22 | - ["-ar", "12000"] 23 | - ["-ar", "16000"] 24 | - ["-ar", "22050"] 25 | - ["-ar", "24000"] 26 | - ["-ar", "32000"] 27 | - ["-ar", "44100"] 28 | - ["-ar", "48000"] 29 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/image_avif_avif_imagemagick.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "avif" 3 | container: "avif" 4 | library: "imagemagick" 5 | medium: "image" 6 | tool: "imagemagick" 7 | default_flags: 8 | resolution: 9 | ["-resize", "320x240"] 10 | 11 | variation: 12 | resolution: 13 | - ["-resize", "1x1"] 14 | - ["-resize", "16x16"] 15 | - ["-resize", "18x32"] 16 | - ["-resize", "64x64"] 17 | - ["-resize", "320x180"] 18 | - ["-resize", "320x240"] 19 | - ["-resize", "480x320"] 20 | - ["-resize", "1280x720"] 21 | monochrome: 22 | - ["-monochrome"] 23 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/image_bmp_bmp_ffmpeg.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "bmp" 3 | container: "bmp" 4 | library: "ffmpeg" 5 | medium: "image" 6 | tool: "ffmpeg" 7 | default_flags: 8 | resolution: 9 | ["-s", "320x240"] 10 | 11 | variation: 12 | bit-depth: 13 | - ["-pix_fmt:v", "monob"] 14 | - ["-pix_fmt:v", "gray"] 15 | - ["-pix_fmt:v", "rgb4"] 16 | - ["-pix_fmt:v", "rgb8"] 17 | - ["-pix_fmt:v", "rgb24"] 18 | - ["-pix_fmt:v", "rgba"] 19 | resolution: 20 | - ["-s", "1x1"] 21 | - ["-s", "16x16"] 22 | - ["-s", "18x32"] 23 | - ["-s", "64x64"] 24 | - ["-s", "320x180"] 25 | - ["-s", "320x240"] 26 | - ["-s", "480x320"] 27 | - ["-s", "1280x720"] 28 | - ["-s", "1920x1080"] 29 | - ["-s", "4096x2160"] 30 | 
monochrome: 31 | - ["-vf", "hue=s=0"] 32 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/image_gif_gif_ffmpeg.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "gif" 3 | container: "gif" 4 | library: "ffmpeg" 5 | medium: "image" 6 | tool: "ffmpeg" 7 | default_flags: 8 | resolution: 9 | ["-s", "320x240"] 10 | 11 | variation: 12 | bit-depth: 13 | - ["-pix_fmt:v", "monob"] 14 | - ["-pix_fmt:v", "gray"] 15 | - ["-pix_fmt:v", "rgb4"] 16 | - ["-pix_fmt:v", "rgb8"] 17 | - ["-pix_fmt:v", "rgb24"] 18 | - ["-pix_fmt:v", "rgba"] 19 | resolution: 20 | - ["-s", "1x1"] 21 | - ["-s", "16x16"] 22 | - ["-s", "18x32"] 23 | - ["-s", "64x64"] 24 | - ["-s", "320x180"] 25 | - ["-s", "320x240"] 26 | - ["-s", "480x320"] 27 | - ["-s", "1280x720"] 28 | - ["-s", "1920x1080"] 29 | - ["-s", "4096x2160"] 30 | monochrome: 31 | - ["-vf", "hue=s=0"] 32 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/image_heic_heic_imagemagick.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "heic" 3 | container: "heic" 4 | library: "imagemagick" 5 | medium: "image" 6 | tool: "imagemagick" 7 | default_flags: 8 | resolution: 9 | ["-resize", "320x240"] 10 | 11 | variation: 12 | resolution: 13 | - ["-resize", "1x1"] 14 | - ["-resize", "16x16"] 15 | - ["-resize", "18x32"] 16 | - ["-resize", "64x64"] 17 | - ["-resize", "320x180"] 18 | - ["-resize", "320x240"] 19 | - ["-resize", "480x320"] 20 | - ["-resize", "1280x720"] 21 | monochrome: 22 | - ["-monochrome"] 23 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/image_ico_ico_ffmpeg.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "ico" 3 | container: "ico" 4 | library: "ffmpeg" 5 | medium: "image" 6 | tool: 
"ffmpeg" 7 | default_flags: 8 | resolution: 9 | ["-s", "32x32"] 10 | 11 | variation: 12 | bit-depth: 13 | - ["-pix_fmt:v", "pal8"] 14 | - ["-pix_fmt:v", "rgb24"] 15 | - ["-pix_fmt:v", "rgba"] 16 | resolution: 17 | - ["-s", "1x1"] 18 | - ["-s", "16x16"] 19 | - ["-s", "18x32"] 20 | - ["-s", "64x64"] 21 | - ["-s", "256x256"] 22 | monochrome: 23 | - ["-vf", "hue=s=0"] 24 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/image_jpg_jpg_ffmpeg.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "jpeg" 3 | container: "jpg" 4 | library: "ffmpeg" 5 | medium: "image" 6 | tool: "ffmpeg" 7 | default_flags: 8 | resolution: 9 | ["-s", "320x240"] 10 | 11 | variation: 12 | bit-depth: 13 | - ["-pix_fmt:v", "monob"] 14 | - ["-pix_fmt:v", "gray"] 15 | - ["-pix_fmt:v", "rgb4"] 16 | - ["-pix_fmt:v", "rgb8"] 17 | - ["-pix_fmt:v", "rgb24"] 18 | - ["-pix_fmt:v", "rgba"] 19 | quality: 20 | - ["-q:v", "0"] 21 | - ["-q:v", "1"] 22 | - ["-q:v", "5"] 23 | - ["-q:v", "10"] 24 | - ["-q:v", "20"] 25 | - ["-q:v", "31"] 26 | resolution: 27 | - ["-s", "18x32"] 28 | - ["-s", "32x18"] 29 | - ["-s", "64x64"] 30 | - ["-s", "320x180"] 31 | - ["-s", "320x200"] 32 | - ["-s", "480x320"] 33 | - ["-s", "1280x720"] 34 | - ["-s", "1920x1080"] 35 | - ["-s", "2560x1080"] 36 | - ["-s", "4096x2160"] 37 | monochrome: 38 | - ["-vf", "hue=s=0"] 39 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/image_png_png_ffmpeg.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "png" 3 | container: "png" 4 | library: "ffmpeg" 5 | medium: "image" 6 | tool: "ffmpeg" 7 | default_flags: 8 | resolution: 9 | ["-s", "320x240"] 10 | 11 | variation: 12 | bit-depth: 13 | - ["-pix_fmt:v", "monob"] 14 | - ["-pix_fmt:v", "gray"] 15 | - ["-pix_fmt:v", "rgb4"] 16 | - ["-pix_fmt:v", "rgb8"] 17 | - ["-pix_fmt:v", 
"rgb24"] 18 | - ["-pix_fmt:v", "rgba"] 19 | resolution: 20 | - ["-s", "18x32"] 21 | - ["-s", "32x18"] 22 | - ["-s", "64x64"] 23 | - ["-s", "320x180"] 24 | - ["-s", "320x200"] 25 | - ["-s", "480x320"] 26 | - ["-s", "1280x720"] 27 | - ["-s", "1920x1080"] 28 | - ["-s", "2560x1080"] 29 | - ["-s", "4096x2160"] 30 | monochrome: 31 | - ["-vf", "hue=s=0"] 32 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/image_webp_webp_libwebp.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "webp" 3 | container: "webp" 4 | library: "libwebp" 5 | medium: "image" 6 | tool: "ffmpeg" 7 | default_flags: 8 | resolution: 9 | ["-s", "320x240"] 10 | 11 | variation: 12 | bit-depth: 13 | - ["-pix_fmt:v", "monob"] 14 | - ["-pix_fmt:v", "gray"] 15 | - ["-pix_fmt:v", "rgb4"] 16 | - ["-pix_fmt:v", "rgb8"] 17 | - ["-pix_fmt:v", "rgb24"] 18 | - ["-pix_fmt:v", "rgba"] 19 | resolution: 20 | - ["-s", "18x32"] 21 | - ["-s", "32x18"] 22 | - ["-s", "64x64"] 23 | - ["-s", "320x180"] 24 | - ["-s", "320x200"] 25 | - ["-s", "480x320"] 26 | - ["-s", "1280x720"] 27 | - ["-s", "1920x1080"] 28 | - ["-s", "4096x2160"] 29 | lossless: 30 | - ["-lossless", "1"] 31 | quality: 32 | - ["-quality", "0"] 33 | - ["-quality", "25"] 34 | - ["-quality", "50"] 35 | - ["-quality", "75"] 36 | - ["-quality", "100"] 37 | preset: 38 | - ["-preset", "picture"] 39 | - ["-preset", "photo"] 40 | - ["-preset", "drawing"] 41 | - ["-preset", "icon"] 42 | - ["-preset", "text"] 43 | monochrome: 44 | - ["-vf", "hue=s=0"] 45 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "description": "corpus-replicator recipe schema", 4 | "type": "object", 5 | "additionalProperties": false, 6 | "properties": { 7 | 
"base": { 8 | "type": "object", 9 | "additionalProperties": false, 10 | "properties": { 11 | "codec": { 12 | "type": "string", 13 | "pattern": "^[a-zA-Z0-9-]+$" 14 | }, 15 | "container": { 16 | "type": "string", 17 | "pattern": "^[a-zA-Z0-9-]+$" 18 | }, 19 | "library": { 20 | "type": "string", 21 | "pattern": "^[a-zA-Z0-9-]+$" 22 | }, 23 | "medium": { 24 | "type": "string", 25 | "pattern": "^(animation|audio|image|video)$" 26 | }, 27 | "tool": { 28 | "type": "string", 29 | "pattern": "^(ffmpeg|imagemagick)$" 30 | }, 31 | "default_flags": { 32 | "type": "object", 33 | "additionalProperties": false, 34 | "patternProperties": { 35 | "^[a-zA-Z0-9-]+$": { 36 | "type": "array", 37 | "minItems": 1, 38 | "items": { 39 | "type": "string", 40 | "pattern": "^\\S+$" 41 | } 42 | } 43 | } 44 | } 45 | }, 46 | "required": [ 47 | "codec", 48 | "container", 49 | "library", 50 | "medium", 51 | "tool", 52 | "default_flags" 53 | ] 54 | }, 55 | "variation": { 56 | "type": "object", 57 | "additionalProperties": false, 58 | "minProperties": 1, 59 | "patternProperties": { 60 | "^[a-zA-Z0-9-]+$": { 61 | "type": "array", 62 | "minItems": 1, 63 | "items": { 64 | "type": "array", 65 | "minItems": 1, 66 | "items": { 67 | "type": "string", 68 | "pattern": "^\\S+$" 69 | } 70 | } 71 | } 72 | } 73 | } 74 | }, 75 | "required": [ 76 | "base", 77 | "variation" 78 | ] 79 | } 80 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/video_av1_webm_libaom.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "av1" 3 | container: "webm" 4 | library: "libaom" 5 | medium: "video" 6 | tool: "ffmpeg" 7 | default_flags: 8 | encoder: 9 | ["-c:v", "libaom-av1"] 10 | disable-audio: 11 | ["-an"] 12 | frames: 13 | ["-frames", "20"] 14 | resolution: 15 | ["-s", "320x240"] 16 | multi-thread: 17 | ["-cpu-used", "2", "-row-mt", "1"] 18 | 19 | variation: 20 | resolution: 21 | - ["-s", "18x32"] 22 | - ["-s", "32x18"] 
23 | - ["-s", "64x64"] 24 | - ["-s", "320x180"] 25 | - ["-s", "320x200"] 26 | - ["-s", "480x320"] 27 | - ["-s", "1280x720"] 28 | - ["-s", "1920x1080"] 29 | - ["-s", "4096x2160"] 30 | quality: 31 | - ["-crf", "20"] 32 | - ["-crf", "30"] 33 | - ["-crf", "63"] 34 | monochrome: 35 | - ["-vf", "hue=s=0"] 36 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/video_h264_264_libx264.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "h264" 3 | container: "264" 4 | library: "libx264" 5 | medium: "video" 6 | tool: "ffmpeg" 7 | default_flags: 8 | encoder: 9 | ["-c:v", "libx264"] 10 | disable-audio: 11 | ["-an"] 12 | frames: 13 | ["-frames", "20"] 14 | resolution: 15 | ["-s", "64x64"] 16 | 17 | variation: 18 | resolution: 19 | - ["-s", "18x32"] 20 | - ["-s", "32x18"] 21 | - ["-s", "160x90"] 22 | - ["-s", "320x240"] 23 | profile: 24 | - ["-profile:v", "baseline"] 25 | - ["-profile:v", "high"] 26 | - ["-profile:v", "main"] 27 | tune: 28 | - ["-tune", "animation"] 29 | - ["-tune", "fastdecode"] 30 | - ["-tune", "film"] 31 | - ["-tune", "zerolatency"] 32 | monochrome: 33 | - ["-vf", "hue=s=0"] 34 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/video_h264_mp4_libx264.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "h264" 3 | container: "mp4" 4 | library: "libx264" 5 | medium: "video" 6 | tool: "ffmpeg" 7 | default_flags: 8 | encoder: 9 | ["-c:v", "libx264"] 10 | disable-audio: 11 | ["-an"] 12 | frames: 13 | ["-frames", "20"] 14 | resolution: 15 | ["-s", "320x240"] 16 | 17 | variation: 18 | resolution: 19 | - ["-s", "18x32"] 20 | - ["-s", "32x18"] 21 | - ["-s", "64x64"] 22 | - ["-s", "320x180"] 23 | - ["-s", "480x320"] 24 | - ["-s", "1280x720"] 25 | - ["-s", "1920x1080"] 26 | - ["-s", "4096x2160"] 27 | profile: 28 | - ["-profile:v", 
"baseline"] 29 | - ["-profile:v", "high"] 30 | - ["-profile:v", "main"] 31 | tune: 32 | - ["-tune", "animation"] 33 | - ["-tune", "fastdecode"] 34 | - ["-tune", "film"] 35 | - ["-tune", "zerolatency"] 36 | monochrome: 37 | - ["-vf", "hue=s=0"] 38 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/video_h265_mp4_libx265.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "h265" 3 | container: "mp4" 4 | library: "libx265" 5 | medium: "video" 6 | tool: "ffmpeg" 7 | default_flags: 8 | encoder: 9 | ["-c:v", "libx265"] 10 | disable-audio: 11 | ["-an"] 12 | frames: 13 | ["-frames", "20"] 14 | resolution: 15 | ["-s", "240x180"] 16 | 17 | variation: 18 | resolution: 19 | - ["-s", "18x32"] 20 | - ["-s", "32x18"] 21 | - ["-s", "48x48"] 22 | - ["-s", "180x240"] 23 | - ["-s", "320x180"] 24 | - ["-s", "1280x720"] 25 | preset: 26 | - ["-preset", "ultrafast"] 27 | profile: 28 | - ["-profile:v", "main"] 29 | - ["-profile:v", "main10"] 30 | tune: 31 | - ["-tune", "fastdecode"] 32 | - ["-tune", "grain"] 33 | - ["-tune", "psnr"] 34 | - ["-tune", "ssim"] 35 | - ["-tune", "zerolatency"] 36 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/video_theora_ogg_libtheora.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "theora" 3 | container: "ogg" 4 | library: "libtheora" 5 | medium: "video" 6 | tool: "ffmpeg" 7 | default_flags: 8 | encoder: 9 | ["-c:v", "libtheora"] 10 | disable-audio: 11 | ["-an"] 12 | frames: 13 | ["-frames", "20"] 14 | resolution: 15 | ["-s", "320x240"] 16 | 17 | variation: 18 | resolution: 19 | - ["-s", "18x32"] 20 | - ["-s", "32x18"] 21 | - ["-s", "64x64"] 22 | - ["-s", "320x180"] 23 | - ["-s", "320x200"] 24 | - ["-s", "480x320"] 25 | - ["-s", "1280x720"] 26 | - ["-s", "1920x1080"] 27 | - ["-s", "4096x2160"] 28 | quality: 29 | - 
["-qscale:v", "0"] 30 | - ["-qscale:v", "5"] 31 | - ["-qscale:v", "10"] 32 | monochrome: 33 | - ["-vf", "hue=s=0"] 34 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/video_vp8_webm_libvpx.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "vp8" 3 | container: "webm" 4 | library: "libvpx" 5 | medium: "video" 6 | tool: "ffmpeg" 7 | default_flags: 8 | encoder: 9 | ["-c:v", "libvpx"] 10 | disable-audio: 11 | ["-an"] 12 | frames: 13 | ["-frames", "20"] 14 | resolution: 15 | ["-s", "320x240"] 16 | 17 | variation: 18 | resolution: 19 | - ["-s", "18x32"] 20 | - ["-s", "32x18"] 21 | - ["-s", "64x64"] 22 | - ["-s", "320x180"] 23 | - ["-s", "320x200"] 24 | - ["-s", "480x320"] 25 | - ["-s", "1280x720"] 26 | - ["-s", "1920x1080"] 27 | - ["-s", "4096x2160"] 28 | deadline: 29 | - ["-deadline", "best"] 30 | - ["-deadline", "good"] 31 | - ["-deadline", "realtime"] 32 | monochrome: 33 | - ["-vf", "hue=s=0"] 34 | -------------------------------------------------------------------------------- /src/corpus_replicator/recipes/video_vp9_webm_libvpx.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "vp9" 3 | container: "webm" 4 | library: "libvpx-vp9" 5 | medium: "video" 6 | tool: "ffmpeg" 7 | default_flags: 8 | encoder: 9 | ["-c:v", "libvpx-vp9"] 10 | disable-audio: 11 | ["-an"] 12 | frames: 13 | ["-frames", "20"] 14 | resolution: 15 | ["-s", "320x240"] 16 | 17 | variation: 18 | resolution: 19 | - ["-s", "18x32"] 20 | - ["-s", "32x18"] 21 | - ["-s", "64x64"] 22 | - ["-s", "320x180"] 23 | - ["-s", "320x200"] 24 | - ["-s", "480x320"] 25 | - ["-s", "1280x720"] 26 | - ["-s", "1920x1080"] 27 | - ["-s", "4096x2160"] 28 | deadline: 29 | - ["-deadline", "best"] 30 | - ["-deadline", "good"] 31 | - ["-deadline", "realtime"] 32 | monochrome: 33 | - ["-vf", "hue=s=0"] 34 | 
-------------------------------------------------------------------------------- /src/corpus_replicator/recipes/video_xvid_mp4_libxvid.yml: -------------------------------------------------------------------------------- 1 | base: 2 | codec: "xvid" 3 | container: "mp4" 4 | library: "libxvid" 5 | medium: "video" 6 | tool: "ffmpeg" 7 | default_flags: 8 | encoder: 9 | ["-c:v", "libxvid"] 10 | disable-audio: 11 | ["-an"] 12 | frames: 13 | ["-frames", "20"] 14 | resolution: 15 | ["-s", "320x240"] 16 | 17 | variation: 18 | resolution: 19 | - ["-s", "18x32"] 20 | - ["-s", "32x18"] 21 | - ["-s", "64x64"] 22 | - ["-s", "320x180"] 23 | - ["-s", "320x200"] 24 | - ["-s", "480x320"] 25 | - ["-s", "1280x720"] 26 | - ["-s", "1920x1080"] 27 | - ["-s", "4096x2160"] 28 | quality: 29 | - ["-qscale:v", "5"] 30 | - ["-qscale:v", "25"] 31 | - ["-qscale:v", "31"] 32 | monochrome: 33 | - ["-vf", "hue=s=0"] 34 | -------------------------------------------------------------------------------- /src/corpus_replicator/test_common.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
5 | 6 | from pytest import mark, raises 7 | from yaml import safe_dump 8 | 9 | from .common import ( 10 | CorpusGenerator, 11 | Recipe, 12 | RecipeError, 13 | Template, 14 | is_resolution, 15 | list_recipes, 16 | run_tool, 17 | ) 18 | 19 | SAMPLE_RECIPE = """ 20 | base: 21 | codec: "codec" 22 | container: "container" 23 | library: "library" 24 | medium: "audio" 25 | tool: "ffmpeg" 26 | default_flags: 27 | encoder: 28 | ["-c:a", "mp3"] 29 | 30 | variation: 31 | vbr: 32 | - ["-vbr", "1"] 33 | - ["-vbr", "3"] 34 | - ["-vbr", "5"] 35 | cbr: 36 | - ["-b:a", "64k"] 37 | """ 38 | 39 | 40 | def test_recipe_01(tmp_path): 41 | """test Recipe()""" 42 | recipe_file = tmp_path / "recipe.yml" 43 | recipe_file.write_text(SAMPLE_RECIPE) 44 | recipe = Recipe(recipe_file) 45 | assert recipe.container == "container" 46 | assert recipe.library == "library" 47 | assert recipe.medium == "audio" 48 | assert recipe.codec == "codec" 49 | assert recipe.tool == "ffmpeg" 50 | 51 | assert len(recipe) == 4 52 | 53 | for active, _, flags in recipe: 54 | assert active in ("cbr", "vbr") 55 | assert "-c:a" in flags 56 | if active == "cbr": 57 | assert "-b:a" in flags 58 | assert "-vbr" not in flags 59 | elif active == "vbr": 60 | assert "-b:a" not in flags 61 | assert "-vbr" in flags 62 | else: 63 | assert False 64 | assert len(flags) == 4 65 | 66 | 67 | @mark.parametrize( 68 | "data, msg", 69 | [ 70 | # unsupported tool 71 | ( 72 | { 73 | "base": { 74 | "codec": "codec", 75 | "container": "container", 76 | "library": "library", 77 | "medium": "audio", 78 | "tool": "x", 79 | "default_flags": {}, 80 | }, 81 | "variation": { 82 | "a": [["-a"]], 83 | }, 84 | }, 85 | "Recipe tool 'x' unsupported", 86 | ), 87 | # unsupported medium 88 | ( 89 | { 90 | "base": { 91 | "codec": "codec", 92 | "container": "container", 93 | "library": "library", 94 | "medium": "x", 95 | "tool": "ffmpeg", 96 | "default_flags": {}, 97 | }, 98 | "variation": { 99 | "a": [["-a"]], 100 | }, 101 | }, 102 | "Recipe medium 'x' 
unsupported", 103 | ), 104 | # missing codec 105 | ( 106 | { 107 | "base": { 108 | "container": "container", 109 | "library": "library", 110 | "medium": "audio", 111 | "tool": "ffmpeg", 112 | "default_flags": {}, 113 | }, 114 | "variation": { 115 | "a": [["-a"]], 116 | }, 117 | }, 118 | "Recipe missing entry 'codec'", 119 | ), 120 | # invalid codec entry 121 | ( 122 | { 123 | "base": { 124 | "codec": 1, 125 | "container": "container", 126 | "library": "library", 127 | "medium": "audio", 128 | "tool": "ffmpeg", 129 | "default_flags": {}, 130 | }, 131 | "variation": { 132 | "a": [["-a"]], 133 | }, 134 | }, 135 | "Recipe 'codec' entry is invalid", 136 | ), 137 | # empty codec entry 138 | ( 139 | { 140 | "base": { 141 | "codec": "", 142 | "container": "container", 143 | "library": "library", 144 | "medium": "audio", 145 | "tool": "ffmpeg", 146 | "default_flags": {}, 147 | }, 148 | "variation": { 149 | "a": [["-a"]], 150 | }, 151 | }, 152 | "Recipe 'codec' entry is invalid", 153 | ), 154 | # missing base 155 | ( 156 | { 157 | "variation": { 158 | "a": [["-a"]], 159 | }, 160 | }, 161 | "Recipe missing entry 'base'", 162 | ), 163 | # missing variation 164 | ( 165 | { 166 | "base": { 167 | "codec": "codec", 168 | "container": "container", 169 | "library": "library", 170 | "medium": "audio", 171 | "tool": "ffmpeg", 172 | "default_flags": {}, 173 | }, 174 | }, 175 | "Recipe missing entry 'variation'", 176 | ), 177 | # incomplete default flags 178 | ( 179 | { 180 | "base": { 181 | "codec": "codec", 182 | "container": "container", 183 | "library": "library", 184 | "medium": "audio", 185 | "tool": "ffmpeg", 186 | "default_flags": {"a": None, "b": ["-b"]}, 187 | }, 188 | "variation": { 189 | "a": [["-a"]], 190 | }, 191 | }, 192 | "Recipe 'default_flags' 'a' is incomplete", 193 | ), 194 | # invalid default flags entry 195 | ( 196 | { 197 | "base": { 198 | "codec": "codec", 199 | "container": "container", 200 | "library": "library", 201 | "medium": "audio", 202 | "tool": "ffmpeg", 
203 | "default_flags": {"a": [1]}, 204 | }, 205 | "variation": { 206 | "a": [["-a"]], 207 | }, 208 | }, 209 | "Recipe 'default_flags' 'a' has invalid flags", 210 | ), 211 | # invalid empty default flags 212 | ( 213 | { 214 | "base": { 215 | "codec": "codec", 216 | "container": "container", 217 | "library": "library", 218 | "medium": "audio", 219 | "tool": "ffmpeg", 220 | "default_flags": [], 221 | }, 222 | "variation": { 223 | "a": [["-a"]], 224 | }, 225 | }, 226 | "Recipe 'default_flags' is invalid", 227 | ), 228 | # empty variations 229 | ( 230 | { 231 | "base": { 232 | "codec": "codec", 233 | "container": "container", 234 | "library": "library", 235 | "medium": "audio", 236 | "tool": "ffmpeg", 237 | "default_flags": {}, 238 | }, 239 | "variation": {}, 240 | }, 241 | "Recipe missing variations", 242 | ), 243 | # incomplete variation 244 | ( 245 | { 246 | "base": { 247 | "codec": "codec", 248 | "container": "container", 249 | "library": "library", 250 | "medium": "audio", 251 | "tool": "ffmpeg", 252 | "default_flags": {}, 253 | }, 254 | "variation": {"a": None, "b": [["-b"]]}, 255 | }, 256 | "Recipe variation 'a' is incomplete", 257 | ), 258 | # incomplete variation (entry missing values) 259 | ( 260 | { 261 | "base": { 262 | "codec": "codec", 263 | "container": "container", 264 | "library": "library", 265 | "medium": "audio", 266 | "tool": "ffmpeg", 267 | "default_flags": {}, 268 | }, 269 | "variation": {"a": [["-a"], []]}, 270 | }, 271 | "Recipe variation 'a' is incomplete", 272 | ), 273 | # invalid variation name 274 | ( 275 | { 276 | "base": { 277 | "codec": "codec", 278 | "container": "container", 279 | "library": "library", 280 | "medium": "audio", 281 | "tool": "ffmpeg", 282 | "default_flags": {}, 283 | }, 284 | "variation": {"B@D!": [["-a"]]}, 285 | }, 286 | "Recipe variation name 'B@D!' 
is invalid", 287 | ), 288 | # invalid variation type 289 | ( 290 | { 291 | "base": { 292 | "codec": "codec", 293 | "container": "container", 294 | "library": "library", 295 | "medium": "audio", 296 | "tool": "ffmpeg", 297 | "default_flags": {}, 298 | }, 299 | "variation": {"a": {"A": ["-a"]}}, 300 | }, 301 | "Recipe variation 'a' is invalid", 302 | ), 303 | # invalid variation type 304 | ( 305 | { 306 | "base": { 307 | "codec": "codec", 308 | "container": "container", 309 | "library": "library", 310 | "medium": "audio", 311 | "tool": "ffmpeg", 312 | "default_flags": {}, 313 | }, 314 | "variation": {"a": [{"-a": None}]}, 315 | }, 316 | "Recipe variation 'a' is invalid", 317 | ), 318 | # variation invalid flags 319 | ( 320 | { 321 | "base": { 322 | "codec": "codec", 323 | "container": "container", 324 | "library": "library", 325 | "medium": "audio", 326 | "tool": "ffmpeg", 327 | "default_flags": {}, 328 | }, 329 | "variation": {"a": [[1]]}, 330 | }, 331 | "Recipe variation 'a' has invalid flags", 332 | ), 333 | # invalid YAML 334 | ( 335 | "{", 336 | "Invalid YAML file", 337 | ), 338 | # empty recipe file 339 | ( 340 | "", 341 | "Recipe missing entry ", 342 | ), 343 | ], 344 | ) 345 | def test_recipe_02(tmp_path, data, msg): 346 | """test Recipe() errors""" 347 | recipe_file = tmp_path / "recipe.yml" 348 | with recipe_file.open("w") as out_fp: 349 | if isinstance(data, dict): 350 | safe_dump(data, out_fp) 351 | else: 352 | out_fp.write(data) 353 | with raises(RecipeError, match=msg): 354 | Recipe(recipe_file) 355 | 356 | 357 | def test_template_01(tmp_path): 358 | """test Template()""" 359 | template_file = tmp_path / "testfile" 360 | template_file.touch() 361 | template = Template("test_template", template_file) 362 | assert template.file == template_file 363 | assert template.name == "test_template" 364 | template.unlink() 365 | assert not template_file.is_file() 366 | 367 | 368 | def test_corpus_generator_01(mocker, tmp_path): 369 | """test CorpusGenerator()""" 
370 | 371 | class SimpleGenerator(CorpusGenerator): 372 | """Test Generator""" 373 | 374 | def generate(self): 375 | yield ("test", ["arg1", "arg2"]) 376 | 377 | recipe = mocker.Mock( 378 | spec_set=Recipe, 379 | codec="h264", 380 | container="mp4", 381 | library="libx264", 382 | medium="video", 383 | ) 384 | generator = SimpleGenerator(recipe, tmp_path) 385 | generator.add_template(mocker.Mock(spec_set=Template)) 386 | assert generator.description == "video/libx264/h264/mp4" 387 | all(generator.generate()) 388 | 389 | 390 | @mark.parametrize( 391 | "resolution, result", 392 | [ 393 | ("123x234", True), 394 | ("", False), 395 | ("1", False), 396 | ("0x1", False), 397 | ("1x1x1", False), 398 | ("-1x19", False), 399 | ("foo", False), 400 | ], 401 | ) 402 | def test_is_resolution_01(resolution, result): 403 | """test is_resolution()""" 404 | assert is_resolution(resolution) == result 405 | 406 | 407 | def test_run_tool_01(mocker, tmp_path): 408 | """test run_tool()""" 409 | run = mocker.patch("corpus_replicator.common.run", autospec=True) 410 | log = tmp_path / "log.txt" 411 | mocker.patch("corpus_replicator.common.TOOL_LOG", log) 412 | # success 413 | run_tool(["foo"]) 414 | assert not log.is_file() 415 | # failure (check error log exists) 416 | run.side_effect = RuntimeError("foo") 417 | with raises(RuntimeError): 418 | run_tool(["foo"]) 419 | assert log.is_file() 420 | 421 | 422 | def test_list_recipes_01(): 423 | """test list_recipes()""" 424 | recipes = list(list_recipes()) 425 | assert recipes 426 | assert all(x.name.endswith(".yml") for x in recipes) 427 | -------------------------------------------------------------------------------- /src/corpus_replicator/test_generate_corpus.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. 
If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | from pytest import mark, raises 6 | 7 | from .common import Recipe, ToolError 8 | from .generate_corpus import load_generator, main, parse_args 9 | 10 | 11 | @mark.parametrize( 12 | "tool, available, raised", 13 | [ 14 | # ffmpeg exists 15 | ("ffmpeg", True, False), 16 | # ffmpeg not available 17 | ("ffmpeg", False, True), 18 | # unknown tool 19 | ("unknown", False, True), 20 | ], 21 | ) 22 | def test_load_generator_01(mocker, tmp_path, tool, available, raised): 23 | """test load_generator()""" 24 | mocker.patch( 25 | "corpus_replicator.generate_corpus.ffmpeg_available", 26 | return_value=available, 27 | autospec=True, 28 | ) 29 | recipe = mocker.Mock(spec_set=Recipe, tool=tool) 30 | 31 | if raised: 32 | with raises(ToolError): 33 | load_generator(recipe, tmp_path) 34 | else: 35 | load_generator(recipe, tmp_path) 36 | 37 | 38 | def test_main_01(mocker, tmp_path): 39 | """test main()""" 40 | mocker.patch("corpus_replicator.generate_corpus.load_generator", autospec=True) 41 | mocker.patch("corpus_replicator.generate_corpus.Recipe", autospec=True) 42 | empty = tmp_path / "empty" 43 | empty.touch() 44 | main(["-o", str(tmp_path), str(empty), str(empty)]) 45 | 46 | 47 | def test_parse_args_01(capsys, tmp_path): 48 | """test parse_args()""" 49 | empty = tmp_path / "empty" 50 | empty.touch() 51 | # success 52 | parse_args([str(empty), str(empty)]) 53 | # missing template file 54 | with raises(SystemExit): 55 | parse_args([str(empty), "missing"]) 56 | assert "error: Template file does not exist: 'missing'" in capsys.readouterr()[1] 57 | # missing recipe file 58 | with raises(SystemExit): 59 | parse_args(["missing", str(empty)]) 60 | assert "error: Recipe file does not exist: 'missing'" in capsys.readouterr()[1] 61 | -------------------------------------------------------------------------------- /src/corpus_replicator/test_generate_template.py: 
-------------------------------------------------------------------------------- 1 | # type: ignore 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | from pytest import mark, raises 7 | 8 | from .generate_template import ( 9 | TEMPLATES, 10 | generate_audio, 11 | generate_image, 12 | generate_video, 13 | main, 14 | parse_args, 15 | ) 16 | 17 | 18 | @mark.parametrize("template_name", TEMPLATES["audio"]) 19 | def test_generate_audio_01(mocker, tmp_path, template_name): 20 | """test generate_audio() with each audio template (run_tool mocked)""" 21 | mocker.patch("corpus_replicator.generate_template.run_tool", autospec=True) 22 | template = generate_audio(template_name, tmp_path) 23 | assert template.name == template_name 24 | assert template.file.parent == tmp_path 25 | 26 | 27 | @mark.parametrize("template_name", TEMPLATES["image"]) 28 | def test_generate_image_01(mocker, tmp_path, template_name): 29 | """test generate_image() with each image template (run_tool mocked)""" 30 | mocker.patch("corpus_replicator.generate_template.run_tool", autospec=True) 31 | template = generate_image(template_name, tmp_path) 32 | assert template.name == template_name 33 | assert template.file.parent == tmp_path 34 | 35 | 36 | @mark.parametrize("template_name", TEMPLATES["video"]) 37 | def test_generate_video_01(mocker, tmp_path, template_name): 38 | """test generate_video() with each video template (run_tool mocked)""" 39 | mocker.patch("corpus_replicator.generate_template.run_tool", autospec=True) 40 | template = generate_video(template_name, tmp_path) 41 | assert template.name == template_name 42 | assert template.file.parent == tmp_path 43 | 44 | 45 | @mark.parametrize( 46 | "duration, frames", 47 | [ 48 | (2.1, 0), 49 | (1.0, 10), 50 | ], 51 | ) 52 | def test_generate_video_02(mocker, tmp_path, duration, frames): 53 | """test generate_video() with explicit duration and frames arguments""" 54 | mocker.patch("corpus_replicator.generate_template.run_tool", autospec=True) 55 | 
generate_video("noise", tmp_path, duration=duration, frames=frames) 56 | 57 | 58 | @mark.parametrize( 59 | "medium, template", 60 | [ 61 | ("audio", "noise"), 62 | ("image", "noise"), 63 | ("video", "noise"), 64 | ], 65 | ) 66 | def test_main_01(mocker, tmp_path, medium, template): 67 | """test main() for each medium with ffmpeg_available and run_tool mocked""" 68 | mocker.patch("corpus_replicator.generate_template.ffmpeg_available", autospec=True) 69 | mocker.patch("corpus_replicator.generate_template.run_tool", autospec=True) 70 | main(["-o", str(tmp_path), medium, template]) 71 | 72 | 73 | def test_parse_args_01(mocker, capsys): 74 | """test parse_args() rejects an invalid resolution and a missing FFmpeg""" 75 | ffmpeg_check = mocker.patch( 76 | "corpus_replicator.generate_template.ffmpeg_available", autospec=True 77 | ) 78 | # success 79 | parse_args(["audio", "noise"]) 80 | # invalid resolution 81 | with raises(SystemExit): 82 | parse_args(["video", "noise", "-r", "foo"]) 83 | assert "argument -r/--resolution: invalid value" in capsys.readouterr()[1] 84 | # missing ffmpeg 85 | ffmpeg_check.return_value = False 86 | with raises(SystemExit): 87 | parse_args(["audio", "noise"]) 88 | assert "Please install FFmpeg." in capsys.readouterr()[1] 89 | -------------------------------------------------------------------------------- /src/corpus_replicator/test_replicator.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
5 | 6 | from pytest import mark, raises 7 | 8 | from .common import RecipeError 9 | from .core import Replicator, main, parse_args 10 | 11 | SAMPLE_VIDEO_RECIPE = """ 12 | base: 13 | codec: "h264" 14 | container: "mp4" 15 | library: "libx264" 16 | medium: "video" 17 | tool: "ffmpeg" 18 | default_flags: 19 | encoder: 20 | ["-foo", "bar"] 21 | resolution: 22 | ["overwrite-me"] 23 | 24 | variation: 25 | resolution: 26 | - ["-s", "18x32"] 27 | """ 28 | 29 | 30 | @mark.parametrize( 31 | "mode, recipes, templates, expected", 32 | [ 33 | # empty 34 | ("audio", [], [], 0), 35 | # empty (coverage) 36 | # single 37 | ("video", [SAMPLE_VIDEO_RECIPE], ["noise"], 1), 38 | # recipe mode mismatch 39 | ("audio", [SAMPLE_VIDEO_RECIPE], ["noise"], 0), 40 | # recipe mode mismatch (again for coverage) 41 | ("image", [SAMPLE_VIDEO_RECIPE], ["noise"], 0), 42 | # multiple templates 43 | ("video", [SAMPLE_VIDEO_RECIPE], ["noise", "solid"], 2), 44 | # multiple recipes 45 | ("video", [SAMPLE_VIDEO_RECIPE, SAMPLE_VIDEO_RECIPE], ["noise"], 2), 46 | # multiple recipes and multiple templates 47 | ("video", [SAMPLE_VIDEO_RECIPE, SAMPLE_VIDEO_RECIPE], ["noise", "solid"], 4), 48 | ], 49 | ) 50 | def test_replicator_01(mocker, tmp_path, mode, recipes, templates, expected): 51 | """test Replicator() template and corpus generation from recipe files""" 52 | mocker.patch("corpus_replicator.common.run", autospec=True) 53 | mocker.patch("corpus_replicator.generate_corpus.ffmpeg_available", autospec=True) 54 | # create test recipe files 55 | recipe_path = tmp_path / "recipes" 56 | recipe_path.mkdir() 57 | for idx, recipe in enumerate(recipes): 58 | (recipe_path / f"recipe-{idx:02d}.yml").write_text(recipe) 59 | 60 | replicator = Replicator(mode, tmp_path / "output", recipe_path.iterdir()) 61 | assert len(replicator) == 0 62 | replicator.generate_templates(templates) 63 | assert len(replicator) == expected 64 | replicator.generate_corpus() 65 | replicator.remove_templates() 66 | 67 | 68 | @mark.parametrize( 69 | "file_data, final_count", 70 | [ 71 | 
([], 0), 72 | (["test", "test"], 1), 73 | (["test", "test", "test"], 1), 74 | (["test1", "test2"], 2), 75 | (["test1", "test2", "test2"], 2), 76 | (["test1", "test1", "test2", "test2"], 2), 77 | ], 78 | ) 79 | def test_replicator_02(tmp_path, file_data, final_count): 80 | """test Replicator.remove_duplicates()""" 81 | 82 | for idx, data in enumerate(file_data): 83 | (tmp_path / f"{idx}.txt").write_text(data) 84 | replicator = Replicator("video", tmp_path, []) 85 | replicator.remove_duplicates() 86 | assert sum(1 for _ in replicator.dest.iterdir()) == final_count 87 | 88 | 89 | @mark.parametrize( 90 | "medium", 91 | [ 92 | "animation", 93 | "audio", 94 | "image", 95 | "video", 96 | ], 97 | ) 98 | def test_main_01(mocker, tmp_path, medium): 99 | """test main() calls each Replicator step for every medium""" 100 | mocker.patch("corpus_replicator.core.ffmpeg_available", autospec=True) 101 | replicator = mocker.patch("corpus_replicator.core.Replicator", autospec=True) 102 | empty = tmp_path / "empty" 103 | empty.touch() 104 | mocker.patch("corpus_replicator.core.TOOL_LOG", empty) 105 | main(["-o", str(tmp_path), str(empty), medium]) 106 | assert replicator.return_value.generate_templates.call_count == 1 107 | assert replicator.return_value.generate_corpus.call_count == 1 108 | assert replicator.return_value.remove_duplicates.call_count == 1 109 | assert replicator.return_value.remove_templates.call_count == 2 110 | 111 | 112 | def test_main_02(mocker, tmp_path): 113 | """test main() when Replicator() raises RecipeError""" 114 | mocker.patch("corpus_replicator.core.ffmpeg_available", autospec=True) 115 | replicator = mocker.patch("corpus_replicator.core.Replicator", autospec=True) 116 | replicator.side_effect = RecipeError("foo") 117 | empty = tmp_path / "empty" 118 | empty.touch() 119 | main(["-o", str(tmp_path), str(empty), "video"]) 120 | assert replicator.return_value.generate_templates.call_count == 0 121 | assert replicator.return_value.generate_corpus.call_count == 0 122 | 123 | 124 | def test_parse_args_01(capsys, mocker, tmp_path): 125 | 
"""test parse_args() rejects a missing recipe, bad resolution and missing FFmpeg""" 126 | ffmpeg_check = mocker.patch( 127 | "corpus_replicator.core.ffmpeg_available", autospec=True 128 | ) 129 | empty = tmp_path / "empty" 130 | empty.touch() 131 | # success 132 | parse_args([str(empty), "video"]) 133 | # missing recipe file 134 | with raises(SystemExit): 135 | parse_args(["missing", "video"]) 136 | assert "error: Recipe file does not exist: 'missing'" in capsys.readouterr()[1] 137 | # invalid resolution 138 | with raises(SystemExit): 139 | parse_args([str(empty), "video", "-r", "foo"]) 140 | assert "argument -r/--resolution: invalid value" in capsys.readouterr()[1] 141 | # missing ffmpeg 142 | ffmpeg_check.return_value = False 143 | with raises(SystemExit): 144 | parse_args([str(empty), "video"]) 145 | assert "Please install FFmpeg." in capsys.readouterr()[1] 146 | -------------------------------------------------------------------------------- /src/corpus_replicator/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MozillaSecurity/corpus-replicator/d6921d423e4c3ede4f9ac40b98af3fa1cc21cd15/src/corpus_replicator/tools/__init__.py -------------------------------------------------------------------------------- /src/corpus_replicator/tools/ffmpeg.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | from collections.abc import Iterator 5 | from pathlib import Path 6 | from shutil import which 7 | 8 | from ..common import CorpusGenerator, run_tool 9 | 10 | FFMPEG_BIN = "ffmpeg" 11 | 12 | 13 | class FFmpegGenerator(CorpusGenerator): 14 | """FFmpeg wrapper.""" 15 | 16 | def generate(self) -> Iterator[Path]: 17 | """Generate corpus. 
Templates are combined with Recipes to create variations 18 | based on parameters defined in the Recipes. 19 | 20 | Args: 21 | None (the recipe, destination and templates held by the 22 | generator instance are used). 23 | 24 | Yields: 25 | Destination path of each corpus file. 26 | """ 27 | for template in self._templates: 28 | base_cmd = [FFMPEG_BIN, "-i", str(template.file), "-y"] 29 | for flag, idx, variation in self._recipe: 30 | # build dest file name 'video-h264-library-noise-resolution-##.mp4' 31 | dest_file = self._dest / "-".join( 32 | [ 33 | self._recipe.medium, 34 | self._recipe.codec, 35 | self._recipe.library, 36 | template.name, 37 | flag, 38 | f"{idx:02d}.{self._recipe.container}", 39 | ] 40 | ) 41 | run_tool(base_cmd + variation + [str(dest_file)]) 42 | yield dest_file 43 | 44 | 45 | def ffmpeg_available() -> bool: 46 | """Check if FFmpeg is installed. 47 | 48 | Args: 49 | None 50 | 51 | Return: 52 | True if tool is installed otherwise False. 53 | """ 54 | # TODO: check version and flags for available features? 55 | return which(FFMPEG_BIN) is not None 56 | -------------------------------------------------------------------------------- /src/corpus_replicator/tools/imagemagick.py: -------------------------------------------------------------------------------- 1 | # This Source Code Form is subject to the terms of the Mozilla Public 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | from collections.abc import Iterator 5 | from pathlib import Path 6 | from shutil import which 7 | 8 | from ..common import CorpusGenerator, run_tool 9 | 10 | IMAGEMAGICK_BIN = "convert" 11 | 12 | 13 | class ImageMagickGenerator(CorpusGenerator): 14 | """ImageMagick wrapper.""" 15 | 16 | def generate(self) -> Iterator[Path]: 17 | """Generate corpus. Templates are combined with Recipes to create variations 18 | based on parameters defined in the Recipes. 
19 | 20 | Args: 21 | None (the recipe, destination and templates held by the 22 | generator instance are used). 23 | 24 | Yields: 25 | Destination path of each corpus file. 26 | """ 27 | for template in self._templates: 28 | base_cmd = [IMAGEMAGICK_BIN, str(template.file)] 29 | for flag, idx, variation in self._recipe: 30 | # build dest file name 'image-jpeg-library-noise-resolution-##.jpg' 31 | dest_file = self._dest / "-".join( 32 | [ 33 | self._recipe.medium, 34 | self._recipe.codec, 35 | self._recipe.library, 36 | template.name, 37 | flag, 38 | f"{idx:02d}.{self._recipe.container}", 39 | ] 40 | ) 41 | run_tool(base_cmd + variation + [str(dest_file)]) 42 | yield dest_file 43 | 44 | 45 | def imagemagick_available() -> bool: 46 | """Check if ImageMagick is installed. 47 | 48 | Args: 49 | None 50 | 51 | Return: 52 | True if installed otherwise False. 53 | """ 54 | # TODO: check version and flags for available features? 55 | return which(IMAGEMAGICK_BIN) is not None 56 | -------------------------------------------------------------------------------- /src/corpus_replicator/tools/test_ffmpeg.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
5 | 6 | from ..common import Recipe, Template 7 | from .ffmpeg import FFmpegGenerator, ffmpeg_available 8 | 9 | SAMPLE_VIDEO_RECIPE = """ 10 | base: 11 | codec: "h264" 12 | container: "mp4" 13 | library: "libx264" 14 | medium: "video" 15 | tool: "ffmpeg" 16 | default_flags: 17 | param: 18 | ["overwrite-me"] 19 | 20 | variation: 21 | param: 22 | - ["flags-1"] 23 | - ["flags-2"] 24 | """ 25 | 26 | 27 | def test_ffmpeg_available_01(mocker): 28 | """test ffmpeg_available() with which() mocked""" 29 | which = mocker.patch("corpus_replicator.tools.ffmpeg.which", autospec=True) 30 | which.return_value = True 31 | assert ffmpeg_available() 32 | which.return_value = None 33 | assert not ffmpeg_available() 34 | 35 | 36 | def test_ffmpeg_generator_01(mocker, tmp_path): 37 | """test FFmpegGenerator() yields one file per template and variation""" 38 | mocker.patch("corpus_replicator.tools.ffmpeg.run_tool", autospec=True) 39 | 40 | recipe_file = tmp_path / "recipe.yml" 41 | recipe_file.write_text(SAMPLE_VIDEO_RECIPE) 42 | 43 | template_file = tmp_path / "template.bin" 44 | template_file.touch() 45 | 46 | generator = FFmpegGenerator(Recipe(recipe_file), tmp_path / "output") 47 | generator.add_template(Template("template01", template_file)) 48 | generator.add_template(Template("template02", template_file)) 49 | 50 | corpus = [x.name for x in generator.generate()] 51 | assert len(corpus) == 4 52 | assert "video-h264-libx264-template01-param-00.mp4" in corpus 53 | assert "video-h264-libx264-template01-param-01.mp4" in corpus 54 | assert "video-h264-libx264-template02-param-00.mp4" in corpus 55 | assert "video-h264-libx264-template02-param-01.mp4" in corpus 56 | -------------------------------------------------------------------------------- /src/corpus_replicator/tools/test_imagemagick.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. 
If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | from ..common import Recipe, Template 7 | from .imagemagick import ImageMagickGenerator, imagemagick_available 8 | 9 | SAMPLE_IMAGE_RECIPE = """ 10 | base: 11 | codec: "jpeg" 12 | container: "jpg" 13 | library: "imagemagick" 14 | medium: "image" 15 | tool: "imagemagick" 16 | default_flags: 17 | param: 18 | ["overwrite-me"] 19 | 20 | variation: 21 | param: 22 | - ["flags-1"] 23 | - ["flags-2"] 24 | """ 25 | 26 | 27 | def test_imagemagick_available_01(mocker): 28 | """test imagemagick_available() with which() mocked""" 29 | which = mocker.patch("corpus_replicator.tools.imagemagick.which", autospec=True) 30 | which.return_value = True 31 | assert imagemagick_available() 32 | which.return_value = None 33 | assert not imagemagick_available() 34 | 35 | 36 | def test_imagemagick_generator_01(mocker, tmp_path): 37 | """test ImageMagickGenerator() yields one file per template and variation""" 38 | mocker.patch("corpus_replicator.tools.imagemagick.run_tool", autospec=True) 39 | 40 | recipe_file = tmp_path / "recipe.yml" 41 | recipe_file.write_text(SAMPLE_IMAGE_RECIPE) 42 | 43 | template_file = tmp_path / "template.bin" 44 | template_file.touch() 45 | 46 | generator = ImageMagickGenerator(Recipe(recipe_file), tmp_path / "output") 47 | generator.add_template(Template("template01", template_file)) 48 | generator.add_template(Template("template02", template_file)) 49 | 50 | corpus = [x.name for x in generator.generate()] 51 | assert len(corpus) == 4 52 | assert "image-jpeg-imagemagick-template01-param-00.jpg" in corpus 53 | assert "image-jpeg-imagemagick-template01-param-01.jpg" in corpus 54 | assert "image-jpeg-imagemagick-template02-param-00.jpg" in corpus 55 | assert "image-jpeg-imagemagick-template02-param-01.jpg" in corpus 56 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | 
envlist = py{39,310,311,312,313},lint 3 | skip_missing_interpreters = true 4 | tox_pip_extensions_ext_venv_update = true 5 | 6 | [testenv] 7 | commands = pytest -v --cache-clear --cov={toxinidir} --cov-config={toxinidir}/pyproject.toml --cov-report=term-missing --basetemp={envtmpdir} {posargs} --disable-pytest-warnings 8 | deps = 9 | pytest 10 | pytest-cov 11 | pytest-mock 12 | passenv = 13 | BUILD_CACHE 14 | CI 15 | CI_* 16 | CODECOV_* 17 | TOXENV 18 | TRAVIS 19 | TRAVIS_* 20 | TWINE_* 21 | VCS_* 22 | usedevelop = true 23 | 24 | [testenv:codecov] 25 | commands = 26 | codecov upload-process 27 | deps = 28 | codecov-cli 29 | coverage[toml] 30 | skip_install = true 31 | 32 | [testenv:lint] 33 | commands = 34 | pre-commit run -a {posargs} 35 | deps = 36 | pre-commit 37 | skip_install = true 38 | 39 | [testenv:mypy] 40 | commands = 41 | mypy --install-types --non-interactive {posargs} 42 | deps = 43 | mypy==v1.14.1 44 | usedevelop = true 45 | 46 | [testenv:pylint] 47 | commands = 48 | pylint {posargs} 49 | deps = 50 | pylint==3.3.3 51 | usedevelop = true 52 | 53 | [testenv:pypi] 54 | commands = 55 | python -m build 56 | twine upload --skip-existing dist/* 57 | deps = 58 | build 59 | twine 60 | skip_install = true 61 | --------------------------------------------------------------------------------