├── .github └── workflows │ └── pushtest.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── VERSION ├── imagine ├── __init__.py └── imagine.py ├── requirements.txt ├── requirements.txt.tf1 ├── setup.py └── tests ├── __init__.py ├── functional ├── __init__.py ├── test_jpg_creation.py ├── test_png_creation.py ├── test_recordio.py └── test_tfrecord.py └── unit ├── __init__.py └── test_units.py /.github/workflows/pushtest.yml: -------------------------------------------------------------------------------- 1 | name: Imageinary push tests 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | test_tf2: 7 | name: Test and lint code for TensorFlow 2 8 | runs-on: ${{ matrix.operating-system }} 9 | strategy: 10 | matrix: 11 | python-version: [3.7, 3.8] 12 | operating-system: [ubuntu-latest, macOS-latest] 13 | 14 | steps: 15 | - uses: actions/checkout@v1 16 | - name: Set up Python ${{ matrix.python-version }} 17 | uses: actions/setup-python@v1 18 | with: 19 | python-version: ${{ matrix.python-version }} 20 | - name: Install dependencies 21 | run: | 22 | python -m pip install --upgrade pip 23 | pip install -r requirements.txt 24 | - name: Run unit and functional tests with pytest 25 | run: | 26 | pytest --cov=imagine --cov-report term-missing tests/ 27 | - name: Lint with pycodestyle 28 | run: | 29 | pycodestyle imagine/ tests/ 30 | - name: Build the latest binary 31 | run: | 32 | python setup.py sdist bdist_wheel 33 | - name: Install the built wheel and test usage (UNIX) 34 | run: | 35 | pip install --ignore-installed dist/nvidia_imageinary-*-py3-none-any.whl 36 | imagine --help 37 | - name: Install the built wheel with all dependencies (UNIX) 38 | run: | 39 | pip install nvidia-imageinary['all'] 40 | imagine --help 41 | pip freeze | grep "tensorflow" 42 | pip freeze | grep "mxnet" 43 | 44 | test_tf1: 45 | name: Test code for TensorFlow 1.x 46 | runs-on: ${{ matrix.operating-system }} 47 | strategy: 48 | matrix: 49 | python-version: [3.7] 50 | operating-system: 
[ubuntu-latest, macOS-latest] 51 | 52 | steps: 53 | - uses: actions/checkout@v1 54 | - name: Set up Python ${{ matrix.python-version }} 55 | uses: actions/setup-python@v1 56 | with: 57 | python-version: ${{ matrix.python-version }} 58 | - name: Install dependencies 59 | run: | 60 | python -m pip install --upgrade pip 61 | pip install -r requirements.txt.tf1 62 | - name: Run unit and functional tests with pytest 63 | run: | 64 | pytest --cov=imagine --cov-report term-missing tests/ 65 | - name: Lint with pycodestyle 66 | run: | 67 | pycodestyle imagine/ tests/ 68 | - name: Build the latest binary 69 | run: | 70 | python setup.py sdist bdist_wheel 71 | - name: Install the built wheel and test usage (UNIX) 72 | run: | 73 | pip install --ignore-installed dist/nvidia_imageinary-*-py3-none-any.whl 74 | imagine --help 75 | - name: Install the built wheel with all dependencies (UNIX) 76 | run: | 77 | pip install tensorflow==1.14.0 nvidia-imageinary['all'] 78 | imagine --help 79 | pip freeze | grep "tensorflow" 80 | pip freeze | grep "mxnet" 81 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *pycache* 2 | *vscode* 3 | *env* 4 | *coverage* 5 | build/* 6 | dist/* 7 | *egg* 8 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribute to Imageinary 2 | 3 | Before contributing to Imageinary, we require all users to sign-off on their 4 | work. 5 | 6 | ## Sign your work 7 | 8 | The sign-off is a simple line at the end of the explanation for the patch. Your 9 | signature certifies that you wrote the patch or otherwise have the right to pass 10 | it on as an open-source patch. 
The rules are pretty simple: if you can certify 11 | the below (from [developercertificate.org](http://developercertificate.org/)): 12 | 13 | ``` 14 | Developer Certificate of Origin 15 | Version 1.1 16 | 17 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 18 | 1 Letterman Drive 19 | Suite D4700 20 | San Francisco, CA, 94129 21 | 22 | Everyone is permitted to copy and distribute verbatim copies of this 23 | license document, but changing it is not allowed. 24 | 25 | Developer's Certificate of Origin 1.1 26 | 27 | By making a contribution to this project, I certify that: 28 | 29 | (a) The contribution was created in whole or in part by me and I 30 | have the right to submit it under the open source license 31 | indicated in the file; or 32 | 33 | (b) The contribution is based upon previous work that, to the best 34 | of my knowledge, is covered under an appropriate open source 35 | license and I have the right under that license to submit that 36 | work with modifications, whether created in whole or in part 37 | by me, under the same open source license (unless I am 38 | permitted to submit under a different license), as indicated 39 | in the file; or 40 | 41 | (c) The contribution was provided directly to me by some other 42 | person who certified (a), (b) or (c) and I have not modified 43 | it. 44 | 45 | (d) I understand and agree that this project and the contribution 46 | are public and that a record of the contribution (including all 47 | personal information I submit with it, including my sign-off) is 48 | maintained indefinitely and may be redistributed consistent with 49 | this project or the open source license(s) involved. 50 | ``` 51 | 52 | Then you just add a line to every git commit message: 53 | 54 | Signed-off-by: Joe Smith 55 | 56 | Use your real name (sorry, no pseudonyms or anonymous contributions.) 
57 | 58 | If you set your `user.name` and `user.email` git configs, you can sign your 59 | commit automatically with `git commit -s`. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2020 NVIDIA CORPORATION 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Imageinary 2 | Imageinary is a reproducible mechanism which is used to generate large image 3 | datasets at various resolutions. The tool supports multiple image types, 4 | including JPEGs, PNGs, BMPs, RecordIO, and TFRecord files. 5 | 6 | ## Use Cases 7 | While benchmarking deep learning applications involving images, there are 8 | typically only a handful of public datasets that can be used and they tend to 9 | have small and limited image sizes. In an effort to run DL tests against various 10 | input sizes, we designed a tool to quickly and easily generate images of 11 | variable dimensions and types which can be fed to convolutional neural networks 12 | and deep learning pipelines. 13 | 14 | The images are generated using random numpy arrays and are then converted to the 15 | requested output format and saved to the specified location. 
16 | 17 | ## Requirements 18 | * Python 3.7 or greater 19 | * TensorFlow 2 or TensorFlow 1.14 20 | * MXNet 21 | * Pillow 22 | * Numpy 23 | 24 | ## Installation 25 | The application is available on `nvidia-pyindex` and can be downloaded and 26 | installed using PIP. 27 | 28 | ### Virtual Environment 29 | It is recommended to run this program in a Python virtual environment to avoid 30 | dependency interference. The virtual environment can be installed and activated 31 | with: 32 | 33 | ```bash 34 | pip install virtualenv 35 | virtualenv --python python3 env 36 | source env/bin/activate 37 | ``` 38 | 39 | Once finished using the application, you can leave the virtual environment with: 40 | 41 | ```bash 42 | deactivate 43 | ``` 44 | 45 | ### Pre-install 46 | Before installing the package, the `nvidia-pyindex` package needs to be 47 | installed to tell PIP where to look for the package. This needs to be installed 48 | just once. 49 | 50 | ```bash 51 | pip install nvidia-pyindex 52 | ``` 53 | 54 | ### Minimal Install 55 | The minimal install supports standard image types, such as JPG, PNG, and BMP 56 | and only installs the dependencies necessary for those tools. 57 | 58 | ```bash 59 | pip install nvidia-imageinary 60 | ``` 61 | 62 | ### TFRecord Support 63 | To add support for TFRecords in addition to the standard image types, TensorFlow 64 | needs to be included as a dependency. This can be done by running the following 65 | which installs TensorFlow alongside all other dependencies: 66 | 67 | ```bash 68 | pip install nvidia-imageinary['tfrecord'] 69 | ``` 70 | 71 | ### RecordIO Support 72 | RecordIO files are supported using MXNet, which can be included as a dependency 73 | using the following: 74 | 75 | ```bash 76 | pip install nvidia-imageinary['mxnet'] 77 | ``` 78 | 79 | ### Complete Install 80 | If desired, all dependencies can be installed to support standard images, 81 | TFRecords, and RecordIO files without installing extra packages later. 
Run the 82 | following to install all dependencies: 83 | 84 | ```bash 85 | pip install nvidia-imageinary['all'] 86 | ``` 87 | 88 | ## Running 89 | Imageinary supports many different image types which can be specified while 90 | running the application. 91 | 92 | ### JPEGs 93 | A basic run to create 1000 4K JPEGs, and display the size of the first file and 94 | all files in the target directory path (not including subdirectories): 95 | 96 | ```bash 97 | imagine create-images \ 98 | /mnt/nvme/test_dir \ 99 | random_image_ \ 100 | jpg \ 101 | --width 3840 \ 102 | --height 2160 \ 103 | --count 1000 \ 104 | --size 105 | ``` 106 | 107 | The above command will generate 1,000 unique JPEG images in the 108 | `/mnt/nvme/test_dir` directory. Each filename will begin with `random_image_` 109 | and end with an image number starting from 0, such as `random_image_0.jpg`, 110 | `random_image_1.jpg`, etc. The images will have dimensions of 3840x2160. The 111 | `--size` flag displays information on the images, such as the size of the first 112 | image and the size of the overall directory. 113 | 114 | Note that for creating a very large number of images, systems can easily run out 115 | of memory. In this case, increase the `--chunksize` to reduce the amount of 116 | memory allocated by each multiprocessing pool. 117 | 118 | ### TFRecords 119 | TFRecords can also be easily generated using the application. This command 120 | expects images to be pre-loaded to be used as the basis for the TFRecord files. 121 | 122 | ```bash 123 | imagine create-tfrecord \ 124 | /mnt/nvme/test_dir \ 125 | /mnt/nvme/tf_record_dir \ 126 | random_tfrecord_ \ 127 | --img-per-file 100 128 | ``` 129 | 130 | This command uses the JPEGs created during the previous step and creates 131 | TFRecords based on those images. The TFRecords will be saved to 132 | `/mnt/nvme/tf_record_dir` where each file will be comprised of 100 JPEGs. 
133 | 134 | ### RecordIO 135 | Similarly, RecordIO files can be generated with a single command: 136 | 137 | ```bash 138 | imagine create-recordio \ 139 | /mnt/nvme/test_dir \ 140 | /mnt/nvme/record_files \ 141 | random_recordio_ \ 142 | --img-per-file 100 143 | ``` 144 | 145 | This command uses the JPEGs created during the previous step and creates 146 | RecordIO files based on those images. The RecordIO files will be saved to 147 | `/mnt/nvme/record_files` where each file will be comprised of 100 JPEGs. 148 | 149 | ## Importing 150 | Imageinary can be imported directly by a Python script to hook into a deep 151 | learning workflow. 152 | 153 | To write random images of a specific size to the system, import the library and 154 | specify the requested parameters: 155 | 156 | ```python 157 | from imagine import create_images 158 | 159 | create_images('path/to/save/images', # Path to save images 160 | 'random_image_prefix_', # Image name prefix 161 | 3840, # Width 162 | 2160, # Height 163 | 1000, # Number of images to create 164 | 'png') # Image format 165 | ``` 166 | 167 | The above command will save 1000 random PNG images of dimension 3840x2160 to the 168 | `path/to/save/images` directory, each prefixed with `random_image_prefix_`. 
169 | 170 | Similarly, TFRecords can be generated from Python: 171 | 172 | ```python 173 | from imagine import create_tfrecords 174 | 175 | create_tfrecords('path/to/saved_images', # Path to saved images 176 | 'path/to/save/tfrecords', # Path to save TFRecords 177 | 'random_tfrecord_', # TFRecord file prefix 178 | 100) # Number of images per file 179 | ``` 180 | 181 | As can RecordIO files: 182 | 183 | ```python 184 | from imagine import create_recordio 185 | 186 | create_recordio('path/to/saved_images', # Path to saved images 187 | 'path/to/save/recordio', # Path to save RecordIO files 188 | 'random_recordio_', # RecordIO file prefix 189 | 100) # Number of images per file 190 | ``` 191 | 192 | ## Testing 193 | This repository includes functional tests for the major modules listed above 194 | which can be verified locally using `pytest`. While in the virtual environment, 195 | run the following: 196 | 197 | ```bash 198 | $ pytest --cov=imagine --cov-report term-missing tests/ 199 | ``` 200 | 201 | This will output the test results including the overall coverage for the Python 202 | module. 203 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 1.1.3 2 | -------------------------------------------------------------------------------- /imagine/__init__.py: -------------------------------------------------------------------------------- 1 | from imagine.imagine import (create_images, 2 | create_recordio, 3 | create_tfrecords, 4 | _main) 5 | -------------------------------------------------------------------------------- /imagine/imagine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | import os 16 | import re 17 | import numpy 18 | from argparse import ArgumentParser, Namespace 19 | from PIL import Image 20 | from multiprocessing.pool import Pool 21 | try: 22 | from mxnet.recordio import IRHeader, MXIndexedRecordIO, pack 23 | except ImportError: 24 | IRHeader = None 25 | from time import perf_counter 26 | from typing import Generator, List, NoReturn, Optional, Tuple 27 | from math import ceil 28 | try: 29 | from tensorflow.io import TFRecordWriter 30 | from tensorflow.train import (BytesList, 31 | Example, 32 | Feature, 33 | Features, 34 | Int64List) 35 | except ImportError: 36 | TFRecordWriter = None 37 | 38 | 39 | STANDARD_IMAGE = 'create-images' 40 | TFRECORD = 'create-tfrecord' 41 | RECORDIO = 'create-recordio' 42 | SUPPORTED_IMAGE_FORMATS = {"jpg": "jpg", "jpeg": "jpg", "bmp": "bmp", 43 | "bitmap": "bmp", "png": "png"} 44 | 45 | 46 | def _parse_args() -> Namespace: 47 | """ 48 | Parse arguments passed to the application. 49 | 50 | A custom argument parser handles multiple commands and options to launch 51 | the desired function. 52 | 53 | Returns 54 | ------- 55 | Namespace 56 | Returns a ``Namespace`` of all of the arguments that were parsed from 57 | the application during runtime. 58 | """ 59 | message = """ 60 | CLI for generating a fake dataset of various quantities at different 61 | resolutions. 
62 | 63 | Supported file types: .bmp, .png, and .jpg. 64 | Supported record types: TFRecords, and RecordIO. 65 | TFRecords requires an external index file creation step. 66 | """ 67 | parser = ArgumentParser(message) 68 | # Required positional command subparser which should be specified first 69 | commands = parser.add_subparsers(dest='command', metavar='command') 70 | commands_parent = ArgumentParser(add_help=False) 71 | 72 | # Options specific to record types 73 | commands_parent.add_argument('source_path', metavar='source-path', 74 | help='Path containing valid input images to ' 75 | 'convert to records') 76 | commands_parent.add_argument('dest_path', metavar='dest-path', 77 | help='Path to save record files to') 78 | commands_parent.add_argument('name', help='Name to prepend files with, ' 79 | 'such as "sample_record_"') 80 | commands_parent.add_argument('--img-per-file', type=int, default=1000) 81 | commands.add_parser(TFRECORD, help='Create TFRecords from input images', 82 | parents=[commands_parent]) 83 | commands.add_parser(RECORDIO, help='Create RecordIO from input images', 84 | parents=[commands_parent]) 85 | 86 | # Options specific to generating standard images 87 | standard = commands.add_parser(STANDARD_IMAGE, help='Generate random ' 88 | 'images') 89 | standard.add_argument('path', help='Path to save images to') 90 | standard.add_argument('name', help='Name to prepend files with, such as ' 91 | '"sample_image_"') 92 | standard.add_argument('image_format', metavar='image-format', help='The ' 93 | 'image format to generate', 94 | choices=SUPPORTED_IMAGE_FORMATS.keys()) 95 | standard.add_argument('--width', help='The image width in pixels', 96 | type=int, default=1920) 97 | standard.add_argument('--height', help='The image height in pixels', 98 | type=int, default=1080) 99 | standard.add_argument('--count', help='The number of images to generate', 100 | type=int, default=1) 101 | standard.add_argument('--seed', help='The seed to use while generating ' 
def _try_create_directory(directory: str) -> None:
    """
    Create a directory if it doesn't exist.

    Given a name of a directory as a ``string``, the directory (and any
    missing parent directories) is created with ``os.makedirs``. If the
    directory already exists, the function returns without any changes.

    Parameters
    ----------
    directory : string
        A ``string`` of a path pointing to a directory to attempt to create.
    """
    os.makedirs(directory, exist_ok=True)


def _check_directory_exists(directory: str) -> None:
    """
    Check if a directory exists.

    Check if a requested directory exists and raise an error if not.

    Parameters
    ----------
    directory : string
        A ``string`` of the requested directory to check.

    Raises
    ------
    RuntimeError
        Raises a ``RuntimeError`` if the requested path does not exist or is
        not a directory.
    """
    # ``isdir`` is used instead of ``exists`` so that a path pointing to a
    # regular file is rejected here, with a helpful message, rather than
    # failing later when the directory contents are listed.
    if not os.path.isdir(directory):
        raise RuntimeError('Error: Please specify an input directory which '
                           'contains valid images.')


def create_images(
    path: str,
    name: str,
    width: int,
    height: int,
    count: int,
    image_format: str,
    seed: Optional[int] = 0,
    size: Optional[bool] = False,
    chunksize: Optional[int] = 64
) -> None:
    """
    Randomly generate standard images.

    Generate random images of standard formats, such as JPG, PNG, and BMP of
    variable height and width. One task per image is dispatched to a
    multiprocessing pool, and every task is handled by ``_image_creation``.
    All images will be saved in the specified directory with each name
    beginning with the passed ``name`` variable and ending with a counter
    starting at zero.

    Parameters
    ----------
    path : string
        The path to the directory to save images to. The directory will be
        created if it doesn't exist.
    name : string
        A ``string`` to prepend to all filenames, such as `random_image_`.
        Filenames will end with a counter starting at zero, followed by the
        file format's extension.
    width : int
        The width of the image to generate in pixels.
    height : int
        The height of the image to generate in pixels.
    count : int
        The number of images to generate.
    image_format : str
        The format the images should be saved as. The supported choices are
        the keys of ``SUPPORTED_IMAGE_FORMATS``.
    seed : int (optional)
        A seed to use for numpy for creating the random image data. Defaults
        to 0.
    size : bool (optional)
        If `True`, will print image size information including the size of the
        first image and the final directory size.
    chunksize : int (optional)
        Forwarded to ``Pool.starmap``: the number of image tasks handed to a
        worker process at a time. Defaults to 64.
    """
    print('Creating {} {} files located at {} of {}x{} resolution with a '
          'base filename of {}'.format(count, image_format, path, width,
                                       height, name))
    _try_create_directory(path)
    combined_path = os.path.join(path, name)

    # One argument tuple per image; the zero-based counter ``n`` is the only
    # value that differs between tasks.
    tasks = ((combined_path, width, height, seed, image_format, n)
             for n in range(count))

    # Expected to yield a process pool equivalent to the number of CPU cores
    # in the system.
    pool = Pool()
    start_time = perf_counter()
    try:
        # NOTE: For very large image counts on memory-constrained systems,
        # this can stall-out. Either reduce the image count request, or
        # increase the chunk size.
        pool.starmap(_image_creation, tasks, chunksize=chunksize)
    finally:
        # Always release the worker processes, even if a task failed.
        pool.close()
        pool.join()

    stop_time = perf_counter()

    if size:
        _print_image_information(path)

    print('Created {} files in {} seconds'.format(count, stop_time-start_time))
def _record_slice(
    source_path: str,
    dest_path: str,
    name: str,
    image_files: List[str],
    images_per_file: int,
    num_of_records: int
) -> Generator[Tuple[str, str, str, List[str], int], None, None]:
    """
    Generate subcomponents for a thread.

    While creating RecordIO files, a tuple needs to be generated to pass to
    every worker in a multiprocessing pool. Each tuple corresponds with a
    unique record file with a new path, name, and subset of images. Record
    number ``k`` (zero-based) receives the slice of `image_files` starting at
    ``k * images_per_file`` and holding at most `images_per_file` entries, so
    the final slice may be shorter than the others.

    Parameters
    ----------
    source_path : string
        Path to the directory where the input images are stored.
    dest_path : string
        Path to the directory where the record files should be saved.
    name : string
        A ``string`` to prepend to all filenames, such as `random_record_`.
    image_files : list
        A ``list`` of ``strings`` of the image filenames to use for the record
        files.
    images_per_file : int
        The number of images to include per record file.
    num_of_records : int
        The total number of record files to create. Note that one record
        assumes a record file plus a corresponding index file.

    Returns
    -------
    Generator
        Yields a ``tuple`` of objects specific to each record file. The tuple
        includes the `source_path` as a ``string``, `dest_path` as a
        ``string``, `name` as a ``string``, a subset of image names from
        `image_files` as a ``list`` of ``strings``, and a counter for the
        record file starting at 0 as an ``int``.
    """
    for num in range(num_of_records):
        start = num * images_per_file
        yield (source_path,
               dest_path,
               name,
               image_files[start:start + images_per_file],
               num)


def create_recordio(
    source_path: str,
    dest_path: str,
    name: str,
    img_per_file: int
) -> None:
    """
    Create RecordIO files based on standard images.

    Generate one or multiple RecordIO records based on standard input images.
    Records are created by specifying an input path containing standard image
    files, an output directory to save the records to, a name to prepend the
    records with, and the number of images to place in each record. The
    number of records created is the number of files in the input directory
    divided by `img_per_file`, rounded up. Files are taken in sorted filename
    order and placed into each record; sub-directories are ignored.

    Parameters
    ----------
    source_path : string
        Path to the directory where the input images are stored.
    dest_path : string
        Path to the directory where the record files should be saved. Will be
        created if it does not exist.
    name : string
        A ``string`` to prepend to all filenames, such as `random_record_`.
        Filenames will end with a counter starting at zero, followed by the
        file format's extension.
    img_per_file : int
        The number of images to include per record file. Must be positive.

    Raises
    ------
    ValueError
        Raises a ``ValueError`` if `img_per_file` is not a positive number.
    ImportError
        Raises an ``ImportError`` if MXNet is not available.
    RuntimeError
        Raises a ``RuntimeError`` if `source_path` is not a directory.
    """
    # Fail fast: a zero value would otherwise surface as a ZeroDivisionError
    # and a negative value would silently create nothing.
    if img_per_file < 1:
        raise ValueError('img_per_file must be a positive integer, got '
                         '{}'.format(img_per_file))

    print('Creating RecordIO files at {} from {} targeting {} files per '
          'record with a base filename of {}'.format(dest_path,
                                                     source_path,
                                                     img_per_file,
                                                     name))
    if not IRHeader:
        raise ImportError('MXNet not found! Please install MXNet dependency '
                          'using "pip install nvidia-imageinary[\'mxnet\']".')
    source_path = os.path.abspath(source_path)
    dest_path = os.path.abspath(dest_path)
    _check_directory_exists(source_path)
    _try_create_directory(dest_path)

    _print_image_information(source_path)

    # ``os.listdir`` returns entries in arbitrary order; sorting makes the
    # assignment of images to records reproducible between runs.
    image_files = sorted(
        entry for entry in os.listdir(source_path)
        if not os.path.isdir(os.path.join(source_path, entry))
    )

    num_of_records = ceil(len(image_files) / img_per_file)
    pool = Pool()
    start_time = perf_counter()
    try:
        pool.starmap(_recordio_creation,
                     _record_slice(source_path,
                                   dest_path,
                                   name,
                                   image_files,
                                   img_per_file,
                                   num_of_records))
    finally:
        # Always release the worker processes, even if a task failed.
        pool.close()
        pool.join()

    stop_time = perf_counter()
    print('Completed in {} seconds'.format(stop_time-start_time))
def create_tfrecords(
    source_path: str,
    dest_path: str,
    name: str,
    img_per_file: int
) -> None:
    """
    Create TFRecords based on standard images.

    Generate one or multiple TFRecords based on standard input images. Records
    are created by specifying an input path containing standard image files,
    an output directory to save the records to, a name to prepend the records
    with, and the number of images to place in each record. Every record
    holds at most `img_per_file` images, so the number of records is the
    number of files in the input directory divided by `img_per_file`, rounded
    up. Files are taken in sorted filename order; sub-directories are
    ignored. Each image is stored as its raw file bytes under
    ``image/encoded`` with a constant ``image/class/label`` of 0.

    Parameters
    ----------
    source_path : string
        Path to the directory where the input images are stored.
    dest_path : string
        Path to the directory where the record files should be saved. Will be
        created if it does not exist.
    name : string
        A ``string`` to prepend to all filenames, such as `random_record_`.
        Filenames will end with a counter starting at zero.
    img_per_file : int
        The number of images to include per record file. Must be positive.

    Raises
    ------
    ValueError
        Raises a ``ValueError`` if `img_per_file` is not a positive number.
    ImportError
        Raises an ``ImportError`` if TensorFlow is not available.
    RuntimeError
        Raises a ``RuntimeError`` if `source_path` is not a directory.
    """
    # Fail fast: a non-positive value would otherwise put every image in its
    # own record and leave the first record empty.
    if img_per_file < 1:
        raise ValueError('img_per_file must be a positive integer, got '
                         '{}'.format(img_per_file))

    print('Creating TFRecord files at {} from {} targeting {} files per '
          'TFRecord with a base filename of {}'.format(dest_path,
                                                       source_path,
                                                       img_per_file,
                                                       name))
    if not TFRecordWriter:
        raise ImportError('TensorFlow not found! Please install TensorFlow '
                          'dependency using "pip install '
                          'nvidia-imageinary[\'tfrecord\']".')
    _check_directory_exists(source_path)
    _try_create_directory(dest_path)
    combined_path = os.path.join(dest_path, name)

    _print_image_information(source_path)

    start_time = perf_counter()

    # ``os.listdir`` returns entries in arbitrary order; sorting makes the
    # assignment of images to records reproducible between runs.
    image_names = sorted(
        entry for entry in os.listdir(source_path)
        if not os.path.isdir(os.path.join(source_path, entry))
    )
    # The first record file is always created, even for an empty directory,
    # hence the fallback to a single empty chunk.
    chunks = [image_names[start:start + img_per_file]
              for start in range(0, len(image_names), img_per_file)] or [[]]

    for record, chunk in enumerate(chunks):
        writer = TFRecordWriter(combined_path + str(record))
        try:
            for image_name in chunk:
                image_path = os.path.join(source_path, image_name)
                with open(image_path, 'rb') as image_file:
                    image = image_file.read()

                feature = {
                    'image/encoded': Feature(
                        bytes_list=BytesList(value=[image])),
                    'image/class/label': Feature(
                        int64_list=Int64List(value=[0]))
                }

                tfrecord_entry = Example(features=Features(feature=feature))
                writer.write(tfrecord_entry.SerializeToString())
        finally:
            # Close the record even if reading or serializing an image fails.
            writer.close()

    stop_time = perf_counter()

    print('Completed in {} seconds'.format(stop_time-start_time))


def _print_image_information(path: str) -> None:
    """
    Print the image and directory size.

    Print the size of the first image in the directory (by sorted filename),
    which is assumed to be a good approximator for the average image size of
    all images in the directory, as well as the combined size of all files
    directly inside the directory, in bytes. Sub-directories are skipped. If
    the directory holds no files, both sizes are reported as 0.

    Parameters
    ----------
    path : string
        The path to the directory where generated images are stored.
    """
    file_paths = (os.path.join(path, entry)
                  for entry in sorted(os.listdir(path)))
    sizes = [os.path.getsize(file_path)
             for file_path in file_paths
             if not os.path.isdir(file_path)]

    first_image_size = sizes[0] if sizes else 0
    directory_size = sum(sizes)

    print('First image size from {}, in bytes: {}'.format(path,
                                                          first_image_size))
    print('Directory {} size, in bytes: {}'.format(path, directory_size))
def _recordio_creation(
    source_path: str,
    dest_path: str,
    name: str,
    image_files: List[str],
    n: int
) -> None:
    """
    Create a RecordIO file based on input images.

    Given a subset of images, a RecordIO file is created with a corresponding
    index file with the given name and counter. The index of every image
    inside the record is the first run of digits found in its filename.

    Parameters
    ----------
    source_path : string
        Path to the directory where the input images are stored.
    dest_path : string
        Path to the directory where the record files should be saved.
    name : string
        A ``string`` to prepend the record filename with.
    image_files : list
        A ``list`` of ``strings`` of image filenames to be used for the record
        creation.
    n : int
        An ``integer`` of the current count the record file points to, starting
        at zero.

    Raises
    ------
    ValueError
        Raises a ``ValueError`` if an image filename contains no digits, as
        no record index can be derived from it.
    """
    regex = re.compile(r'\d+')

    # Resolve every index before any output file is opened so that a bad
    # filename does not leave a truncated record behind.
    indexed_images = []
    for image_name in image_files:
        match = regex.search(image_name)
        if match is None:
            raise ValueError('Unable to determine a record index for "{}": '
                             'the filename contains no digits.'
                             .format(image_name))
        indexed_images.append((int(match.group()),
                               os.path.join(source_path, image_name)))

    # ``dest_path`` is joined exactly once; joining it a second time would
    # duplicate the directory component for relative paths.
    record_base = os.path.join(dest_path, name) + str(n)
    recordio_ds = MXIndexedRecordIO(record_base + '.idx',
                                    record_base + '.rec',
                                    'w')
    try:
        for image_index, image_path in indexed_images:
            header = IRHeader(0, 0, image_index, 0)
            with open(image_path, 'rb') as image_file:
                image = image_file.read()
            packed_image = pack(header, image)
            recordio_ds.write_idx(image_index, packed_image)
    finally:
        # Close the record even if reading or packing an image fails.
        recordio_ds.close()


def _image_creation(
    combined_path: str,
    width: int,
    height: int,
    seed: int,
    image_format: str,
    n: int
) -> None:
    """
    Generate a random image.

    Given a name, dimensions, a seed, and an image format, a random image is
    generated by creating a numpy array of random data for the specified
    dimensions and three color channels, then converting the array to an image
    of the specified format and saving the result to the output directory with
    the requested name postfixed with the zero-based image counter and the
    file extension.

    Parameters
    ----------
    combined_path : string
        The full path to the output image file including the requested name as
        a prefix for the filename.
    width : int
        The width of the image to generate in pixels.
    height : int
        The height of the image to generate in pixels.
    seed : int
        The base seed for the random data. The image counter is added to it
        so that every image receives different, but reproducible, content.
    image_format : str
        The format the images should be saved as. Looked up
        case-insensitively in ``SUPPORTED_IMAGE_FORMATS``; unknown formats
        fall back to `png`.
    n : int
        The zero-based counter for the image.
    """
    # A private generator yields the same values as seeding the global numpy
    # state with the same seed, without mutating that shared state.
    rng = numpy.random.RandomState(seed + n)
    pixels = (rng.rand(height, width, 3) * 255).astype('uint8')

    file_ext = SUPPORTED_IMAGE_FORMATS.get(image_format.lower(), 'png')
    # Only JPG is written without an alpha channel.
    mode = 'RGB' if file_ext == "jpg" else 'RGBA'
    im_out = Image.fromarray(pixels).convert(mode)

    im_out.save('%s%d.%s' % (combined_path, n, file_ext))


def _main() -> None:
    """
    Randomly generate images or record files.

    Create standard images or record files using randomized data to be ingested
    into a deep learning application. The sub-command selected on the command
    line decides which of the creation functions is invoked; any other
    command value results in no action.
    """
    args = _parse_args()
    if args.command == STANDARD_IMAGE:
        create_images(args.path, args.name, args.width, args.height,
                      args.count, args.image_format, args.seed, args.size)
    elif args.command == TFRECORD:
        create_tfrecords(args.source_path, args.dest_path, args.name,
                         args.img_per_file)
    elif args.command == RECORDIO:
        create_recordio(args.source_path, args.dest_path, args.name,
                        args.img_per_file)
pyasn1==0.4.8 32 | pyasn1-modules==0.2.8 33 | pycodestyle==2.6.0 34 | pyparsing==2.4.7 35 | pytest==6.1.1 36 | pytest-cov==2.10.1 37 | requests==2.26.0 38 | requests-oauthlib==1.3.0 39 | rsa==4.7 40 | scipy==1.4.1 41 | six==1.15.0 42 | tensorboard==2.7.0 43 | tensorboard-plugin-wit==1.6.0.post3 44 | tensorflow==2.7.0 45 | tensorflow-estimator==2.7.0 46 | tensorflow-io-gcs-filesystem==0.23.1 47 | termcolor==1.1.0 48 | toml==0.10.1 49 | urllib3==1.26.5 50 | Werkzeug==1.0.1 51 | wrapt==1.12.1 52 | zipp==3.1.0 53 | -------------------------------------------------------------------------------- /requirements.txt.tf1: -------------------------------------------------------------------------------- 1 | absl-py==0.9.0 2 | astor==0.8.1 3 | attrs==20.2.0 4 | certifi==2020.4.5.2 5 | chardet==3.0.4 6 | coverage==5.3 7 | gast==0.3.3 8 | google-pasta==0.2.0 9 | graphviz==0.8.4 10 | grpcio==1.29.0 11 | h5py==2.10.0 12 | idna==2.9 13 | importlib-metadata==1.6.0 14 | iniconfig==1.1.1 15 | Keras-Applications==1.0.8 16 | Keras-Preprocessing==1.1.2 17 | Markdown==3.2.2 18 | mxnet==1.6.0 19 | numpy==1.18.4 20 | packaging==20.4 21 | Pillow==7.1.2 22 | pluggy==0.13.1 23 | protobuf==3.12.2 24 | py==1.9.0 25 | pycodestyle==2.6.0 26 | pyparsing==2.4.7 27 | pytest==6.1.2 28 | pytest-cov==2.10.1 29 | requests==2.23.0 30 | six==1.15.0 31 | tensorboard==1.14.0 32 | tensorflow==1.14.0 33 | tensorflow-estimator==1.14.0 34 | termcolor==1.1.0 35 | toml==0.10.1 36 | urllib3==1.25.9 37 | Werkzeug==1.0.1 38 | wrapt==1.12.1 39 | zipp==3.1.0 40 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | with open('README.md', 'r') as f: 4 | long_description = f.read() 5 | 6 | with open('VERSION', 'r') as f: 7 | version = f.read().strip() 8 | 9 | extras = { 10 | 'tfrecord': ['tensorflow >= 1.14.0,!=2.0.x,!=2.1.x,!=2.2.0,!=2.4.0'], 11 | 
'mxnet': ['mxnet >= 1.6.0,!=1.8.0'] 12 | } 13 | 14 | extras['all'] = [item for group in extras.values() for item in group] 15 | 16 | setup( 17 | name='nvidia-imageinary', 18 | author='NVIDIA Corporation', 19 | author_email='roclark@nvidia.com', 20 | version=version, 21 | description='A tool to randomly generate image datasets of various resolutions', 22 | long_description=long_description, 23 | packages=find_packages(include=['imagine'], exclude=['tests']), 24 | license='Apache 2.0', 25 | python_requires='>=3.7', 26 | entry_points={ 27 | 'console_scripts': ['imagine=imagine:_main'] 28 | }, 29 | install_requires=[ 30 | 'numpy >= 1.18.0', 31 | 'Pillow >= 7.1.2' 32 | ], 33 | extras_require=extras 34 | ) 35 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Imageinary/aa44843e7671c0ebac032e39ef66508d3fa9b55c/tests/__init__.py -------------------------------------------------------------------------------- /tests/functional/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Imageinary/aa44843e7671c0ebac032e39ef66508d3fa9b55c/tests/functional/__init__.py -------------------------------------------------------------------------------- /tests/functional/test_jpg_creation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
class TestJPGCreation:
    """Functional tests for generating JPG files with ``create_images``."""

    @pytest.fixture(autouse=True)
    def setup(self, tmpdir):
        # Every test writes into its own dedicated sub-directory.
        self.tmpdir = tmpdir.mkdir('jpg_files')

    def teardown_method(self):
        for image in glob(f'{str(self.tmpdir)}/*'):
            os.remove(image)
        os.rmdir(str(self.tmpdir))

    def _create_and_verify(self, width, height, count=100):
        """Create ``count`` JPGs and check their count, names and size."""
        create_images(
            str(self.tmpdir),
            'tmp_',
            width,
            height,
            count,
            'jpg',
            0,
            False
        )

        images = glob(f'{str(self.tmpdir)}/*')

        assert len(images) == count
        for image in images:
            # The dot is escaped and the pattern anchored so that only a
            # real ".jpg" extension is accepted.
            assert re.search(r'tmp_\d+\.jpg$', image)
            with Image.open(image) as im:
                assert im.size == (width, height)

    def test_creating_one_hundred_images(self):
        self._create_and_verify(1920, 1080)

    def test_creating_one_hundred_4K_images(self):
        self._create_and_verify(3840, 2160)
class TestPNGCreation:
    """Functional tests for generating PNG files with ``create_images``."""

    @pytest.fixture(autouse=True)
    def setup(self, tmpdir):
        # Every test writes into its own dedicated sub-directory.
        self.tmpdir = tmpdir.mkdir('png_files')

    def teardown_method(self):
        for image in glob(f'{str(self.tmpdir)}/*'):
            os.remove(image)
        os.rmdir(str(self.tmpdir))

    def _create_and_verify(self, width, height, count=100):
        """Create ``count`` PNGs and check their count, names and size."""
        create_images(
            str(self.tmpdir),
            'tmp_',
            width,
            height,
            count,
            'png',
            0,
            False
        )

        images = glob(f'{str(self.tmpdir)}/*')

        assert len(images) == count
        for image in images:
            # The dot is escaped and the pattern anchored so that only a
            # real ".png" extension is accepted.
            assert re.search(r'tmp_\d+\.png$', image)
            with Image.open(image) as im:
                assert im.size == (width, height)

    def test_creating_one_hundred_images(self):
        self._create_and_verify(1920, 1080)

    def test_creating_one_hundred_4K_images(self):
        self._create_and_verify(3840, 2160)
class TestRecordIOCreation:
    """Functional tests for building RecordIO files from generated images."""

    @pytest.fixture(autouse=True)
    def setup(self, tmpdir):
        # Source images and generated records live in separate directories.
        self.tmpdir = tmpdir.mkdir('input_files')
        self.outdir = tmpdir.mkdir('output_files')

    def teardown_method(self):
        for image in glob(f'{str(self.tmpdir)}/*'):
            os.remove(image)
        for record in glob(f'{str(self.outdir)}/*'):
            os.remove(record)
        os.rmdir(str(self.tmpdir))
        os.rmdir(str(self.outdir))

    def _create_records(self, image_format, img_per_file):
        """Create 100 source images, pack them and return the output paths."""
        # Create sample images which will be used as a basis.
        create_images(
            str(self.tmpdir),
            'tmp_',
            1920,
            1080,
            100,
            image_format,
            0,
            False
        )
        create_recordio(
            str(self.tmpdir),
            str(self.outdir),
            'tmprecord_',
            img_per_file
        )

        return glob(f'{str(self.outdir)}/*')

    @staticmethod
    def _assert_single_record(records):
        # One record is made of exactly one index and one record file.
        names = sorted(os.path.basename(record) for record in records)
        assert names == ['tmprecord_0.idx', 'tmprecord_0.rec']

    @staticmethod
    def _assert_multiple_records(records, expected_files):
        assert len(records) == expected_files
        for record in records:
            # The dot is escaped and the pattern anchored so that only the
            # expected extensions are accepted.
            assert re.search(r'tmprecord_\d+\.(idx|rec)$', record)

    def test_creating_recordio_from_100_jpgs(self):
        self._assert_single_record(self._create_records('jpg', 100))

    def test_creating_recordio_from_100_pngs(self):
        self._assert_single_record(self._create_records('png', 100))

    def test_creating_recordio_from_100_jpg_multiple_files(self):
        self._assert_multiple_records(self._create_records('jpg', 10), 20)

    def test_creating_recordio_from_100_pngs_multiple_files(self):
        self._assert_multiple_records(self._create_records('png', 10), 20)
class TestTFRecordCreation:
    """Functional tests for building TFRecords from generated images."""

    @pytest.fixture(autouse=True)
    def setup(self, tmpdir):
        # Source images and generated records live in separate directories.
        self.tmpdir = tmpdir.mkdir('input_files')
        self.outdir = tmpdir.mkdir('output_files')

    def teardown_method(self):
        for image in glob(f'{str(self.tmpdir)}/*'):
            os.remove(image)
        for record in glob(f'{str(self.outdir)}/*'):
            os.remove(record)
        os.rmdir(str(self.tmpdir))
        os.rmdir(str(self.outdir))

    def _create_records(self, image_format, img_per_file):
        """Create 100 source images, pack them and return the output paths."""
        # Create sample images which will be used as a basis.
        create_images(
            str(self.tmpdir),
            'tmp_',
            1920,
            1080,
            100,
            image_format,
            0,
            False
        )
        create_tfrecords(
            str(self.tmpdir),
            str(self.outdir),
            'tmprecord_',
            img_per_file
        )

        return glob(f'{str(self.outdir)}/*')

    @staticmethod
    def _assert_single_record(records):
        assert len(records) == 1
        assert os.path.basename(records[0]) == 'tmprecord_0'

    @staticmethod
    def _assert_multiple_records(records, expected_files):
        assert len(records) == expected_files
        for record in records:
            # Anchored so that nothing may follow the record counter.
            assert re.search(r'tmprecord_\d+$', record)

    def test_creating_tfrecord_from_100_jpgs(self):
        self._assert_single_record(self._create_records('jpg', 100))

    def test_creating_tfrecord_from_100_pngs(self):
        self._assert_single_record(self._create_records('png', 100))

    def test_creating_tfrecord_from_100_jpg_multiple_files(self):
        self._assert_multiple_records(self._create_records('jpg', 10), 10)

    def test_creating_tfrecord_from_100_pngs_multiple_files(self):
        self._assert_multiple_records(self._create_records('png', 10), 10)
class TestUnits:
    """Unit tests for the private helpers of the ``imagine`` module."""

    @pytest.fixture(autouse=True)
    def setup(self, tmpdir):
        self.tmpdir = tmpdir

    def teardown_method(self):
        try:
            os.rmdir(str(self.tmpdir))
        except OSError:
            # The directory is missing or not empty; nothing to clean up.
            pass

    def test_directory_creation_if_not_exist(self):
        # ``tmpdir`` itself already exists, so a fresh nested path is used
        # to actually exercise the creation.
        target = os.path.join(str(self.tmpdir), 'parent', 'child')
        assert not os.path.exists(target)

        imagine._try_create_directory(target)
        assert os.path.isdir(target)

        # Calling it again for an existing directory must not fail.
        imagine._try_create_directory(target)
        assert os.path.isdir(target)

        os.rmdir(target)
        os.rmdir(os.path.dirname(target))

    def test_error_input_directory_doesnt_exist(self):
        with pytest.raises(RuntimeError):
            imagine._check_directory_exists(os.path.join(str(self.tmpdir),
                                                         'dne'))

    def test_record_slice_yields_expected_results(self):
        slices = [range(x, x + 100) for x in range(0, 1000, 100)]
        # Materialize the generator so that an empty result fails the length
        # assertion instead of leaving the loop variable undefined.
        results = list(imagine._record_slice(self.tmpdir,
                                             self.tmpdir,
                                             'test_record_',
                                             range(0, 1000),
                                             100,
                                             10))

        assert len(results) == 10
        for count, result in enumerate(results):
            source, dest, name, images, num = result
            assert source == self.tmpdir
            assert dest == self.tmpdir
            assert name == 'test_record_'
            assert images == slices[count]
            assert num == count