├── .github
    └── workflows
    │   └── pushtest.yml
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── VERSION
├── imagine
    ├── __init__.py
    └── imagine.py
├── requirements.txt
├── requirements.txt.tf1
├── setup.py
└── tests
    ├── __init__.py
    ├── functional
        ├── __init__.py
        ├── test_jpg_creation.py
        ├── test_png_creation.py
        ├── test_recordio.py
        └── test_tfrecord.py
    └── unit
        ├── __init__.py
        └── test_units.py


/.github/workflows/pushtest.yml:
--------------------------------------------------------------------------------
 1 | name: Imageinary push tests
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | jobs:
 6 |   test_tf2:  # test, lint, build and install check against requirements.txt
 7 |     name: Test and lint code for TensorFlow 2
 8 |     runs-on: ${{ matrix.operating-system }}
 9 |     strategy:
10 |       matrix:
11 |         python-version: ['3.7', '3.8']  # strings, so 3.10 never parses as 3.1
12 |         operating-system: [ubuntu-latest, macOS-latest]
13 | 
14 |     steps:
15 |     - uses: actions/checkout@v1
16 |     - name: Set up Python ${{ matrix.python-version }}
17 |       uses: actions/setup-python@v1
18 |       with:
19 |         python-version: ${{ matrix.python-version }}
20 |     - name: Install dependencies
21 |       run: |
22 |         python -m pip install --upgrade pip
23 |         pip install -r requirements.txt
24 |     - name: Run unit and functional tests with pytest
25 |       run: |
26 |         pytest --cov=imagine --cov-report term-missing tests/
27 |     - name: Lint with pycodestyle
28 |       run: |
29 |         pycodestyle imagine/ tests/
30 |     - name: Build the latest binary
31 |       run: |
32 |         python setup.py sdist bdist_wheel
33 |     - name: Install the built wheel and test usage (UNIX)
34 |       run: |
35 |         pip install --ignore-installed dist/nvidia_imageinary-*-py3-none-any.whl
36 |         imagine --help
37 |     - name: Install the built wheel with all dependencies (UNIX)
38 |       run: |
39 |         pip install nvidia-imageinary['all']
40 |         imagine --help
41 |         pip freeze | grep "tensorflow"
42 |         pip freeze | grep "mxnet"
43 |         
44 |   test_tf1:  # same pipeline, installed from requirements.txt.tf1
45 |     name: Test code for TensorFlow 1.x
46 |     runs-on: ${{ matrix.operating-system }}
47 |     strategy:
48 |       matrix:
49 |         python-version: ['3.7']  # quoted so the version stays a string
50 |         operating-system: [ubuntu-latest, macOS-latest]
51 | 
52 |     steps:
53 |     - uses: actions/checkout@v1
54 |     - name: Set up Python ${{ matrix.python-version }}
55 |       uses: actions/setup-python@v1
56 |       with:
57 |         python-version: ${{ matrix.python-version }}
58 |     - name: Install dependencies
59 |       run: |
60 |         python -m pip install --upgrade pip
61 |         pip install -r requirements.txt.tf1
62 |     - name: Run unit and functional tests with pytest
63 |       run: |
64 |         pytest --cov=imagine --cov-report term-missing tests/
65 |     - name: Lint with pycodestyle
66 |       run: |
67 |         pycodestyle imagine/ tests/
68 |     - name: Build the latest binary
69 |       run: |
70 |         python setup.py sdist bdist_wheel
71 |     - name: Install the built wheel and test usage (UNIX)
72 |       run: |
73 |         pip install --ignore-installed dist/nvidia_imageinary-*-py3-none-any.whl
74 |         imagine --help
75 |     - name: Install the built wheel with all dependencies (UNIX)
76 |       run: |
77 |         pip install tensorflow==1.14.0 nvidia-imageinary['all']
78 |         imagine --help
79 |         pip freeze | grep "tensorflow"
80 |         pip freeze | grep "mxnet"
81 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *pycache*
2 | *vscode*
3 | *env*
4 | *coverage*
5 | build/*
6 | dist/*
7 | *egg*
8 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contribute to Imageinary
 2 | 
 3 | Before contributing to Imageinary, we require all users to sign-off on their
 4 | work.
 5 | 
 6 | ## Sign your work
 7 | 
 8 | The sign-off is a simple line at the end of the explanation for the patch. Your
 9 | signature certifies that you wrote the patch or otherwise have the right to pass
10 | it on as an open-source patch. The rules are pretty simple: if you can certify
11 | the below (from [developercertificate.org](http://developercertificate.org/)):
12 | 
13 | ```
14 | Developer Certificate of Origin
15 | Version 1.1
16 | 
17 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
18 | 1 Letterman Drive
19 | Suite D4700
20 | San Francisco, CA, 94129
21 | 
22 | Everyone is permitted to copy and distribute verbatim copies of this
23 | license document, but changing it is not allowed.
24 | 
25 | Developer's Certificate of Origin 1.1
26 | 
27 | By making a contribution to this project, I certify that:
28 | 
29 | (a) The contribution was created in whole or in part by me and I
30 |     have the right to submit it under the open source license
31 |     indicated in the file; or
32 | 
33 | (b) The contribution is based upon previous work that, to the best
34 |     of my knowledge, is covered under an appropriate open source
35 |     license and I have the right under that license to submit that
36 |     work with modifications, whether created in whole or in part
37 |     by me, under the same open source license (unless I am
38 |     permitted to submit under a different license), as indicated
39 |     in the file; or
40 | 
41 | (c) The contribution was provided directly to me by some other
42 |     person who certified (a), (b) or (c) and I have not modified
43 |     it.
44 | 
45 | (d) I understand and agree that this project and the contribution
46 |     are public and that a record of the contribution (including all
47 |     personal information I submit with it, including my sign-off) is
48 |     maintained indefinitely and may be redistributed consistent with
49 |     this project or the open source license(s) involved.
50 | ```
51 | 
52 | Then you just add a line to every git commit message:
53 | 
54 |     Signed-off-by: Joe Smith <joe.smith@email.com>
55 | 
56 | Use your real name (sorry, no pseudonyms or anonymous contributions.)
57 | 
58 | If you set your `user.name` and `user.email` git configs, you can sign your
59 | commit automatically with `git commit -s`.
60 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright 2020 NVIDIA CORPORATION
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Imageinary
  2 | Imageinary is a reproducible mechanism which is used to generate large image
  3 | datasets at various resolutions. The tool supports multiple image types,
  4 | including JPEGs, PNGs, BMPs, RecordIO, and TFRecord files.
  5 | 
  6 | ## Use Cases
  7 | While benchmarking deep learning applications involving images, there are
  8 | typically only a handful of public datasets that can be used and they tend to
  9 | have small and limited image sizes. In an effort to run DL tests against various
 10 | input sizes, we designed a tool to quickly and easily generate images of
 11 | variable dimensions and types which can be fed to convolutional neural networks
 12 | and deep learning pipelines.
 13 | 
 14 | The images are generated using random numpy arrays and are then converted to the
 15 | requested output format and saved to the specified location.
 16 | 
 17 | ## Requirements
 18 |   * Python 3.7 or greater
 19 |   * TensorFlow 2 or TensorFlow 1.14
 20 |   * MXNet
 21 |   * Pillow
 22 |   * Numpy
 23 | 
 24 | ## Installation
 25 | The application is available on `nvidia-pyindex` and can be downloaded and
 26 | installed using PIP.
 27 | 
 28 | ### Virtual Environment
 29 | It is recommended to run this program in a Python virtual environment to avoid
 30 | dependency interference. The virtual environment can be installed and activated
 31 | with:
 32 | 
 33 | ```bash
 34 | pip install virtualenv
 35 | virtualenv --python python3 env
 36 | source env/bin/activate
 37 | ```
 38 | 
 39 | Once finished using the application, you can leave the virtual environment with:
 40 | 
 41 | ```bash
 42 | deactivate
 43 | ```
 44 | 
 45 | ### Pre-install
 46 | Before installing the package, the `nvidia-pyindex` package needs to be
 47 | installed to tell PIP where to look for the package. This needs to be installed
 48 | just once.
 49 | 
 50 | ```bash
 51 | pip install nvidia-pyindex
 52 | ```
 53 | 
 54 | ### Minimal Install
 55 | The minimal install supports standard image types, such as JPG, PNG, and BMP
 56 | and only installs the dependencies necessary for those tools.
 57 | 
 58 | ```bash
 59 | pip install nvidia-imageinary
 60 | ```
 61 | 
 62 | ### TFRecord Support
 63 | To add support for TFRecords in addition to the standard image types, TensorFlow
 64 | needs to be included as a dependency. This can be done by running the following
 65 | which installs TensorFlow alongside all other dependencies:
 66 | 
 67 | ```bash
 68 | pip install nvidia-imageinary['tfrecord']
 69 | ```
 70 | 
 71 | ### RecordIO Support
 72 | RecordIO files are supported using MXNet, which can be included as a dependency
 73 | using the following:
 74 | 
 75 | ```bash
 76 | pip install nvidia-imageinary['mxnet']
 77 | ```
 78 | 
 79 | ### Complete Install
 80 | If desired, all dependencies can be installed to support standard images,
 81 | TFRecords, and RecordIO files without installing extra packages later. Run the
 82 | following to install all dependencies:
 83 | 
 84 | ```bash
 85 | pip install nvidia-imageinary['all']
 86 | ```
 87 | 
 88 | ## Running
 89 | Imageinary supports many different image types which can be specified while
 90 | running the application.
 91 | 
 92 | ### JPEGs
 93 | A basic run to create 1000 4K JPEGs, and display the size of the first file and
 94 | all files in the target directory path (not including subdirectories):
 95 | 
 96 | ```bash
 97 | imagine create-images \
 98 |     /mnt/nvme/test_dir \
 99 |     random_image_ \
100 |     jpg \
101 |     --width 3840 \
102 |     --height 2160 \
103 |     --count 1000 \
104 |     --size
105 | ```
106 | 
107 | The above command will generate 1,000 unique JPEG images in the
108 | `/mnt/nvme/test_dir` directory. Each filename will begin with `random_image_`
109 | and end with an image number starting from 0, such as `random_image_0.jpg`,
110 | `random_image_1.jpg`, etc. The images will have dimensions of 3840x2160. The
111 | `--size` flag displays information on the images, such as the size of the first
112 | image and the size of the overall directory.
113 | 
114 | Note that for creating a very large number of images, systems can easily run out
115 | of memory. In this case, increase the `--chunksize` to reduce the amount of
116 | memory allocated by each multiprocessing pool.
117 | 
118 | ### TFRecords
119 | TFRecords can also be easily generated using the application. This command
120 | expects images to be pre-loaded to be used as the basis for the TFRecord files.
121 | 
122 | ```bash
123 | imagine create-tfrecord \
124 |     /mnt/nvme/test_dir \
125 |     /mnt/nvme/tf_record_dir \
126 |     random_tfrecord_ \
127 |     --img-per-file 100
128 | ```
129 | 
130 | This command uses the JPEGs created during the previous step and creates
131 | TFRecords based on those images. The TFRecords will be saved to
132 | `/mnt/nvme/tf_record_dir` where each file will be comprised of 100 JPEGs.
133 | 
134 | ### RecordIO
135 | Similarly, RecordIO files can be generated with a single command:
136 | 
137 | ```bash
138 | imagine create-recordio \
139 |     /mnt/nvme/test_dir \
140 |     /mnt/nvme/record_files \
141 |     random_recordio_ \
142 |     --img-per-file 100
143 | ```
144 | 
145 | This command uses the JPEGs created during the first step and creates
146 | RecordIO files based on those images. The RecordIO files will be saved to
147 | `/mnt/nvme/record_files` where each file will be comprised of 100 JPEGs.
148 | 
149 | ## Importing
150 | Imageinary can be imported directly by a Python script to hook into a deep
151 | learning workflow.
152 | 
153 | To write random images of a specific size to the system, import the library and
154 | specify the requested parameters:
155 | 
156 | ```python
157 | from imagine import create_images
158 | 
159 | create_images('path/to/save/images',  # Path to save images
160 |               'random_image_prefix_',  # Image name prefix
161 |               3840,  # Width
162 |               2160,  # Height
163 |               1000,  # Number of images to create
164 |               'png')  # Image format
165 | ```
166 | 
167 | The above command will save 1000 random PNG images of dimension 3840x2160 to the
168 | `path/to/save/images` directory, each prefixed with `random_image_prefix_`.
169 | 
170 | Similarly, TFRecords can be generated from Python:
171 | 
172 | ```python
173 | from imagine import create_tfrecords
174 | 
175 | create_tfrecords('path/to/saved_images',  # Path to saved images
176 |                  'path/to/save/tfrecords',  # Path to save TFRecords
177 |                  'random_tfrecord_',  # TFRecord file prefix
178 |                  100)  # Number of images per file
179 | ```
180 | 
181 | As can RecordIO files:
182 | 
183 | ```python
184 | from imagine import create_recordio
185 | 
186 | create_recordio('path/to/saved_images',  # Path to saved images
187 |                 'path/to/save/recordio',  # Path to save RecordIO files
188 |                 'random_recordio_',  # RecordIO file prefix
189 |                 100)  # Number of images per file
190 | ```
191 | 
192 | ## Testing
193 | This repository includes functional tests for the major modules listed above
194 | which can be verified locally using `pytest`. While in the virtual environment,
195 | run the following:
196 | 
197 | ```bash
198 | $ pytest --cov=imagine --cov-report term-missing tests/
199 | ```
200 | 
201 | This will output the test results including the overall coverage for the Python
202 | module.
203 | 


--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1 | 1.1.3
2 | 


--------------------------------------------------------------------------------
/imagine/__init__.py:
--------------------------------------------------------------------------------
1 | from imagine.imagine import (create_images,
2 |                              create_recordio,
3 |                              create_tfrecords,
4 |                              _main)
5 | 


--------------------------------------------------------------------------------
/imagine/imagine.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | import os
 16 | import re
 17 | import numpy
 18 | from argparse import ArgumentParser, Namespace
 19 | from PIL import Image
 20 | from multiprocessing.pool import Pool
 21 | try:
 22 |     from mxnet.recordio import IRHeader, MXIndexedRecordIO, pack
 23 | except ImportError:
 24 |     IRHeader = None
 25 | from time import perf_counter
 26 | from typing import Generator, List, NoReturn, Optional, Tuple
 27 | from math import ceil
 28 | try:
 29 |     from tensorflow.io import TFRecordWriter
 30 |     from tensorflow.train import (BytesList,
 31 |                                   Example,
 32 |                                   Feature,
 33 |                                   Features,
 34 |                                   Int64List)
 35 | except ImportError:
 36 |     TFRecordWriter = None
 37 | 
 38 | 
 39 | STANDARD_IMAGE = 'create-images'  # sub-command: generate random images
 40 | TFRECORD = 'create-tfrecord'  # sub-command: images -> TFRecords
 41 | RECORDIO = 'create-recordio'  # sub-command: images -> RecordIO
 42 | SUPPORTED_IMAGE_FORMATS = {"jpg": "jpg", "jpeg": "jpg", "bmp": "bmp",
 43 |                            "bitmap": "bmp", "png": "png"}  # keys: CLI choices
 44 | 
 45 | 
 46 | def _parse_args() -> Namespace:
 47 |     """
 48 |     Parse arguments passed to the application.
 49 | 
 50 |     A custom argument parser handles multiple commands and options to launch
 51 |     the desired function.
 52 | 
 53 |     Returns
 54 |     -------
 55 |     Namespace
 56 |         Returns a ``Namespace`` of all of the arguments that were parsed from
 57 |         the application during runtime.
 58 |     """
 59 |     message = """
 60 |     CLI for generating a fake dataset of various quantities at different
 61 |     resolutions.
 62 | 
 63 |     Supported file types: .bmp, .png, and .jpg.
 64 |     Supported record types: TFRecords, and RecordIO.
 65 |     TFRecords requires an external index file creation step.
 66 |     """
 67 |     parser = ArgumentParser(message)
 68 |     # Required positional command subparser which should be specified first
 69 |     commands = parser.add_subparsers(dest='command', metavar='command')
 70 |     commands_parent = ArgumentParser(add_help=False)
 71 | 
 72 |     # Options specific to record types
 73 |     commands_parent.add_argument('source_path', metavar='source-path',
 74 |                                  help='Path containing valid input images to '
 75 |                                  'convert to records')
 76 |     commands_parent.add_argument('dest_path', metavar='dest-path',
 77 |                                  help='Path to save record files to')
 78 |     commands_parent.add_argument('name', help='Name to prepend files with, '
 79 |                                  'such as "sample_record_"')
 80 |     commands_parent.add_argument('--img-per-file', type=int, default=1000)
 81 |     commands.add_parser(TFRECORD, help='Create TFRecords from input images',
 82 |                         parents=[commands_parent])
 83 |     commands.add_parser(RECORDIO, help='Create RecordIO from input images',
 84 |                         parents=[commands_parent])
 85 | 
 86 |     # Options specific to generating standard images
 87 |     standard = commands.add_parser(STANDARD_IMAGE, help='Generate random '
 88 |                                    'images')
 89 |     standard.add_argument('path', help='Path to save images to')
 90 |     standard.add_argument('name', help='Name to prepend files with, such as '
 91 |                           '"sample_image_"')
 92 |     standard.add_argument('image_format', metavar='image-format', help='The '
 93 |                           'image format to generate',
 94 |                           choices=SUPPORTED_IMAGE_FORMATS.keys())
 95 |     standard.add_argument('--width', help='The image width in pixels',
 96 |                           type=int, default=1920)
 97 |     standard.add_argument('--height', help='The image height in pixels',
 98 |                           type=int, default=1080)
 99 |     standard.add_argument('--count', help='The number of images to generate',
100 |                           type=int, default=1)
101 |     standard.add_argument('--seed', help='The seed to use while generating '
102 |                           'random image data', type=int, default=0)
103 |     standard.add_argument('--size', help='Display the first image size and '
104 |                           'the directory size for the images',
105 |                           action='store_true')
106 |     return parser.parse_args()
107 | 
108 | 
109 | def _try_create_directory(directory: str) -> NoReturn:
110 |     """
111 |     Create a directory if it doesn't exist.
112 | 
113 |     Given a name of a directory as a ``string``, a directory should be created
114 |     with the requested name if and only if it doesn't exist already. If the
115 |     directory exists, the function will return without any changes.
116 | 
117 |     Parameters
118 |     ----------
119 |     directory : string
120 |         A ``string`` of a path pointing to a directory to attempt to create.
121 |     """
122 |     os.makedirs(directory, exist_ok=True)
123 | 
124 | 
125 | def _check_directory_exists(directory: str) -> NoReturn:
126 |     """
127 |     Check if a directory exists.
128 | 
129 |     Check if a requested directory exists and raise an error if not.
130 | 
131 |     Parameters
132 |     ----------
133 |     directory : string
134 |         A ``string`` of the requested directory to check.
135 | 
136 |     Raises
137 |     ------
138 |     RuntimeError
139 |         Raises a ``RuntimeError`` if the requested directory does not exist.
140 |     """
141 |     if not os.path.exists(directory):
142 |         raise RuntimeError('Error: Please specify an input directory which '
143 |                            'contains valid images.')
144 | 
145 | 
def create_images(
    path: str,
    name: str,
    width: int,
    height: int,
    count: int,
    image_format: str,
    seed: Optional[int] = 0,
    size: Optional[bool] = False,
    chunksize: Optional[int] = 64
) -> None:
    """
    Randomly generate standard images.

    Generate ``count`` random images of a standard format, such as JPG, PNG,
    or BMP, with the requested width and height. The output directory is
    created if needed and every image is produced by ``_image_creation``
    running in a worker pool. All images are saved in the specified directory
    with each name beginning with the passed ``name`` variable and ending
    with a counter starting at zero.

    Parameters
    ----------
    path : string
        The path to the directory to save images to. The directory will be
        created if it doesn't exist.
    name : string
        A ``string`` to prepend to all filenames, such as `random_image_`.
        Filenames will end with a counter starting at zero, followed by the
        file format's extension.
    width : int
        The width of the image to generate in pixels.
    height : int
        The height of the image to generate in pixels.
    count : int
        The number of images to generate.
    image_format : str
        The format the images should be saved as. Choices are the keys of
        ``SUPPORTED_IMAGE_FORMATS``.
    seed : int (optional)
        A seed to use for numpy for creating the random image data. Defaults
        to 0.
    size : bool (optional)
        If `True`, will print image size information including the size of the
        first image and the final directory size.
    chunksize : int (optional)
        Passed straight through to ``pool.starmap``: the approximate number
        of images handed to a worker per task. Defaults to 64.
    """
    # NOTE: the docstring above is deliberately a plain string literal. An
    # expression such as ``"...".format(...)`` is not stored as ``__doc__``.
    print('Creating {} {} files located at {} of {}x{} resolution with a '
          'base filename of {}'.format(count, image_format, path, width,
                                       height, name))
    _try_create_directory(path)
    combined_path = os.path.join(path, name)

    # One argument tuple per image; built lazily so that very large counts do
    # not need the whole job list in memory up front.
    jobs = ((combined_path, width, height, seed, image_format, n)
            for n in range(count))

    # Expected to yield a worker pool equivalent to the number of CPU cores
    # in the system.
    pool = Pool()
    start_time = perf_counter()
    try:
        # NOTE: For very large image counts on memory-constrained systems,
        # this can stall-out. Either reduce the image count request, or
        # increase the chunk size.
        pool.starmap(_image_creation, jobs, chunksize=chunksize)
    finally:
        # Always release the workers, even if image creation failed.
        pool.close()
        pool.join()

    stop_time = perf_counter()

    if size:
        _print_image_information(path)

    print('Created {} files in {} seconds'.format(count, stop_time-start_time))
223 | 
224 | 
225 | def _record_slice(
226 |     source_path: str,
227 |     dest_path: str,
228 |     name: str,
229 |     image_files: List[str],
230 |     images_per_file: int,
231 |     num_of_records: int
232 | ) -> Generator[Tuple[str, str, str, List[str], int], None, None]:
233 |     """
234 |     Generate subcomponents for a thread.
235 | 
236 |     While creating RecordIO files, a tuple needs to be generated to pass to
237 |     every thread in a multiprocessing pool. Each tuple corresponds with a
238 |     unique record file with a new path, name, and subset of images. The subset
239 |     of images is calculated by taking the first N-images where
240 |     N = (total images) / (number of records). The next subset begins at N + 1
241 |     and so on.
242 | 
243 |     Parameters
244 |     ----------
245 |     source_path : string
246 |         Path to the directory where the input images are stored.
247 |     dest_path : string
248 |         Path to the directory where the record files should be saved. Will be
249 |         created if it does not exist.
250 |     name : string
251 |         A ``string`` to prepend to all filenames, such as `random_record_`.
252 |         Filenames will end with a counter starting at zero, followed by the
253 |         file format's extension.
254 |     image_files : list
255 |         A ``list`` of ``strings`` of the image filenames to use for the record
256 |         files.
257 |     images_per_file : int
258 |         The number of images to include per record file.
259 |     num_of_records : int
260 |         The total number of record files to create. Note that one record
261 |         assumes a record file plus a corresponding index file.
262 | 
263 |     Returns
264 |     -------
265 |     Generator
266 |         Yields a ``tuple`` of objects specific to each record file. The tuple
267 |         includes the `source_path` as a ``string``, `dest_path` as a
268 |         ``string``, `name` as a ``string``, a subset of image names from
269 |         `image_files` as a ``list`` of ``strings``, and a counter for the
270 |         record file starting at 0 as an ``int``.
271 |     """
272 |     for num in range(num_of_records):
273 |         subset = num * images_per_file
274 |         yield (source_path,
275 |                dest_path,
276 |                name,
277 |                image_files[subset:(subset + images_per_file)],
278 |                num)
279 | 
280 | 
def create_recordio(
    source_path: str,
    dest_path: str,
    name: str,
    img_per_file: int
) -> None:
    """
    Create RecordIO files based on standard images.

    Generate one or multiple RecordIO records based on standard input images.
    Records are created by specifying an input path containing standard image
    files, an output directory to save the records to, a name to prepend the
    records with, and the number of images to place in each record file. The
    number of record files is the number of input files divided by
    `img_per_file`, rounded up, so the last record may hold fewer images.
    Sub-directories of the input directory are ignored. The record files are
    written in parallel by ``_recordio_creation`` in a worker pool.

    Parameters
    ----------
    source_path : string
        Path to the directory where the input images are stored.
    dest_path : string
        Path to the directory where the record files should be saved. Will be
        created if it does not exist.
    name : string
        A ``string`` to prepend to all filenames, such as `random_record_`.
        Filenames will end with a counter starting at zero, followed by the
        file format's extension.
    img_per_file : int
        The number of images to include per record file. Must be at least 1.

    Raises
    ------
    ImportError
        If MXNet could not be imported (``IRHeader`` is falsy).
    ValueError
        If `img_per_file` is smaller than 1.
    RuntimeError
        If `source_path` fails ``_check_directory_exists``.
    """
    print('Creating RecordIO files at {} from {} targeting {} files per '
          'record with a base filename of {}'.format(dest_path,
                                                     source_path,
                                                     img_per_file,
                                                     name))
    if not IRHeader:
        raise ImportError('MXNet not found! Please install MXNet dependency '
                          'using "pip install nvidia-imageinary[\'mxnet\']".')
    # Zero would divide by zero below and a negative value would silently
    # produce no records at all, so reject both up front.
    if img_per_file < 1:
        raise ValueError('img_per_file must be a positive integer, got '
                         '{}'.format(img_per_file))

    source_path = os.path.abspath(source_path)
    dest_path = os.path.abspath(dest_path)
    _check_directory_exists(source_path)
    _try_create_directory(dest_path)

    _print_image_information(source_path)

    # Every non-directory entry of the source directory is treated as an image.
    image_files = [entry for entry in os.listdir(source_path)
                   if not os.path.isdir(os.path.join(source_path, entry))]

    num_of_records = ceil(len(image_files) / img_per_file)
    pool = Pool()
    start_time = perf_counter()
    try:
        pool.starmap(_recordio_creation,
                     _record_slice(source_path,
                                   dest_path,
                                   name,
                                   image_files,
                                   img_per_file,
                                   num_of_records))
    finally:
        # Always release the workers, even if record creation failed.
        pool.close()
        pool.join()

    stop_time = perf_counter()
    print('Completed in {} seconds'.format(stop_time-start_time))
350 | 
351 | 
def create_tfrecords(
    source_path: str,
    dest_path: str,
    name: str,
    img_per_file: int
) -> None:
    """
    Create TFRecords based on standard images.

    Generate one or multiple TFRecords based on standard input images. Records
    are created by specifying an input path containing standard image files,
    an output directory to save the records to, a name to prepend the records
    with, and the number of images to place in each record file. Images are
    read in the order returned by ``os.listdir`` and a new record file is
    started whenever the current one holds `img_per_file` images.
    Sub-directories of the input directory are ignored. Every image is stored
    with its raw file bytes under ``image/encoded`` and a constant label of 0
    under ``image/class/label``.

    The first record file is opened before any image is read, so an input
    directory without files still produces one empty record.

    Parameters
    ----------
    source_path : string
        Path to the directory where the input images are stored.
    dest_path : string
        Path to the directory where the record files should be saved. Will be
        created if it does not exist.
    name : string
        A ``string`` to prepend to all filenames, such as `random_record_`.
        Filenames will end with a counter starting at zero.
    img_per_file : int
        The number of images to include per record file. Must be at least 1.

    Raises
    ------
    ImportError
        If TensorFlow could not be imported (``TFRecordWriter`` is falsy).
    ValueError
        If `img_per_file` is smaller than 1.
    RuntimeError
        If `source_path` fails ``_check_directory_exists``.
    """
    print('Creating TFRecord files at {} from {} targeting {} files per '
          'TFRecord with a base filename of {}'.format(dest_path,
                                                       source_path,
                                                       img_per_file,
                                                       name))
    if not TFRecordWriter:
        raise ImportError('TensorFlow not found! Please install TensorFlow '
                          'dependency using "pip install '
                          'nvidia-imageinary[\'tfrecord\']".')
    # A non-positive value would roll over to a new record before every
    # single image and leave the first record empty.
    if img_per_file < 1:
        raise ValueError('img_per_file must be a positive integer, got '
                         '{}'.format(img_per_file))
    _check_directory_exists(source_path)
    _try_create_directory(dest_path)
    combined_path = os.path.join(dest_path, name)

    _print_image_information(source_path)

    image_count = 0
    record = 0

    start_time = perf_counter()
    writer = TFRecordWriter(combined_path + str(record))
    try:
        for image_name in os.listdir(source_path):
            image_path = os.path.join(source_path, image_name)
            if os.path.isdir(image_path):
                continue
            image_count += 1
            if image_count > img_per_file:
                # Current record is full: roll over to the next record file.
                image_count = 1
                writer.close()
                record += 1
                writer = TFRecordWriter(combined_path + str(record))

            with open(image_path, 'rb') as image_file:
                image = image_file.read()
            feature = {
                'image/encoded': Feature(bytes_list=BytesList(value=[image])),
                'image/class/label': Feature(int64_list=Int64List(value=[0]))
            }

            tfrecord_entry = Example(features=Features(feature=feature))
            writer.write(tfrecord_entry.SerializeToString())
    finally:
        # Close the active writer even when reading or writing an image fails
        # so the record file handle is not leaked.
        writer.close()
    stop_time = perf_counter()

    print('Completed in {} seconds'.format(stop_time-start_time))
428 | 
429 | 
430 | def _print_image_information(path: str) -> NoReturn:
431 |     """
432 |     Print the image and directory size.
433 | 
434 |     Print the size of the first image in the directory, which is assumed to be
435 |     a good approximator for the average image size of all images in the
436 |     directory, as well as the total size of the directory, in bytes.
437 | 
438 |     Parameters
439 |     ----------
440 |     path : string
441 |         The path to the directory where generated images are stored.
442 |     """
443 |     is_first_image = True
444 |     first_image_size = 0
445 |     directory_size = 0
446 |     for image_name in os.listdir(path):
447 |         image_path = os.path.join(path, image_name)
448 |         if os.path.isdir(image_path):
449 |             continue
450 |         directory_size += os.path.getsize(image_path)
451 |         if is_first_image:
452 |             first_image_size = directory_size
453 |             is_first_image = False
454 |     print('First image size from {}, in bytes: {}'.format(path,
455 |                                                           first_image_size))
456 |     print('Directory {} size, in bytes: {}'.format(path, directory_size))
457 | 
458 | 
def _recordio_creation(
    source_path: str,
    dest_path: str,
    name: str,
    image_files: List[str],
    n: int
) -> None:
    """
    Create a RecordIO file based on input images.

    Given a subset of images, a RecordIO file is created together with a
    corresponding index file, both named ``<name><n>`` inside `dest_path`
    with the ``.rec`` and ``.idx`` extensions respectively. Each image is
    written with its raw file bytes and indexed by the first run of digits
    found in its filename.

    Parameters
    ----------
    source_path : string
        Path to the directory where the input images are stored.
    dest_path : string
        Path to the directory where the record files should be saved. This
        function does not create the directory itself.
    name : string
        A ``string`` to prepend the record filename with.
    image_files : list
        A ``list`` of ``strings`` of image filenames to be used for the record
        creation.
    n : int
        An ``integer`` of the current count the record file points to, starting
        at zero.

    Raises
    ------
    IndexError
        If an image filename contains no digits to derive an index from.
    """
    combined_path = os.path.join(dest_path, name)
    regex = re.compile(r'\d+')
    # ``combined_path`` already contains ``dest_path``; joining it with
    # ``dest_path`` a second time would duplicate a relative destination.
    dataset_rec = combined_path + str(n) + '.rec'
    dataset_idx = combined_path + str(n) + '.idx'
    recordio_ds = MXIndexedRecordIO(dataset_idx, dataset_rec, 'w')

    try:
        for image_name in image_files:
            image_path = os.path.join(source_path, image_name)
            # The first number in the filename doubles as the record index.
            image_index = int(regex.findall(image_name)[0])
            # Arguments are presumably (flag, label, id, id2) as in MXNet's
            # IRHeader - only the id is populated. TODO confirm.
            header = IRHeader(0, 0, image_index, 0)
            with open(image_path, 'rb') as image_file:
                image = image_file.read()
            packed_image = pack(header, image)
            recordio_ds.write_idx(image_index, packed_image)
    finally:
        # Close the record even if an image could not be read or packed.
        recordio_ds.close()
505 | 
506 | 
def _image_creation(
    combined_path: str,
    width: int,
    height: int,
    seed: int,
    image_format: str,
    n: int
) -> None:
    """
    Generate a random image.

    Given a name, dimensions, a seed, and an image format, a random image is
    generated by creating a numpy array of random data for the specified
    dimensions and three color channels, then converting the array to an image
    of the specified format and saving the result to the output directory with
    the requested name postfixed with the zero-based image counter and the
    file extension.

    Parameters
    ----------
    combined_path : string
        The full path to the output image file including the requested name as
        a prefix for the filename.
    width : int
        The width of the image to generate in pixels.
    height : int
        The height of the image to generate in pixels.
    seed : int
        The base seed for numpy's random generator. The generator is seeded
        with ``seed + n`` so every image of a run gets different data while
        the run as a whole stays reproducible.
    image_format : str
        The format the images should be saved as. Looked up
        case-insensitively in ``SUPPORTED_IMAGE_FORMATS``; unknown formats
        fall back to ``png``.
    n : int
        The zero-based counter for the image.
    """
    numpy.random.seed(seed + n)
    # Random values scaled by 255 and truncated to 8-bit channel values.
    pixels = (numpy.random.rand(height, width, 3) * 255).astype('uint8')
    file_ext = SUPPORTED_IMAGE_FORMATS.get(image_format.lower(), 'png')
    # JPG output is saved without an alpha channel; every other format is
    # converted to RGBA.
    mode = 'RGB' if file_ext == "jpg" else 'RGBA'
    im_out = Image.fromarray(pixels).convert(mode)

    im_out.save('%s%d.%s' % (combined_path, n, file_ext))
548 | 
549 | 
def _main() -> None:
    """
    Randomly generate images or record files.

    Parse the command line with ``_parse_args`` and dispatch to the function
    that matches the chosen sub-command: ``create_images`` for standard
    images, ``create_tfrecords`` for TFRecords, or ``create_recordio`` for
    RecordIO files. If the parsed command matches none of these, the function
    returns without doing anything.
    """
    args = _parse_args()
    if args.command == STANDARD_IMAGE:
        create_images(args.path, args.name, args.width, args.height,
                      args.count, args.image_format, args.seed, args.size)
    elif args.command == TFRECORD:
        create_tfrecords(args.source_path, args.dest_path, args.name,
                         args.img_per_file)
    elif args.command == RECORDIO:
        create_recordio(args.source_path, args.dest_path, args.name,
                        args.img_per_file)
567 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | absl-py==0.10.0
 2 | astunparse==1.6.3
 3 | attrs==20.2.0
 4 | cachetools==4.1.0
 5 | certifi==2020.4.5.1
 6 | chardet==3.0.4
 7 | coverage==5.3
 8 | gast==0.4.0
 9 | google-auth==1.28.0
10 | google-auth-oauthlib==0.4.1
11 | google-pasta==0.2.0
12 | graphviz==0.8.4
13 | grpcio==1.34.0
14 | h5py==3.1.0
15 | idna==2.9
16 | importlib-metadata==1.6.0
17 | iniconfig==1.1.1
18 | libclang==12.0.0
19 | keras==2.7.0
20 | Keras-Preprocessing==1.1.2
21 | Markdown==3.2.2
22 | mxnet==1.6.0
23 | numpy==1.21.0
24 | oauthlib==3.1.0
25 | opt-einsum==3.3.0
26 | packaging==20.4
27 | Pillow==9.0.0
28 | pluggy==0.13.1
29 | protobuf==3.15.0
30 | py==1.10.0
31 | pyasn1==0.4.8
32 | pyasn1-modules==0.2.8
33 | pycodestyle==2.6.0
34 | pyparsing==2.4.7
35 | pytest==6.1.1
36 | pytest-cov==2.10.1
37 | requests==2.26.0
38 | requests-oauthlib==1.3.0
39 | rsa==4.7
40 | scipy==1.4.1
41 | six==1.15.0
42 | tensorboard==2.7.0
43 | tensorboard-plugin-wit==1.6.0.post3
44 | tensorflow==2.7.0
45 | tensorflow-estimator==2.7.0
46 | tensorflow-io-gcs-filesystem==0.23.1
47 | termcolor==1.1.0
48 | toml==0.10.1
49 | urllib3==1.26.5
50 | Werkzeug==1.0.1
51 | wrapt==1.12.1
52 | zipp==3.1.0
53 | 


--------------------------------------------------------------------------------
/requirements.txt.tf1:
--------------------------------------------------------------------------------
 1 | absl-py==0.9.0
 2 | astor==0.8.1
 3 | attrs==20.2.0
 4 | certifi==2020.4.5.2
 5 | chardet==3.0.4
 6 | coverage==5.3
 7 | gast==0.3.3
 8 | google-pasta==0.2.0
 9 | graphviz==0.8.4
10 | grpcio==1.29.0
11 | h5py==2.10.0
12 | idna==2.9
13 | importlib-metadata==1.6.0
14 | iniconfig==1.1.1
15 | Keras-Applications==1.0.8
16 | Keras-Preprocessing==1.1.2
17 | Markdown==3.2.2
18 | mxnet==1.6.0
19 | numpy==1.18.4
20 | packaging==20.4
21 | Pillow==7.1.2
22 | pluggy==0.13.1
23 | protobuf==3.12.2
24 | py==1.9.0
25 | pycodestyle==2.6.0
26 | pyparsing==2.4.7
27 | pytest==6.1.2
28 | pytest-cov==2.10.1
29 | requests==2.23.0
30 | six==1.15.0
31 | tensorboard==1.14.0
32 | tensorflow==1.14.0
33 | tensorflow-estimator==1.14.0
34 | termcolor==1.1.0
35 | toml==0.10.1
36 | urllib3==1.25.9
37 | Werkzeug==1.0.1
38 | wrapt==1.12.1
39 | zipp==3.1.0
40 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import find_packages, setup
 2 | 
 3 | with open('README.md', 'r') as f:
 4 |     long_description = f.read()
 5 | 
 6 | with open('VERSION', 'r') as f:
 7 |     version = f.read().strip()
 8 | 
 9 | extras = {
10 |     'tfrecord': ['tensorflow >= 1.14.0,!=2.0.x,!=2.1.x,!=2.2.0,!=2.4.0'],
11 |     'mxnet': ['mxnet >= 1.6.0,!=1.8.0']
12 | }
13 | 
14 | extras['all'] = [item for group in extras.values() for item in group]
15 | 
16 | setup(
17 |     name='nvidia-imageinary',
18 |     author='NVIDIA Corporation',
19 |     author_email='roclark@nvidia.com',
20 |     version=version,
21 |     description='A tool to randomly generate image datasets of various resolutions',
22 |     long_description=long_description,
23 |     packages=find_packages(include=['imagine'], exclude=['tests']),
24 |     license='Apache 2.0',
25 |     python_requires='>=3.7',
26 |     entry_points={
27 |         'console_scripts': ['imagine=imagine:_main']
28 |     },
29 |     install_requires=[
30 |         'numpy >= 1.18.0',
31 |         'Pillow >= 7.1.2'
32 |     ],
33 |     extras_require=extras
34 | )
35 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/Imageinary/aa44843e7671c0ebac032e39ef66508d3fa9b55c/tests/__init__.py


--------------------------------------------------------------------------------
/tests/functional/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/Imageinary/aa44843e7671c0ebac032e39ef66508d3fa9b55c/tests/functional/__init__.py


--------------------------------------------------------------------------------
/tests/functional/test_jpg_creation.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import pytest
15 | import re
16 | import os
17 | from glob import glob
18 | from imagine import create_images
19 | from PIL import Image
20 | 
21 | 
class TestJPGCreation:
    """Functional tests for generating JPG files with ``create_images``."""

    @pytest.fixture(autouse=True)
    def setup(self, tmpdir):
        # Each test writes into its own ``jpg_files`` directory.
        self.tmpdir = tmpdir.mkdir('jpg_files')

    def teardown_method(self):
        # Delete the generated images, then the now-empty directory.
        for image in glob(f'{str(self.tmpdir)}/*'):
            os.remove(image)
        os.rmdir(str(self.tmpdir))

    def _create_and_verify(self, width, height, count):
        """
        Generate ``count`` JPGs and verify their number, names, and size.

        The leading underscore keeps pytest from collecting this helper as a
        test of its own.
        """
        create_images(
            str(self.tmpdir),
            'tmp_',
            width,
            height,
            count,
            'jpg',
            0,
            False
        )

        images = glob(f'{str(self.tmpdir)}/*')

        assert len(images) == count
        for image in images:
            # Escaped dot and end anchor: the name must really end in ".jpg".
            assert re.search(r'tmp_\d+\.jpg$', image)
            with Image.open(image) as im:
                assert im.size == (width, height)

    def test_creating_one_hundred_images(self):
        self._create_and_verify(1920, 1080, 100)

    def test_creating_one_hundred_4K_images(self):
        self._create_and_verify(3840, 2160, 100)
71 | 


--------------------------------------------------------------------------------
/tests/functional/test_png_creation.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import pytest
15 | import re
16 | import os
17 | from glob import glob
18 | from imagine import create_images
19 | from PIL import Image
20 | 
21 | 
class TestPNGCreation:
    """Functional tests for generating PNG files with ``create_images``."""

    @pytest.fixture(autouse=True)
    def setup(self, tmpdir):
        # Each test writes into its own ``png_files`` directory.
        self.tmpdir = tmpdir.mkdir('png_files')

    def teardown_method(self):
        # Delete the generated images, then the now-empty directory.
        for image in glob(f'{str(self.tmpdir)}/*'):
            os.remove(image)
        os.rmdir(str(self.tmpdir))

    def _create_and_verify(self, width, height, count):
        """
        Generate ``count`` PNGs and verify their number, names, and size.

        The leading underscore keeps pytest from collecting this helper as a
        test of its own.
        """
        create_images(
            str(self.tmpdir),
            'tmp_',
            width,
            height,
            count,
            'png',
            0,
            False
        )

        images = glob(f'{str(self.tmpdir)}/*')

        assert len(images) == count
        for image in images:
            # Escaped dot and end anchor: the name must really end in ".png".
            assert re.search(r'tmp_\d+\.png$', image)
            with Image.open(image) as im:
                assert im.size == (width, height)

    def test_creating_one_hundred_images(self):
        self._create_and_verify(1920, 1080, 100)

    def test_creating_one_hundred_4K_images(self):
        self._create_and_verify(3840, 2160, 100)
71 | 


--------------------------------------------------------------------------------
/tests/functional/test_recordio.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | import pytest
 15 | import re
 16 | import os
 17 | from glob import glob
 18 | from imagine import create_images, create_recordio
 19 | from PIL import Image
 20 | 
 21 | 
 22 | class TestRecordIOCreation:
 23 |     @pytest.fixture(autouse=True)
 24 |     def setup(self, tmpdir):
 25 |         self.tmpdir = tmpdir.mkdir('input_files')
 26 |         self.outdir = tmpdir.mkdir('output_files')
 27 | 
 28 |     def teardown_method(self):
 29 |         for image in glob(f'{str(self.tmpdir)}/*'):
 30 |             os.remove(image)
 31 |         for record in glob(f'{str(self.outdir)}/*'):
 32 |             os.remove(record)
 33 |         os.rmdir(str(self.tmpdir))
 34 |         os.rmdir(str(self.outdir))
 35 | 
 36 |     def test_creating_recordio_from_100_jpgs(self):
 37 |         # Create sample images which will be used as a basis.
 38 |         create_images(
 39 |             str(self.tmpdir),
 40 |             'tmp_',
 41 |             1920,
 42 |             1080,
 43 |             100,
 44 |             'jpg',
 45 |             0,
 46 |             False
 47 |         )
 48 |         create_recordio(
 49 |             str(self.tmpdir),
 50 |             str(self.outdir),
 51 |             'tmprecord_',
 52 |             100
 53 |         )
 54 | 
 55 |         records = glob(f'{str(self.outdir)}/*')
 56 | 
 57 |         assert len(records) == 2
 58 |         for record in records:
 59 |             assert 'tmprecord_0.idx' in record or \
 60 |                 'tmprecord_0.rec' in record
 61 | 
 62 |     def test_creating_recordio_from_100_pngs(self):
 63 |         # Create sample images which will be used as a basis.
 64 |         create_images(
 65 |             str(self.tmpdir),
 66 |             'tmp_',
 67 |             1920,
 68 |             1080,
 69 |             100,
 70 |             'png',
 71 |             0,
 72 |             False
 73 |         )
 74 |         create_recordio(
 75 |             str(self.tmpdir),
 76 |             str(self.outdir),
 77 |             'tmprecord_',
 78 |             100
 79 |         )
 80 | 
 81 |         records = glob(f'{str(self.outdir)}/*')
 82 | 
 83 |         assert len(records) == 2
 84 |         for record in records:
 85 |             assert 'tmprecord_0.idx' in record or \
 86 |                 'tmprecord_0.rec' in record
 87 | 
 88 |     def test_creating_recordio_from_100_jpg_multiple_files(self):
 89 |         # Create sample images which will be used as a basis.
 90 |         create_images(
 91 |             str(self.tmpdir),
 92 |             'tmp_',
 93 |             1920,
 94 |             1080,
 95 |             100,
 96 |             'jpg',
 97 |             0,
 98 |             False
 99 |         )
100 |         create_recordio(
101 |             str(self.tmpdir),
102 |             str(self.outdir),
103 |             'tmprecord_',
104 |             10
105 |         )
106 | 
107 |         records = glob(f'{str(self.outdir)}/*')
108 | 
109 |         assert len(records) == 20
110 |         for record in records:
111 |             assert re.search(r'tmprecord_\d+.idx', record) or \
112 |                 re.search(r'tmprecord_\d+.rec', record)
113 | 
114 |     def test_creating_recordio_from_100_pngs_multiple_files(self):
115 |         # Create sample images which will be used as a basis.
116 |         create_images(
117 |             str(self.tmpdir),
118 |             'tmp_',
119 |             1920,
120 |             1080,
121 |             100,
122 |             'png',
123 |             0,
124 |             False
125 |         )
126 |         create_recordio(
127 |             str(self.tmpdir),
128 |             str(self.outdir),
129 |             'tmprecord_',
130 |             10
131 |         )
132 | 
133 |         records = glob(f'{str(self.outdir)}/*')
134 | 
135 |         assert len(records) == 20
136 |         for record in records:
137 |             assert re.search(r'tmprecord_\d+.idx', record) or \
138 |                 re.search(r'tmprecord_\d+.rec', record)
139 | 


--------------------------------------------------------------------------------
/tests/functional/test_tfrecord.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | import pytest
 15 | import re
 16 | import os
 17 | from glob import glob
 18 | from imagine import create_images, create_tfrecords
 19 | from PIL import Image
 20 | 
 21 | 
 22 | class TestTFRecordCreation:
 23 |     @pytest.fixture(autouse=True)
 24 |     def setup(self, tmpdir):
 25 |         self.tmpdir = tmpdir.mkdir('input_files')
 26 |         self.outdir = tmpdir.mkdir('output_files')
 27 | 
 28 |     def teardown_method(self):
 29 |         for image in glob(f'{str(self.tmpdir)}/*'):
 30 |             os.remove(image)
 31 |         for record in glob(f'{str(self.outdir)}/*'):
 32 |             os.remove(record)
 33 |         os.rmdir(str(self.tmpdir))
 34 |         os.rmdir(str(self.outdir))
 35 | 
 36 |     def test_creating_tfrecord_from_100_jpgs(self):
 37 |         # Create sample images which will be used as a basis.
 38 |         create_images(
 39 |             str(self.tmpdir),
 40 |             'tmp_',
 41 |             1920,
 42 |             1080,
 43 |             100,
 44 |             'jpg',
 45 |             0,
 46 |             False
 47 |         )
 48 |         create_tfrecords(
 49 |             str(self.tmpdir),
 50 |             str(self.outdir),
 51 |             'tmprecord_',
 52 |             100
 53 |         )
 54 | 
 55 |         records = glob(f'{str(self.outdir)}/*')
 56 | 
 57 |         assert len(records) == 1
 58 |         assert 'tmprecord_0' in records[0]
 59 | 
 60 |     def test_creating_tfrecord_from_100_pngs(self):
 61 |         # Create sample images which will be used as a basis.
 62 |         create_images(
 63 |             str(self.tmpdir),
 64 |             'tmp_',
 65 |             1920,
 66 |             1080,
 67 |             100,
 68 |             'png',
 69 |             0,
 70 |             False
 71 |         )
 72 |         create_tfrecords(
 73 |             str(self.tmpdir),
 74 |             str(self.outdir),
 75 |             'tmprecord_',
 76 |             100
 77 |         )
 78 | 
 79 |         records = glob(f'{str(self.outdir)}/*')
 80 | 
 81 |         assert len(records) == 1
 82 |         assert 'tmprecord_0' in records[0]
 83 | 
 84 |     def test_creating_tfrecord_from_100_jpg_multiple_files(self):
 85 |         # Create sample images which will be used as a basis.
 86 |         create_images(
 87 |             str(self.tmpdir),
 88 |             'tmp_',
 89 |             1920,
 90 |             1080,
 91 |             100,
 92 |             'jpg',
 93 |             0,
 94 |             False
 95 |         )
 96 |         create_tfrecords(
 97 |             str(self.tmpdir),
 98 |             str(self.outdir),
 99 |             'tmprecord_',
100 |             10
101 |         )
102 | 
103 |         records = glob(f'{str(self.outdir)}/*')
104 | 
105 |         assert len(records) == 10
106 |         for record in records:
107 |             assert re.search(r'tmprecord_\d+', record)
108 | 
109 |     def test_creating_tfrecord_from_100_pngs_multiple_files(self):
110 |         # Create sample images which will be used as a basis.
111 |         create_images(
112 |             str(self.tmpdir),
113 |             'tmp_',
114 |             1920,
115 |             1080,
116 |             100,
117 |             'png',
118 |             0,
119 |             False
120 |         )
121 |         create_tfrecords(
122 |             str(self.tmpdir),
123 |             str(self.outdir),
124 |             'tmprecord_',
125 |             10
126 |         )
127 | 
128 |         records = glob(f'{str(self.outdir)}/*')
129 | 
130 |         assert len(records) == 10
131 |         for record in records:
132 |             assert re.search(r'tmprecord_\d+', record)
133 | 


--------------------------------------------------------------------------------
/tests/unit/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/Imageinary/aa44843e7671c0ebac032e39ef66508d3fa9b55c/tests/unit/__init__.py


--------------------------------------------------------------------------------
/tests/unit/test_units.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import pytest
15 | import os
16 | from imagine import imagine
17 | 
18 | 
19 | class TestUnits:
20 |     @pytest.fixture(autouse=True)
21 |     def setup(self, tmpdir):
22 |         self.tmpdir = tmpdir
23 | 
24 |     def teardown_method(self):
25 |         try:
26 |             os.rmdir(str(self.tmpdir))
27 |         except OSError:
28 |             # The directory wasn't created, as expected
29 |             pass
30 | 
31 |     def test_directory_creation_if_not_exist(self):
32 |         imagine._try_create_directory(str(self.tmpdir))
33 | 
34 |     def test_error_input_directory_doesnt_exist(self):
35 |         with pytest.raises(RuntimeError):
36 |             imagine._check_directory_exists(os.path.join(str(self.tmpdir),
37 |                                                          'dne'))
38 | 
39 |     def test_record_slice_yields_expected_results(self):
40 |         slices = [range(x, x + 100) for x in range(0, 1000, 100)]
41 |         results = imagine._record_slice(self.tmpdir,
42 |                                         self.tmpdir,
43 |                                         'test_record_',
44 |                                         range(0, 1000),
45 |                                         100,
46 |                                         10)
47 | 
48 |         for count, result in enumerate(results):
49 |             source, dest, name, images, num = result
50 |             assert source == self.tmpdir
51 |             assert dest == self.tmpdir
52 |             assert name == 'test_record_'
53 |             assert images == slices[count]
54 |             assert num == count
55 |         # Enumerate is 0-based, so the final number will be 9 for 10 records
56 |         assert count == 10 - 1
57 | 


--------------------------------------------------------------------------------