├── .gitattributes ├── .github └── workflows │ ├── PyPI.yml │ └── package-tests.yml ├── .gitignore ├── .travis.yml ├── Augmentor ├── ImageSource.py ├── ImageUtilities.py ├── Operations.py ├── Pipeline.py └── __init__.py ├── CONTRIBUTING.md ├── DESCRIPTION.rst ├── LICENSE.md ├── MANIFEST.in ├── README.md ├── binder ├── environment.yml └── postBuild ├── docs ├── Makefile ├── code.bak ├── code.rst ├── conf.py ├── index.rst ├── joss │ ├── paper.bib │ └── paper.md ├── licence.rst ├── logo.ico ├── logo.png ├── make.bat └── userguide │ ├── examples.rst │ ├── extend.rst │ ├── install.rst │ ├── mainfeatures.rst │ └── usage.rst ├── notebooks ├── Augmentor_Keras.ipynb ├── Augmentor_Keras_Array_Data.ipynb ├── Augmentor_Keras_DataFrame.ipynb ├── Multiple-Mask-Augmentation.ipynb └── Per_Class_Augmentation_Strategy.ipynb ├── requirements-dev.txt ├── requirements.txt ├── setup.cfg ├── setup.py └── tests ├── test_array_fuctions.py ├── test_custom_operations.py ├── test_datapipeline.py ├── test_distortion.py ├── test_gaussian.py ├── test_generators.py ├── test_ground_truth_augmentation.py ├── test_ground_truth_by_class.py ├── test_load.py ├── test_multi_threading.py ├── test_pipeline_add_operations.py ├── test_pipeline_utilities.py ├── test_random_color_brightness_contrast.py ├── test_resize.py ├── test_rotate.py ├── test_torch_transform.py ├── test_user_operation_parameter_input.py └── util_funcs.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-detectable=false 2 | -------------------------------------------------------------------------------- /.github/workflows/PyPI.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install the latest version of Augmentor using pip 2 | 3 | name: PyPI Install 4 | 5 | on: 6 | workflow_dispatch: 7 | release: 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ${{ matrix.os }} 13 | strategy: 14 | matrix: 15 | os: 
[ubuntu-latest, windows-latest, macos-latest] 16 | python-version: [2.7, 3.5, 3.6, 3.7, 3.8, 3.9] 17 | 18 | steps: 19 | # - uses: actions/checkout@v2 20 | - name: Set up Python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v2 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install Augmentor 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install Augmentor 28 | - name: Test that Augmentor can be imported and exit 29 | run: | 30 | python -c "import Augmentor" 31 | -------------------------------------------------------------------------------- /.github/workflows/package-tests.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint 2 | 3 | name: Pytest 4 | 5 | on: 6 | push: 7 | branches: [ master ] 8 | pull_request: 9 | branches: [ master ] 10 | 11 | jobs: 12 | build: 13 | 14 | runs-on: ${{ matrix.os }} 15 | strategy: 16 | matrix: 17 | os: [windows-latest, ubuntu-latest, macos-latest] # windows-latest 18 | python-version: [2.7, 3.5, 3.6, 3.7, 3.8, 3.9] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v2 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install flake8 pytest 30 | pip install -r requirements.txt 31 | # if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 32 | - name: Lint with flake8 33 | run: | 34 | # stop the build if there are Python syntax errors or undefined names 35 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 36 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 37 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 38 | - name: Test with pytest 39 | run: | 40 | pytest -v 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # OS generated files # 2 | ###################### 3 | .DS_Store 4 | .DS_Store? 5 | ._* 6 | .Spotlight-V100 7 | .Trashes 8 | ehthumbs.db 9 | Thumbs.db 10 | # My additions 11 | *.pyc 12 | *.egg 13 | *.egg-info 14 | .idea/ 15 | dist/ 16 | data/ 17 | build/ 18 | back/ 19 | .cache/ 20 | .pytest_cache/ 21 | .tox/ 22 | _build/ 23 | _static/ 24 | _templates/ 25 | __pycache__/ 26 | .ipynb_checkpoints/ 27 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | os: 3 | - linux 4 | python: 5 | - "2.7" 6 | - "3.5" 7 | - "3.6" 8 | - "3.7" 9 | # command to install dependencies 10 | install: 11 | # Install conda. 12 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 13 | wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh; 14 | else 15 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 16 | fi 17 | - bash miniconda.sh -b -p $HOME/miniconda 18 | - export OPATH="$PATH" && export PATH="$HOME/miniconda/bin:$PATH" 19 | - conda create -q -y -n test-environment python=$TRAVIS_PYTHON_VERSION pytest 20 | - source activate test-environment 21 | # Attempt to install torchvision; on failure, revert back to pre-conda environment. 
22 | - conda install -q -y torchvision -c soumith || export PATH="$OPATH" 23 | # Install pandas 24 | - conda install -q -y pandas 25 | - pip install -r requirements.txt 26 | # command to run tests 27 | script: py.test -v 28 | -------------------------------------------------------------------------------- /Augmentor/ImageSource.py: -------------------------------------------------------------------------------- 1 | from __future__ import (absolute_import, division, 2 | print_function, unicode_literals) 3 | from builtins import * 4 | 5 | import os 6 | import glob 7 | 8 | 9 | class ImageSource(object): 10 | """ 11 | The ImageSource class is used to search for and contain paths to images for augmentation. 12 | """ 13 | def __init__(self, source_directory, recursive_scan=False): 14 | source_directory = os.path.abspath(source_directory) 15 | self.image_list = self.scan_directory(source_directory, recursive_scan) 16 | 17 | self.largest_file_dimensions = (800, 600) 18 | 19 | def scan_directory(self, source_directory, recusrive_scan=False): 20 | # TODO: Make this a static member somewhere later 21 | file_types = ['*.jpg', '*.bmp', '*.jpeg', '*.gif', '*.img', '*.png'] 22 | file_types.extend([str.upper(x) for x in file_types]) 23 | 24 | list_of_files = [] 25 | 26 | for file_type in file_types: 27 | list_of_files.extend(glob.glob(os.path.join(os.path.abspath(source_directory), file_type))) 28 | 29 | return list_of_files 30 | -------------------------------------------------------------------------------- /Augmentor/ImageUtilities.py: -------------------------------------------------------------------------------- 1 | # ImageUtilities.py 2 | # Author: Marcus D. Bloice and contributors 3 | # Licensed under the terms of the MIT Licence. 4 | """ 5 | The ImageUtilities module provides a number of helper functions, as well as 6 | the main :class:`~Augmentor.ImageUtilities.AugmentorImage` class, that is used 7 | throughout the package as a container class for images to be augmented. 
8 | """ 9 | from __future__ import (absolute_import, division, 10 | print_function, unicode_literals) 11 | from builtins import * 12 | 13 | import os 14 | import glob 15 | import numbers 16 | import random 17 | import warnings 18 | import numpy as np 19 | 20 | 21 | class AugmentorImage(object): 22 | """ 23 | Wrapper class containing paths to images, as well as a number of other 24 | parameters, that are used by the Pipeline and Operation modules to perform 25 | augmentation. 26 | 27 | Each image that is found by Augmentor during the initialisation of a 28 | Pipeline object is contained with a new AugmentorImage object. 29 | """ 30 | def __init__(self, 31 | image_path, 32 | output_directory, 33 | pil_images=None, 34 | array_images=None, 35 | path_images=None, 36 | class_label_int=None): 37 | """ 38 | To initialise an AugmentorImage object for any image, the image's 39 | file path is required, as well as that image's output directory, 40 | which defines where any augmented images are stored. 41 | 42 | :param image_path: The full path to an image. 43 | :param output_directory: The directory where augmented images for this 44 | image should be saved. 45 | """ 46 | 47 | # Could really think about initialising AugmentorImage member 48 | # variables here and and only here during init. Then remove all 49 | # setters below so that they cannot be altered later. 50 | 51 | # Call the setters from parameters that are required. 
52 | self._image_path = image_path 53 | self._output_directory = output_directory 54 | 55 | self._ground_truth = None 56 | 57 | self._image_paths = None 58 | self._image_arrays = None 59 | self._pil_images = None 60 | 61 | self._file_format = None 62 | self._class_label = None 63 | self._class_label_int = None 64 | self._label = None 65 | self._label_pair = None 66 | self._categorical_label = None 67 | 68 | if pil_images is not None: 69 | self._pil_images = pil_images 70 | 71 | if array_images is not None: 72 | self._array_images = array_images 73 | 74 | if path_images is not None: 75 | self._path_images = path_images 76 | 77 | if class_label_int is not None: 78 | self._class_label_int = class_label_int 79 | 80 | def __str__(self): 81 | return """ 82 | Image path: %s 83 | Ground truth path: %s 84 | File format (inferred from extension): %s 85 | Class label: %s 86 | Numerical class label (auto assigned): %s 87 | """ % (self._image_path, self._ground_truth, self._file_format, self._class_label, self._class_label_int) 88 | 89 | @property 90 | def pil_images(self): 91 | return self._pil_images 92 | 93 | @pil_images.setter 94 | def pil_images(self, value): 95 | self._pil_images = value 96 | 97 | @property 98 | def image_arrays(self): 99 | return self._image_arrays 100 | 101 | @image_arrays.setter 102 | def image_arrays(self, value): 103 | self._image_arrays = value 104 | 105 | @property 106 | def class_label_int(self): 107 | return self._class_label_int 108 | 109 | @class_label_int.setter 110 | def class_label_int(self, value): 111 | self._class_label_int = value 112 | 113 | @property 114 | def output_directory(self): 115 | """ 116 | The :attr:`output_directory` property contains a path to the directory 117 | to which augmented images will be saved for this instance. 118 | 119 | :getter: Returns this image's output directory. 120 | :setter: Sets this image's output directory. 
121 | :type: String 122 | """ 123 | return self._output_directory 124 | 125 | @output_directory.setter 126 | def output_directory(self, value): 127 | self._output_directory = value 128 | 129 | @property 130 | def image_path(self): 131 | """ 132 | The :attr:`image_path` property contains the absolute file path to the 133 | image. 134 | 135 | :getter: Returns this image's image path. 136 | :setter: Sets this image's image path 137 | :type: String 138 | """ 139 | return self._image_path 140 | 141 | @image_path.setter 142 | def image_path(self, value): 143 | self._image_path = value 144 | 145 | @property 146 | def pil_images(self): 147 | return self._pil_images 148 | 149 | @property 150 | def image_file_name(self): 151 | """ 152 | The :attr:`image_file_name` property contains the **file name** of the 153 | image contained in this instance. **There is no setter for this 154 | property.** 155 | 156 | :getter: Returns this image's file name. 157 | :type: String 158 | """ 159 | return os.path.basename(self._image_path) 160 | 161 | @property 162 | def class_label(self): 163 | return self._class_label 164 | 165 | @class_label.setter 166 | def class_label(self, value): 167 | self._class_label = value 168 | 169 | @property 170 | def label(self): 171 | return self._label 172 | 173 | @label.setter 174 | def label(self, value): 175 | self._label = value 176 | 177 | @property 178 | def categorical_label(self): 179 | return self._categorical_label 180 | 181 | @categorical_label.setter 182 | def categorical_label(self, value): 183 | self._categorical_label = value 184 | 185 | @property 186 | def ground_truth(self): 187 | """ 188 | The :attr:`ground_truth` property contains an absolute path to the 189 | ground truth file for an image. 190 | 191 | :getter: Returns this image's ground truth file path. 192 | :setter: Sets this image's ground truth file path. 
193 | :type: String 194 | """ 195 | return self._ground_truth 196 | 197 | @ground_truth.setter 198 | def ground_truth(self, value): 199 | if os.path.isfile(value): 200 | self._ground_truth = value 201 | 202 | @property 203 | def label_pair(self): 204 | return self._class_label_int, self._class_label 205 | 206 | @property 207 | def file_format(self): 208 | return self._file_format 209 | 210 | @file_format.setter 211 | def file_format(self, value): 212 | self._file_format = value 213 | 214 | 215 | def parse_user_parameter(user_param): 216 | 217 | if isinstance(user_param, numbers.Real): 218 | return user_param 219 | elif isinstance(user_param, tuple): 220 | return random.sample(user_param, 1)[0] 221 | elif isinstance(user_param, list): 222 | return random.choice(np.arange(*user_param)) 223 | 224 | 225 | def extract_paths_and_extensions(image_path): 226 | """ 227 | Extract an image's file name, its extension, and its root path (the 228 | image's absolute path without the file name). 229 | 230 | :param image_path: The path to the image. 231 | :type image_path: String 232 | :return: A 3-tuple containing the image's file name, extension, and 233 | root path. 
234 | """ 235 | file_name, extension = os.path.splitext(image_path) 236 | root_path = os.path.dirname(image_path) 237 | 238 | return file_name, extension, root_path 239 | 240 | 241 | def scan(source_directory, output_directory): 242 | 243 | abs_output_directory = os.path.abspath(output_directory) 244 | files_and_directories = glob.glob(os.path.join(os.path.abspath(source_directory), '*')) 245 | 246 | directory_count = 0 247 | directories = [] 248 | 249 | class_labels = [] 250 | 251 | for f in files_and_directories: 252 | if os.path.isdir(f): 253 | if f != abs_output_directory: 254 | directories.append(f) 255 | directory_count += 1 256 | 257 | directories = sorted(directories) 258 | label_counter = 0 259 | 260 | if directory_count == 0: 261 | 262 | augmentor_images = [] 263 | # This was wrong 264 | # parent_directory_name = os.path.basename(os.path.abspath(os.path.join(source_directory, os.pardir))) 265 | parent_directory_name = os.path.basename(os.path.abspath(source_directory)) 266 | 267 | for image_path in scan_directory(source_directory): 268 | a = AugmentorImage(image_path=image_path, output_directory=abs_output_directory) 269 | a.class_label = parent_directory_name 270 | a.class_label_int = label_counter 271 | a.categorical_label = [label_counter] 272 | a.file_format = os.path.splitext(image_path)[1].split(".")[1] 273 | augmentor_images.append(a) 274 | 275 | class_labels.append((label_counter, parent_directory_name)) 276 | 277 | return augmentor_images, class_labels 278 | 279 | elif directory_count != 0: 280 | augmentor_images = [] 281 | 282 | for d in directories: 283 | output_directory = os.path.join(abs_output_directory, os.path.split(d)[1]) 284 | for image_path in scan_directory(d): 285 | categorical_label = np.zeros(directory_count, dtype=np.uint32) 286 | a = AugmentorImage(image_path=image_path, output_directory=output_directory) 287 | a.class_label = os.path.split(d)[1] 288 | a.class_label_int = label_counter 289 | categorical_label[label_counter] = 1 # 
Set to 1 with the index of the current class. 290 | a.categorical_label = categorical_label 291 | a.file_format = os.path.splitext(image_path)[1].split(".")[1] 292 | augmentor_images.append(a) 293 | class_labels.append((os.path.split(d)[1], label_counter)) 294 | label_counter += 1 295 | 296 | return augmentor_images, class_labels 297 | 298 | 299 | def scan_dataframe(source_dataframe, image_col, category_col, output_directory): 300 | try: 301 | import pandas as pd 302 | except ImportError: 303 | raise ImportError('Pandas is required to use the scan_dataframe function!\nrun pip install pandas and try again') 304 | 305 | # ensure column is categorical 306 | cat_col_series = pd.Categorical(source_dataframe[category_col]) 307 | abs_output_directory = os.path.abspath(output_directory) 308 | class_labels = list(enumerate(cat_col_series.categories)) 309 | 310 | augmentor_images = [] 311 | 312 | for image_path, cat_name, cat_id in zip(source_dataframe[image_col].values, 313 | cat_col_series.get_values(), 314 | cat_col_series.codes): 315 | 316 | a = AugmentorImage(image_path=image_path, output_directory=abs_output_directory) 317 | a.class_label = cat_name 318 | a.class_label_int = cat_id 319 | categorical_label = np.zeros(len(class_labels), dtype=np.uint32) 320 | categorical_label[cat_id] = 1 321 | a.categorical_label = categorical_label 322 | a.file_format = os.path.splitext(image_path)[1].split(".")[1] 323 | augmentor_images.append(a) 324 | 325 | return augmentor_images, class_labels 326 | 327 | 328 | def scan_directory(source_directory): 329 | """ 330 | Scan a directory for images, returning any images found with the 331 | extensions ``.jpg``, ``.JPG``, ``.jpeg``, ``.JPEG``, ``.gif``, ``.GIF``, 332 | ``.img``, ``.IMG``, ``.png``, ``.PNG``, ``.tif``, ``.TIF``, ``.tiff``, 333 | or ``.TIFF``. 334 | 335 | :param source_directory: The directory to scan for images. 
336 | :type source_directory: String 337 | :return: A list of images found in the :attr:`source_directory` 338 | """ 339 | # TODO: GIFs are highly problematic. It may make sense to drop GIF support. 340 | file_types = ['*.jpg', '*.bmp', '*.jpeg', '*.gif', '*.img', '*.png', '*.tiff', '*.tif'] 341 | 342 | list_of_files = [] 343 | 344 | if os.name == "nt": 345 | for file_type in file_types: 346 | list_of_files.extend(glob.glob(os.path.join(os.path.abspath(source_directory), file_type))) 347 | else: 348 | file_types.extend([str.upper(str(x)) for x in file_types]) 349 | for file_type in file_types: 350 | list_of_files.extend(glob.glob(os.path.join(os.path.abspath(source_directory), file_type))) 351 | 352 | return list_of_files 353 | 354 | 355 | def scan_directory_with_classes(source_directory): 356 | warnings.warn("The scan_directory_with_classes() function has been deprecated.", DeprecationWarning) 357 | l = glob.glob(os.path.join(source_directory, '*')) 358 | 359 | directories = [] 360 | 361 | for f in l: 362 | if os.path.isdir(f): 363 | directories.append(f) 364 | 365 | list_of_files = {} 366 | 367 | for d in directories: 368 | list_of_files_current_folder = scan_directory(d) 369 | list_of_files[os.path.split(d)[1]] = list_of_files_current_folder 370 | 371 | return list_of_files 372 | -------------------------------------------------------------------------------- /Augmentor/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The Augmentor image augmentation library. 3 | 4 | Augmentor is a software package for augmenting image data. It provides a number of utilities that aid augmentation \ 5 | in a automated manner. The aim of the package is to make augmentation for machine learning tasks less prone to \ 6 | error, more reproducible, more efficient, and easier to perform. 7 | 8 | .. moduleauthor:: Marcus D. 
Bloice 9 | :platform: Windows, Linux, Macintosh 10 | :synopsis: An image augmentation library for Machine Learning. 11 | 12 | """ 13 | 14 | from .Pipeline import Pipeline, DataFramePipeline, DataPipeline 15 | 16 | __author__ = """Marcus D. Bloice""" 17 | __email__ = 'marcus.bloice@medunigraz.at' 18 | __version__ = '0.2.12' 19 | 20 | __all__ = ['Pipeline', 'DataFramePipeline', 'DataPipeline'] 21 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Augmentor 2 | 3 | Thanks for your interest in Augmentor. 4 | 5 | For guidelines on how to contribute, either by fixing bugs through pull requests or filing bug reports, see [contribution-guide.org](http://www.contribution-guide.org/)! -------------------------------------------------------------------------------- /DESCRIPTION.rst: -------------------------------------------------------------------------------- 1 | Augmentor 2 | ========= 3 | 4 | An image augmentation library for machine learning. See ``_ for documentation. -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Marcus D. Bloice 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE.md 3 | -------------------------------------------------------------------------------- /binder/environment.yml: -------------------------------------------------------------------------------- 1 | name: augmentor_notebooks 2 | channels: 3 | - menpo 4 | - soumith 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - python=3.6 9 | - numpy 10 | - matplotlib 11 | - pandas 12 | - h5py 13 | - scikit-image<=0.20 14 | - scikit-learn<=0.20 15 | - pytorch<=0.3.0 16 | - torchvision 17 | - opencv3>=3.0 18 | - tensorflow 19 | - pip: 20 | - keras==2.1.3 21 | -------------------------------------------------------------------------------- /binder/postBuild: -------------------------------------------------------------------------------- 1 | pip install . 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 
11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | .PHONY: help 18 | help: 19 | @echo "Please use \`make ' where is one of" 20 | @echo " html to make standalone HTML files" 21 | @echo " dirhtml to make HTML files named index.html in directories" 22 | @echo " singlehtml to make a single large HTML file" 23 | @echo " pickle to make pickle files" 24 | @echo " json to make JSON files" 25 | @echo " htmlhelp to make HTML files and a HTML help project" 26 | @echo " qthelp to make HTML files and a qthelp project" 27 | @echo " applehelp to make an Apple Help Book" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " epub3 to make an epub3" 31 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 32 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 33 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 34 | @echo " text to make text files" 35 | @echo " man to make manual pages" 36 | @echo " texinfo to make Texinfo files" 37 | @echo " info to make Texinfo files and run them through makeinfo" 38 | @echo " gettext to make PO message catalogs" 39 | @echo " changes to make an overview of all changed/added/deprecated items" 40 | @echo " xml to make Docutils-native XML files" 41 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 42 | @echo " linkcheck to check all external links for integrity" 43 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 44 | @echo " coverage to run coverage check of the documentation (if enabled)" 45 | @echo " dummy to check syntax errors of document sources" 46 | 47 | .PHONY: livehtml 48 | livehtml: 49 | 
sphinx-autobuild -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 50 | 51 | .PHONY: clean 52 | clean: 53 | rm -rf $(BUILDDIR)/* 54 | 55 | .PHONY: html 56 | html: 57 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 58 | @echo 59 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 60 | 61 | .PHONY: dirhtml 62 | dirhtml: 63 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 64 | @echo 65 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 66 | 67 | .PHONY: singlehtml 68 | singlehtml: 69 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 70 | @echo 71 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 72 | 73 | .PHONY: pickle 74 | pickle: 75 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 76 | @echo 77 | @echo "Build finished; now you can process the pickle files." 78 | 79 | .PHONY: json 80 | json: 81 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 82 | @echo 83 | @echo "Build finished; now you can process the JSON files." 84 | 85 | .PHONY: htmlhelp 86 | htmlhelp: 87 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 88 | @echo 89 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 90 | ".hhp project file in $(BUILDDIR)/htmlhelp." 91 | 92 | .PHONY: qthelp 93 | qthelp: 94 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 95 | @echo 96 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 97 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 98 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Augmentor.qhcp" 99 | @echo "To view the help file:" 100 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Augmentor.qhc" 101 | 102 | .PHONY: applehelp 103 | applehelp: 104 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 105 | @echo 106 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 107 | @echo "N.B. 
You won't be able to view it unless you put it in" \ 108 | "~/Library/Documentation/Help or install it in your application" \ 109 | "bundle." 110 | 111 | .PHONY: devhelp 112 | devhelp: 113 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 114 | @echo 115 | @echo "Build finished." 116 | @echo "To view the help file:" 117 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Augmentor" 118 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Augmentor" 119 | @echo "# devhelp" 120 | 121 | .PHONY: epub 122 | epub: 123 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 124 | @echo 125 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 126 | 127 | .PHONY: epub3 128 | epub3: 129 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 130 | @echo 131 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 132 | 133 | .PHONY: latex 134 | latex: 135 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 136 | @echo 137 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 138 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 139 | "(use \`make latexpdf' here to do that automatically)." 140 | 141 | .PHONY: latexpdf 142 | latexpdf: 143 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 144 | @echo "Running LaTeX files through pdflatex..." 145 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 146 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 147 | 148 | .PHONY: latexpdfja 149 | latexpdfja: 150 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 151 | @echo "Running LaTeX files through platex and dvipdfmx..." 152 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 153 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 154 | 155 | .PHONY: text 156 | text: 157 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 158 | @echo 159 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 
160 | 161 | .PHONY: man 162 | man: 163 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 164 | @echo 165 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 166 | 167 | .PHONY: texinfo 168 | texinfo: 169 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 170 | @echo 171 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 172 | @echo "Run \`make' in that directory to run these through makeinfo" \ 173 | "(use \`make info' here to do that automatically)." 174 | 175 | .PHONY: info 176 | info: 177 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 178 | @echo "Running Texinfo files through makeinfo..." 179 | make -C $(BUILDDIR)/texinfo info 180 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 181 | 182 | .PHONY: gettext 183 | gettext: 184 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 185 | @echo 186 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 187 | 188 | .PHONY: changes 189 | changes: 190 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 191 | @echo 192 | @echo "The overview file is in $(BUILDDIR)/changes." 193 | 194 | .PHONY: linkcheck 195 | linkcheck: 196 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 197 | @echo 198 | @echo "Link check complete; look for any errors in the above output " \ 199 | "or in $(BUILDDIR)/linkcheck/output.txt." 200 | 201 | .PHONY: doctest 202 | doctest: 203 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 204 | @echo "Testing of doctests in the sources finished, look at the " \ 205 | "results in $(BUILDDIR)/doctest/output.txt." 206 | 207 | .PHONY: coverage 208 | coverage: 209 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 210 | @echo "Testing of coverage in the sources finished, look at the " \ 211 | "results in $(BUILDDIR)/coverage/python.txt." 
212 | 213 | .PHONY: xml 214 | xml: 215 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 216 | @echo 217 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 218 | 219 | .PHONY: pseudoxml 220 | pseudoxml: 221 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 222 | @echo 223 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 224 | 225 | .PHONY: dummy 226 | dummy: 227 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 228 | @echo 229 | @echo "Build finished. Dummy builder generates no files." 230 | -------------------------------------------------------------------------------- /docs/code.bak: -------------------------------------------------------------------------------- 1 | Auto Generated Documentation 2 | ============================ 3 | 4 | Documentation of the ``Pipeline`` module. 5 | ----------------------------------------- 6 | 7 | .. automodule:: Pipeline 8 | :members: 9 | :undoc-members: 10 | 11 | Documentation of the ``Utilities`` module. 12 | ------------------------------------------ 13 | 14 | .. automodule:: Utilities 15 | :members: 16 | :undoc-members: 17 | 18 | Documentation of the ``ImageSource`` module. 19 | -------------------------------------------- 20 | 21 | .. automodule:: ImageSource 22 | :members: 23 | :undoc-members: 24 | -------------------------------------------------------------------------------- /docs/code.rst: -------------------------------------------------------------------------------- 1 | Auto Generated Documentation 2 | **************************** 3 | 4 | .. automodule:: Augmentor 5 | 6 | Module by Module Documentation 7 | ============================== 8 | 9 | Documentation of the Pipeline module 10 | ------------------------------------ 11 | 12 | .. automodule:: Augmentor.Pipeline 13 | :members: 14 | :undoc-members: 15 | 16 | Documentation of the Operations module 17 | -------------------------------------- 18 | 19 | .. 
automodule:: Augmentor.Operations 20 | :members: 21 | :undoc-members: 22 | 23 | Documentation of the ImageUtilities module 24 | ------------------------------------------ 25 | 26 | .. automodule:: Augmentor.ImageUtilities 27 | :members: 28 | :undoc-members: 29 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Augmentor documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Dec 5 11:08:33 2016. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | import os 20 | import sys 21 | sys.path.insert(0, os.path.abspath('..')) 22 | #sys.path.insert(0, '/home/marcus/git/Augmentor') 23 | 24 | print("MY MESSAGES") 25 | for blops in sys.path: 26 | print(blops) 27 | print("END MY MESSAGES") 28 | 29 | autoclass_content = 'both' 30 | 31 | # -- General configuration ------------------------------------------------ 32 | 33 | # If your documentation needs a minimal Sphinx version, state it here. 34 | # 35 | # needs_sphinx = '1.0' 36 | 37 | # Add any Sphinx extension module names here, as strings. They can be 38 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 39 | # ones. 
40 | extensions = [ 41 | 'sphinx.ext.coverage', 42 | 'sphinx.ext.mathjax', 43 | 'sphinx.ext.viewcode', 44 | 'sphinx.ext.autodoc' 45 | ] 46 | 47 | # Add any paths that contain templates here, relative to this directory. 48 | templates_path = ['_templates'] 49 | 50 | # The suffix(es) of source filenames. 51 | # You can specify multiple suffix as a list of string: 52 | # 53 | # source_suffix = ['.rst', '.md'] 54 | source_suffix = '.rst' 55 | 56 | # The encoding of source files. 57 | # 58 | # source_encoding = 'utf-8-sig' 59 | 60 | # The master toctree document. 61 | master_doc = 'index' 62 | 63 | # General information about the project. 64 | project = u'Augmentor' 65 | copyright = u'2023, Marcus D. Bloice' 66 | author = u'Marcus D. Bloice' 67 | 68 | # The version info for the project you're documenting, acts as replacement for 69 | # |version| and |release|, also used in various other places throughout the 70 | # built documents. 71 | # 72 | # The short X.Y version. 73 | version = u'0.2.12' 74 | # The full version, including alpha/beta/rc tags. 75 | release = u'0.2.12' 76 | 77 | # The language for content autogenerated by Sphinx. Refer to documentation 78 | # for a list of supported languages. 79 | # 80 | # This is also used if you do content translation via gettext catalogs. 81 | # Usually you set "language" from the command line for these cases. 82 | language = None 83 | 84 | # There are two options for replacing |today|: either, you set today to some 85 | # non-false value, then it is used: 86 | # 87 | # today = '' 88 | # 89 | # Else, today_fmt is used as the format for a strftime call. 90 | # 91 | # today_fmt = '%B %d, %Y' 92 | 93 | # List of patterns, relative to source directory, that match files and 94 | # directories to ignore when looking for source files. 
95 | # These patterns also affect html_static_path and html_extra_path 96 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 97 | 98 | # The reST default role (used for this markup: `text`) to use for all 99 | # documents. 100 | # 101 | # default_role = None 102 | 103 | # If true, '()' will be appended to :func: etc. cross-reference text. 104 | # 105 | # add_function_parentheses = True 106 | 107 | # If true, the current module name will be prepended to all description 108 | # unit titles (such as .. function::). 109 | # 110 | # add_module_names = True 111 | 112 | # If true, sectionauthor and moduleauthor directives will be shown in the 113 | # output. They are ignored by default. 114 | # 115 | # show_authors = False 116 | 117 | # The name of the Pygments (syntax highlighting) style to use. 118 | pygments_style = 'sphinx' 119 | 120 | # A list of ignored prefixes for module index sorting. 121 | # modindex_common_prefix = [] 122 | 123 | # If true, keep warnings as "system message" paragraphs in the built documents. 124 | # keep_warnings = False 125 | 126 | # If true, `todo` and `todoList` produce output, else they produce nothing. 127 | todo_include_todos = False 128 | 129 | 130 | # -- Options for HTML output ---------------------------------------------- 131 | 132 | # The theme to use for HTML and HTML Help pages. See the documentation for 133 | # a list of builtin themes. 134 | # 135 | html_theme = 'default' 136 | 137 | # Theme options are theme-specific and customize the look and feel of a theme 138 | # further. For a list of options available for each theme, see the 139 | # documentation. 140 | # 141 | # html_theme_options = {} 142 | 143 | # Add any paths that contain custom themes here, relative to this directory. 144 | # html_theme_path = [] 145 | 146 | # The name for this set of Sphinx documents. 147 | # " v documentation" by default. 148 | # 149 | # html_title = u'Augmentor v0.1.1' 150 | 151 | # A shorter title for the navigation bar.
Default is the same as html_title. 152 | # 153 | # html_short_title = None 154 | 155 | # The name of an image file (relative to this directory) to place at the top 156 | # of the sidebar. 157 | # 158 | html_logo = 'logo.png' 159 | 160 | # The name of an image file (relative to this directory) to use as a favicon of 161 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 162 | # pixels large. 163 | # 164 | # html_favicon = None 165 | 166 | # Add any paths that contain custom static files (such as style sheets) here, 167 | # relative to this directory. They are copied after the builtin static files, 168 | # so a file named "default.css" will overwrite the builtin "default.css". 169 | html_static_path = ['_static'] 170 | 171 | # Add any extra paths that contain custom files (such as robots.txt or 172 | # .htaccess) here, relative to this directory. These files are copied 173 | # directly to the root of the documentation. 174 | # 175 | # html_extra_path = [] 176 | 177 | # If not None, a 'Last updated on:' timestamp is inserted at every page 178 | # bottom, using the given strftime format. 179 | # The empty string is equivalent to '%b %d, %Y'. 180 | # 181 | # html_last_updated_fmt = None 182 | 183 | # If true, SmartyPants will be used to convert quotes and dashes to 184 | # typographically correct entities. 185 | # 186 | # html_use_smartypants = True 187 | 188 | # Custom sidebar templates, maps document names to template names. 189 | # 190 | # html_sidebars = {} 191 | 192 | # Additional templates that should be rendered to pages, maps page names to 193 | # template names. 194 | # 195 | # html_additional_pages = {} 196 | 197 | # If false, no module index is generated. 198 | # 199 | # html_domain_indices = True 200 | 201 | # If false, no index is generated. 202 | # 203 | # html_use_index = True 204 | 205 | # If true, the index is split into individual pages for each letter. 
206 | # 207 | # html_split_index = False 208 | 209 | # If true, links to the reST sources are added to the pages. 210 | # 211 | # html_show_sourcelink = True 212 | 213 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 214 | # 215 | # html_show_sphinx = True 216 | 217 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 218 | # 219 | # html_show_copyright = True 220 | 221 | # If true, an OpenSearch description file will be output, and all pages will 222 | # contain a tag referring to it. The value of this option must be the 223 | # base URL from which the finished HTML is served. 224 | # 225 | # html_use_opensearch = '' 226 | 227 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 228 | # html_file_suffix = None 229 | 230 | # Language to be used for generating the HTML full-text search index. 231 | # Sphinx supports the following languages: 232 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 233 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' 234 | # 235 | # html_search_language = 'en' 236 | 237 | # A dictionary with options for the search language support, empty by default. 238 | # 'ja' uses this config value. 239 | # 'zh' user can custom change `jieba` dictionary path. 240 | # 241 | # html_search_options = {'type': 'default'} 242 | 243 | # The name of a javascript file (relative to the configuration directory) that 244 | # implements a search results scorer. If empty, the default will be used. 245 | # 246 | # html_search_scorer = 'scorer.js' 247 | 248 | # Output file base name for HTML help builder. 249 | htmlhelp_basename = 'Augmentordoc' 250 | 251 | # -- Options for LaTeX output --------------------------------------------- 252 | 253 | latex_elements = { 254 | # The paper size ('letterpaper' or 'a4paper'). 255 | # 256 | # 'papersize': 'letterpaper', 257 | 258 | # The font size ('10pt', '11pt' or '12pt'). 
259 | # 260 | # 'pointsize': '10pt', 261 | 262 | # Additional stuff for the LaTeX preamble. 263 | # 264 | # 'preamble': '', 265 | 266 | # Latex figure (float) alignment 267 | # 268 | # 'figure_align': 'htbp', 269 | } 270 | 271 | # Grouping the document tree into LaTeX files. List of tuples 272 | # (source start file, target name, title, 273 | # author, documentclass [howto, manual, or own class]). 274 | latex_documents = [ 275 | (master_doc, 'Augmentor.tex', u'Augmentor Documentation', 276 | u'Marcus D. Bloice', 'manual'), 277 | ] 278 | 279 | # The name of an image file (relative to this directory) to place at the top of 280 | # the title page. 281 | # 282 | # latex_logo = None 283 | 284 | # For "manual" documents, if this is true, then toplevel headings are parts, 285 | # not chapters. 286 | # 287 | # latex_use_parts = False 288 | 289 | # If true, show page references after internal links. 290 | # 291 | # latex_show_pagerefs = False 292 | 293 | # If true, show URL addresses after external links. 294 | # 295 | # latex_show_urls = False 296 | 297 | # Documents to append as an appendix to all manuals. 298 | # 299 | # latex_appendices = [] 300 | 301 | # If false, will not define \strong, \code, \titleref, \crossref ... but only 302 | # \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added 303 | # packages. 304 | # 305 | # latex_keep_old_macro_names = True 306 | 307 | # If false, no module index is generated. 308 | # 309 | # latex_domain_indices = True 310 | 311 | 312 | # -- Options for manual page output --------------------------------------- 313 | 314 | # One entry per manual page. List of tuples 315 | # (source start file, name, description, authors, manual section). 316 | man_pages = [ 317 | (master_doc, 'augmentor', u'Augmentor Documentation', 318 | [author], 1) 319 | ] 320 | 321 | # If true, show URL addresses after external links.
322 | # 323 | # man_show_urls = False 324 | 325 | 326 | # -- Options for Texinfo output ------------------------------------------- 327 | 328 | # Grouping the document tree into Texinfo files. List of tuples 329 | # (source start file, target name, title, author, 330 | # dir menu entry, description, category) 331 | texinfo_documents = [ 332 | (master_doc, 'Augmentor', u'Augmentor Documentation', 333 | author, 'Augmentor', 'One line description of project.', 334 | 'Miscellaneous'), 335 | ] 336 | 337 | # Documents to append as an appendix to all manuals. 338 | # 339 | # texinfo_appendices = [] 340 | 341 | # If false, no module index is generated. 342 | # 343 | # texinfo_domain_indices = True 344 | 345 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 346 | # 347 | # texinfo_show_urls = 'footnote' 348 | 349 | # If true, do not generate a @detailmenu in the "Top" node's menu. 350 | # 351 | # texinfo_no_detailmenu = False 352 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Augmentor documentation master file, created by 2 | sphinx-quickstart on Thu Aug 4 13:26:55 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Augmentor 7 | ========= 8 | 9 | Augmentor is a Python package designed to aid the augmentation and artificial generation of image data for machine learning tasks. It is primarily a data augmentation tool, but will also incorporate basic image pre-processing functionality. 10 | 11 | .. tip:: 12 | 13 | A Julia version of the package is also being actively developed. If you prefer to use Julia, you can find it `here `_. 14 | 15 | The documentation is organised as follows: 16 | 17 | .. 
toctree:: 18 | :maxdepth: 3 19 | :caption: User Guide 20 | 21 | userguide/mainfeatures 22 | userguide/install 23 | userguide/usage 24 | userguide/examples 25 | userguide/extend 26 | 27 | code 28 | 29 | .. toctree:: 30 | :maxdepth: 2 31 | :caption: Licence and Terms 32 | 33 | licence 34 | 35 | Indices and tables 36 | ================== 37 | 38 | * :ref:`genindex` 39 | * :ref:`modindex` 40 | * :ref:`search` 41 | -------------------------------------------------------------------------------- /docs/joss/paper.bib: -------------------------------------------------------------------------------- 1 | @inproceedings{simard2003, 2 | title={{Best Practices for Convolutional Neural Networks Applied to Visual Document Analysis}}, 3 | author={Simard, Patrice Y and Steinkraus, David and Platt, John C and others}, 4 | booktitle={ICDAR}, 5 | volume={3}, 6 | pages={958--962}, 7 | year={2003} 8 | } 9 | 10 | @article{zhong2017, 11 | title={Random Erasing Data Augmentation}, 12 | author={Zhong, Zhun and Zheng, Liang and Kang, Guoliang and Li, Shaozi and Yang, Yi}, 13 | journal={arXiv preprint arXiv:1708.04896}, 14 | year={2017} 15 | } 16 | 17 | @online{augmentorPython, 18 | author = {Marcus D Bloice}, 19 | title = {Augmentor: Image augmentation library in Python for machine learning}, 20 | year = 2017, 21 | url = {https://github.com/mdbloice/Augmentor}, 22 | urldate = {2017-06-06} 23 | } 24 | 25 | @online{augmentorJulia, 26 | author = {Christof Stocker}, 27 | title = {Augmentor.jl: A fast image augmentation library in Julia for machine learning}, 28 | year = 2017, 29 | url = {https://github.com/evizero/Augmentor.jl}, 30 | urldate = {2017-06-06} 31 | } 32 | 33 | @article{bloice2017, 34 | title={Augmentor: An Image Augmentation Library for Machine Learning}, 35 | author={Bloice, Marcus D. 
and Stocker, Christof and Holzinger, Andreas}, 36 | journal={arXiv preprint arXiv:1708.04680}, 37 | year={2017} 38 | } 39 | 40 | -------------------------------------------------------------------------------- /docs/joss/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Augmentor: An Image Augmentation Library for Machine Learning' 3 | tags: 4 | - image augmentation 5 | - machine learning 6 | - neural networks 7 | authors: 8 | - name: Marcus D Bloice 9 | orcid: 0000-0002-2468-4086 10 | affiliation: 1 11 | - name: Christof Stocker 12 | orcid: 0000-0002-1276-7976 13 | - name: Andreas Holzinger 14 | orcid: 0000-0002-6786-5194 15 | affiliation: 1 16 | affiliations: 17 | - name: Institute for Medical Informatics, Statistics and Documentation, Medical University of Graz, Austria 18 | index: 1 19 | date: 22 Sep 2017 20 | bibliography: paper.bib 21 | --- 22 | 23 | # Summary 24 | 25 | Augmentor is an image augmentation library for machine learning. It provides a stochastic, pipeline-based API for the generation of image data by effectively allowing new image data to be sampled at runtime according to a user-defined set of operations and parameters. Common operations include arbitrary rotations, mirroring, shearing, skewing, and random crops, as well as less frequently implemented operations such as randomised elastic distortions or random erasing [@simard2003; @zhong2017]. 26 | 27 | The pipeline approach means that images are passed through a user-defined set of operations sequentially, where each operation is applied stochastically according to a probability parameter that is also defined by the user. 
28 | 29 | The goal of the package, therefore, is to: 30 | 31 | - Provide a platform and framework independent image augmentation library for machine learning with a convenient API 32 | - Provide a library that is extensible, by allowing for custom operations to be added easily, even at runtime 33 | - Provide less commonly implemented operations, such as the aforementioned elastic distortions 34 | - Enable fine level control of parameters of the augmentation procedure 35 | 36 | The software is available in Python [@augmentorPython] and Julia [@augmentorJulia] versions, the latter being optimised for speed [@bloice2017]. The authors would also like to thank all those who have contributed to the project, a list of whom can be found on the contributors page of the GitHub repository: <https://github.com/mdbloice/Augmentor/graphs/contributors>. 37 | 38 | # References 39 | -------------------------------------------------------------------------------- /docs/licence.rst: -------------------------------------------------------------------------------- 1 | Licence 2 | ======= 3 | 4 | Augmentor Licence 5 | ----------------- 6 | 7 | The Augmentor package is licenced under the terms of the MIT licence. 8 | 9 | The MIT License (MIT) 10 | 11 | Copyright (c) 2016 Marcus D. Bloice 12 | 13 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 18 | -------------------------------------------------------------------------------- /docs/logo.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdbloice/Augmentor/894d5cc414205cf4becfb7c6f987b8c66feb9542/docs/logo.ico -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdbloice/Augmentor/894d5cc414205cf4becfb7c6f987b8c66feb9542/docs/logo.png -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. 
pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. epub3 to make an epub3 31 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 32 | echo. text to make text files 33 | echo. man to make manual pages 34 | echo. texinfo to make Texinfo files 35 | echo. gettext to make PO message catalogs 36 | echo. changes to make an overview over all changed/added/deprecated items 37 | echo. xml to make Docutils-native XML files 38 | echo. pseudoxml to make pseudoxml-XML files for display purposes 39 | echo. linkcheck to check all external links for integrity 40 | echo. doctest to run all doctests embedded in the documentation if enabled 41 | echo. coverage to run coverage check of the documentation if enabled 42 | echo. dummy to check syntax errors of document sources 43 | goto end 44 | ) 45 | 46 | if "%1" == "clean" ( 47 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 48 | del /q /s %BUILDDIR%\* 49 | goto end 50 | ) 51 | 52 | 53 | REM Check if sphinx-build is available and fallback to Python version if any 54 | %SPHINXBUILD% 1>NUL 2>NUL 55 | if errorlevel 9009 goto sphinx_python 56 | goto sphinx_ok 57 | 58 | :sphinx_python 59 | 60 | set SPHINXBUILD=python -m sphinx.__init__ 61 | %SPHINXBUILD% 2> nul 62 | if errorlevel 9009 ( 63 | echo. 64 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 65 | echo.installed, then set the SPHINXBUILD environment variable to point 66 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 67 | echo.may add the Sphinx directory to PATH. 68 | echo. 
69 | echo.If you don't have Sphinx installed, grab it from 70 | echo.http://sphinx-doc.org/ 71 | exit /b 1 72 | ) 73 | 74 | :sphinx_ok 75 | 76 | 77 | if "%1" == "html" ( 78 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 79 | if errorlevel 1 exit /b 1 80 | echo. 81 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 82 | goto end 83 | ) 84 | 85 | if "%1" == "dirhtml" ( 86 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 87 | if errorlevel 1 exit /b 1 88 | echo. 89 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 90 | goto end 91 | ) 92 | 93 | if "%1" == "singlehtml" ( 94 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 95 | if errorlevel 1 exit /b 1 96 | echo. 97 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 98 | goto end 99 | ) 100 | 101 | if "%1" == "pickle" ( 102 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 103 | if errorlevel 1 exit /b 1 104 | echo. 105 | echo.Build finished; now you can process the pickle files. 106 | goto end 107 | ) 108 | 109 | if "%1" == "json" ( 110 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 111 | if errorlevel 1 exit /b 1 112 | echo. 113 | echo.Build finished; now you can process the JSON files. 114 | goto end 115 | ) 116 | 117 | if "%1" == "htmlhelp" ( 118 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 119 | if errorlevel 1 exit /b 1 120 | echo. 121 | echo.Build finished; now you can run HTML Help Workshop with the ^ 122 | .hhp project file in %BUILDDIR%/htmlhelp. 123 | goto end 124 | ) 125 | 126 | if "%1" == "qthelp" ( 127 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 128 | if errorlevel 1 exit /b 1 129 | echo. 
130 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 131 | .qhcp project file in %BUILDDIR%/qthelp, like this: 132 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Augmentor.qhcp 133 | echo.To view the help file: 134 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Augmentor.ghc 135 | goto end 136 | ) 137 | 138 | if "%1" == "devhelp" ( 139 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 140 | if errorlevel 1 exit /b 1 141 | echo. 142 | echo.Build finished. 143 | goto end 144 | ) 145 | 146 | if "%1" == "epub" ( 147 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 148 | if errorlevel 1 exit /b 1 149 | echo. 150 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 151 | goto end 152 | ) 153 | 154 | if "%1" == "epub3" ( 155 | %SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3 156 | if errorlevel 1 exit /b 1 157 | echo. 158 | echo.Build finished. The epub3 file is in %BUILDDIR%/epub3. 159 | goto end 160 | ) 161 | 162 | if "%1" == "latex" ( 163 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 164 | if errorlevel 1 exit /b 1 165 | echo. 166 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdf" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 177 | goto end 178 | ) 179 | 180 | if "%1" == "latexpdfja" ( 181 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 182 | cd %BUILDDIR%/latex 183 | make all-pdf-ja 184 | cd %~dp0 185 | echo. 186 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 187 | goto end 188 | ) 189 | 190 | if "%1" == "text" ( 191 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 192 | if errorlevel 1 exit /b 1 193 | echo. 194 | echo.Build finished. The text files are in %BUILDDIR%/text. 
195 | goto end 196 | ) 197 | 198 | if "%1" == "man" ( 199 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 200 | if errorlevel 1 exit /b 1 201 | echo. 202 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 203 | goto end 204 | ) 205 | 206 | if "%1" == "texinfo" ( 207 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 208 | if errorlevel 1 exit /b 1 209 | echo. 210 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 211 | goto end 212 | ) 213 | 214 | if "%1" == "gettext" ( 215 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 216 | if errorlevel 1 exit /b 1 217 | echo. 218 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 219 | goto end 220 | ) 221 | 222 | if "%1" == "changes" ( 223 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 224 | if errorlevel 1 exit /b 1 225 | echo. 226 | echo.The overview file is in %BUILDDIR%/changes. 227 | goto end 228 | ) 229 | 230 | if "%1" == "linkcheck" ( 231 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 232 | if errorlevel 1 exit /b 1 233 | echo. 234 | echo.Link check complete; look for any errors in the above output ^ 235 | or in %BUILDDIR%/linkcheck/output.txt. 236 | goto end 237 | ) 238 | 239 | if "%1" == "doctest" ( 240 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 241 | if errorlevel 1 exit /b 1 242 | echo. 243 | echo.Testing of doctests in the sources finished, look at the ^ 244 | results in %BUILDDIR%/doctest/output.txt. 245 | goto end 246 | ) 247 | 248 | if "%1" == "coverage" ( 249 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 250 | if errorlevel 1 exit /b 1 251 | echo. 252 | echo.Testing of coverage in the sources finished, look at the ^ 253 | results in %BUILDDIR%/coverage/python.txt. 254 | goto end 255 | ) 256 | 257 | if "%1" == "xml" ( 258 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 259 | if errorlevel 1 exit /b 1 260 | echo. 261 | echo.Build finished. 
The XML files are in %BUILDDIR%/xml. 262 | goto end 263 | ) 264 | 265 | if "%1" == "pseudoxml" ( 266 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 267 | if errorlevel 1 exit /b 1 268 | echo. 269 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 270 | goto end 271 | ) 272 | 273 | if "%1" == "dummy" ( 274 | %SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy 275 | if errorlevel 1 exit /b 1 276 | echo. 277 | echo.Build finished. Dummy builder generates no files. 278 | goto end 279 | ) 280 | 281 | :end 282 | -------------------------------------------------------------------------------- /docs/userguide/examples.rst: -------------------------------------------------------------------------------- 1 | Examples 2 | ======== 3 | 4 | A number of typical usage scenarios are described here. 5 | 6 | .. note:: 7 | 8 | A full list of operations can be found in the :mod:`.Operations` module documentation. 9 | 10 | Initialising a pipeline 11 | ----------------------- 12 | 13 | .. code-block:: python 14 | 15 | import Augmentor 16 | 17 | path_to_data = "/home/user/images/dataset1/" 18 | 19 | # Create a pipeline 20 | p = Augmentor.Pipeline(path_to_data) 21 | 22 | Adding operations to a pipeline 23 | ------------------------------- 24 | 25 | .. code-block:: python 26 | 27 | # Add some operations to an existing pipeline. 28 | 29 | # First, we add a horizontal flip operation to the pipeline: 30 | p.flip_left_right(probability=0.4) 31 | 32 | # Now we add a vertical flip operation to the pipeline: 33 | p.flip_top_bottom(probability=0.8) 34 | 35 | # Add a rotate90 operation to the pipeline: 36 | p.rotate90(probability=0.1) 37 | 38 | Executing a pipeline 39 | -------------------- 40 | 41 | .. code-block:: python 42 | 43 | # Here we sample 100,000 images from the pipeline. 44 | 45 | # It is often useful to use scientific notation for specify 46 | # large numbers with trailing zeros. 
47 | num_of_samples = int(1e5) 48 | 49 | # Now we can sample from the pipeline: 50 | p.sample(num_of_samples) 51 | -------------------------------------------------------------------------------- /docs/userguide/extend.rst: -------------------------------------------------------------------------------- 1 | .. _extendingaugmentor: 2 | 3 | Extending Augmentor 4 | =================== 5 | 6 | Extending Augmentor to add new functionality is quite simple, and is performed in two steps: 7 | 8 | 1) Create a custom class which subclasses from the :class:`.Operation` base class, and 9 | 2) Add an object of your new class to the pipeline using the :func:`~Augmentor.Pipeline.Pipeline.add_operation` function. 10 | 11 | This allows you to add custom functionality and extend Augmentor at run-time. Of course, if you have written an operation that may be of benefit to the community, you can make a pull request on the GitHub repository. 12 | 13 | The following sections describe extending Augmentor in two steps. Step 1 involves creating a new :class:`.Operation` subclass, and step 2 involves using an object of your new custom operation in a pipeline. 14 | 15 | Step 1: Create a New Operation Subclass 16 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 17 | 18 | To create a custom operation and extend Augmentor: 19 | 20 | 1) You create a new class that inherits from the :class:`.Operation` base class. 21 | 2) You must overload the :func:`~Augmentor.Operations.Operation.perform_operation` method belonging to the superclass. 22 | 3) You must call the superclass's :func:`__init__` constructor. 23 | 4) You must return an object of type :class:`PIL.Image`. 24 | 25 | For example, to add a new operation called ``FoldImage``, you would write this code: 26 | 27 | .. 
code-block:: python 28 | 29 | # Create your new operation by inheriting from the Operation superclass: 30 | class FoldImage(Operation): 31 | # Here you can accept as many custom parameters as required: 32 | def __init__(self, probability, num_of_folds): 33 | # Call the superclass's constructor (meaning you must 34 | # supply a probability value): 35 | Operation.__init__(self, probability) 36 | # Set your custom operation's member variables here as required: 37 | self.num_of_folds = num_of_folds 38 | 39 | # Your class must implement the perform_operation method: 40 | def perform_operation(self, image): 41 | # Start of code to perform custom image operation. 42 | for fold in range(self.num_of_folds): 43 | pass 44 | # End of code to perform custom image operation. 45 | 46 | # Return the image so that it can be further processed in the pipeline: 47 | return image 48 | 49 | You have seen that you need to implement the :func:`~Augmentor.Operations.Operation.perform_operation` function and you must call the superclass's constructor which requires a :attr:`probability` value to be set. Ensure you return a PIL Image as a return value. 50 | 51 | If you wish to make these changes permanent, place your code in the :mod:`~Augmentor.Operations` **module**. 52 | 53 | .. hint:: 54 | 55 | You can also overload the superclass's :func:`~Augmentor.Operations.Operation.__str__` function to return a custom string for the object's description text. This is useful for some methods that display information about the operation, such as the :func:`~Augmentor.Pipeline.Pipeline.status` method. 56 | 57 | Step 2: Add an Object to the Pipeline Manually 58 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 59 | 60 | Once you have a new operation which is of type :class:`.Operation`, you can add an object of your new operation to an existing pipeline. 61 | 62 | ..
code-block:: python 63 | 64 | # Instantiate a new object of your custom operation 65 | fold = Fold(probability = 0.75, num_of_folds = 4) 66 | 67 | # Add this to the current pipeline 68 | p.add_operation(fold) 69 | 70 | # Execute the pipeline as normal, and your custom operation will be executed 71 | p.sample(1000) 72 | 73 | As you can see, adding custom operations at run-time is possible by subclassing the :class:`.Operation` class and adding an object of this class to the pipeline manually using the :func:`~Augmentor.Pipeline.Pipeline.add_operation` function. 74 | 75 | Using non-PIL Image Objects 76 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ 77 | 78 | Images can be converted to their raw formats for custom operations, for example by using NumPy: 79 | 80 | .. code-block:: python 81 | 82 | import numpy 83 | 84 | # Custom class declaration 85 | 86 | def perform_operation(image): 87 | 88 | image_array = numpy.array(image).astype('uint8') 89 | 90 | # Perform your custom operations here 91 | 92 | image = PIL.Image.fromarray(image_array) 93 | 94 | return image 95 | -------------------------------------------------------------------------------- /docs/userguide/install.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Installation is via ``pip``: 5 | 6 | .. code-block:: bash 7 | 8 | pip install Augmentor 9 | 10 | If you have to use ``sudo`` it is recommended that you use the ``-H`` flag: 11 | 12 | .. code-block:: bash 13 | 14 | sudo -H pip install Augmentor 15 | 16 | Requirements 17 | ------------ 18 | 19 | Augmentor requires ``Pillow`` and ``tqdm``. Note that Pillow is a fork of PIL, but both packages cannot exist simultaneously. Uninstall PIL before installing Pillow. 21 | 21 | Building 22 | -------- 23 | 24 | If you prefer to build the package from source, first clone the repository: 25 | 26 | .. 
code-block:: bash 27 | 28 | git clone https://github.com/mdbloice/Augmentor.git 29 | 30 | Then enter the ``Augmentor`` directory and build the package: 31 | 32 | .. code-block:: bash 33 | 34 | cd Augmentor 35 | python setup.py install 36 | 37 | Alternatively you can first run ``python setup.py build`` followed by ``python setup.py install``. This can be useful for debugging. 38 | 39 | .. attention:: 40 | 41 | If you are compiling from source you may need to compile the dependencies also, including Pillow. On Linux this means having libpng (``libpng-dev``) and zlib (``zlib1g-dev``) installed. 42 | -------------------------------------------------------------------------------- /docs/userguide/mainfeatures.rst: -------------------------------------------------------------------------------- 1 | .. _mainfeatures: 2 | 3 | Main Features 4 | ============= 5 | 6 | In this section we will describe the main features of Augmentor with example code and output. 7 | 8 | Augmentor is a software package for image augmentation with an emphasis on providing operations that are typically used in the generation of image data for machine learning problems. 9 | 10 | In principle, Augmentor consists of a number of classes for standard image manipulation functions, such as the ``Rotate`` class or the ``Crop`` class. You interact and use these classes using a large number of convenience functions, which cover most of the functions you might require when augmenting image datasets for machine learning problems. 11 | 12 | Because image augmentation is often a multi-stage procedure, Augmentor uses a **pipeline**-based approach, where **operations** are added sequentially in order to generate a pipeline. Images are then passed through this pipeline, where each operation is applied to the image as it passes through. 13 | 14 | Also, Augmentor applies operations to images **stochastically** as they pass through the pipeline, according to a user-defined probability value for each operation. 
15 | 16 | Therefore every operation has at minimum a probability parameter, which controls how likely the operation will be applied to each image that is seen as the image passes through the pipeline. Take for example a rotate operation, which is defined as follows: 17 | 18 | .. code-block:: python 19 | 20 | rotate(probability=0.5, max_left_rotation=5, max_right_rotation=10) 21 | 22 | The ``probability`` parameter controls how often the operation is applied. The ``max_left_rotation`` and ``max_right_rotation`` controls the degree by which the image is rotated, **if** the operation is applied. The value, in this case between -5 and 10 degrees, is chosen at random. 23 | 24 | Therefore, Augmentor allows you to create an augmentation pipeline, which chains together operations that are applied stochastically, where the parameters of each of these operations are also chosen at random, within a range specified by the user. This means that each time an image is passed through the pipeline, a different image is returned. Depending on the number of operations in the pipeline, and the range of values that each operation has available, a very large amount of new image data can be created in this way. 25 | 26 | All functions described in this section are made available by the Pipeline object. To begin using Augmentor, you always create a new Pipeline object by instantiating it with a path to a set of images or image that you wish to augment: 27 | 28 | .. code-block:: python 29 | 30 | >>> import Augmentor 31 | >>> p = Augmentor.Pipeline("/path/to/images") 32 | Initialised with 100 images found in selected directory. 33 | 34 | You can now add operations to this pipeline using the ``p`` Pipeline object. For example, to add a rotate operation: 35 | 36 | .. code-block:: python 37 | 38 | >>> p.rotate(probability=1.0, max_left_rotation=5, max_right_rotation=10) 39 | 40 | All pipeline operations have at least a probability parameter. 
41 | 42 | To see the status of the current pipeline: 43 | 44 | .. code-block:: python 45 | 46 | >>> p.status() 47 | There are 1 operation(s) in the current pipeline. 48 | Index 0: 49 | Operation RotateRange (probability: 1): 50 | Attribute: max_right_rotation (10) 51 | Attribute: max_left_rotation (-5) 52 | Attribute: probability (1) 53 | 54 | There are 1 image(s) in the source directory. 55 | Dimensions: 56 | Width: 400 Height: 400 57 | Formats: 58 | PNG 59 | 60 | You can remove operations using the ``remove_operation(index)`` function and the appropriate ``index`` indicator from above. 61 | 62 | Full documentation of all functions and operations can be found in the auto-generated documentation. This guide suffices as a rough guide to the major features of the package, however. 63 | 64 | .. _perspectiveskewing: 65 | 66 | Perspective Skewing 67 | ------------------- 68 | 69 | Perspective skewing involves transforming the image so that it appears that you are looking at the image from a different angle. 70 | 71 | The following main functions are used for skewing: 72 | 73 | - ``skew_tilt()`` 74 | - ``skew_left_right()`` 75 | - ``skew_top_bottom()`` 76 | - ``skew_corner()`` 77 | - ``skew()`` 78 | 79 | To skew or tilt an image either left, right, forwards, or backwards, use the ``skew_tilt`` function. 
The image will be skewed by a random amount in the following directions: 80 | 81 | +-------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------+ 82 | | Skew Tilt Left | Skew Tilt Right | Skew Tilt Forward | Skew Tilt Backward | 83 | +-------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------+ 84 | | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/TiltLeft_s.png | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/TiltRight_s.png | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/TiltForward_s.png | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/TiltBackward_s.png | 85 | +-------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------+ 86 | 87 | Or, to skew an image by a random corner, use the ``skew_corner()`` function. 
The image will be skewed using one of the following 8 skew types: 88 | 89 | +------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+ 90 | | Skew Type 0 | Skew Type 1 | Skew Type 2 | Skew Type 3 | 91 | +------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+ 92 | | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/Corner0_s.png | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/Corner1_s.png | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/Corner2_s.png | .. 
image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/Corner3_s.png | 93 | +------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+ 94 | | Skew Type 4 | Skew Type 5 | Skew Type 6 | Skew Type 7 | 95 | +------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+ 96 | | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/Corner4_s.png | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/Corner5_s.png | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/Corner6_s.png | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/Corner7_s.png | 97 | +------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+ 98 | 99 | If you only wish to skew either left or right, use ``skew_left_right()``. To skew only forwards or backwards, use ``skew_top_bottom()``. 
100 | 101 | The function ``skew()`` will skew your image in a random direction of the 12 directions shown above. 102 | 103 | Elastic Distortions 104 | ------------------- 105 | 106 | Elastic distortions allow you to make distortions to an image while maintaining the image's aspect ratio. 107 | 108 | - ``random_distortion()`` 109 | 110 | Here, we have taken a sample image and generated 50 samples, with a grid size of 16 and a distortion magnitude of 8: 111 | 112 | +-------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+ 113 | | Original Image | Random distortions applied | 114 | +-------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+ 115 | | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/orig.png | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/distort.gif | 116 | +-------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+ 117 | 118 | To highlight how this might be useful in a real-world scenario, here is the distort function being applied to a single image of a figure 8. 
119 | 120 | +------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ 121 | | Original Image | Random distortions applied | 122 | +------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ 123 | | .. image:: https://cloud.githubusercontent.com/assets/16042756/23697279/79850d52-03e7-11e7-9445-475316b702a3.png | .. image:: https://cloud.githubusercontent.com/assets/16042756/23697283/802698a6-03e7-11e7-94b7-f0b61977ef33.gif | 124 | +------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ 125 | 126 | Realistic new samples can be created using this method. 127 | 128 | See the auto-generated documentation for more details regarding this function's parameters. 129 | 130 | .. _rotating: 131 | 132 | Rotating 133 | -------- 134 | 135 | Rotating can be performed in a number of ways. When rotating by modulo 90, the image is simply rotated and saved. To rotate by arbitrary degrees, then a crop is taken from the centre of the newly rotated image. 136 | 137 | Rotate functions that are available are: 138 | 139 | - ``rotate()`` 140 | - ``rotate90()`` 141 | - ``rotate180()`` 142 | - ``rotate270()`` 143 | - ``rotate_random_90()`` 144 | 145 | Most of these methods are self-explanatory. The ``rotate_random_90()`` function will rotate the image by either 90, 180, or 270 degrees. 146 | 147 | However, the ``rotate()`` warrants more discussion and will be described here. 
When an image is rotated, and it is not a multiple of 90 degrees, the image must either be stretched to accommodate a now larger image, or some of the image must be cut, as demonstrated below: 148 | 149 | +-------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------+ 150 | | Original Image | Rotated 10 degrees | 151 | +-------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------+ 152 | | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/orig.png | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/rotate.png | 153 | +-------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------+ 154 | 155 | As can be seen above, an arbitrary, non-modulo 90, rotation will unfortunately result in the image being padded in each corner. To alleviate this, Augmentor's default behaviour is to crop the image and retain the largest crop possible while maintaining the image's aspect ratio: 156 | 157 | +-------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------+ 158 | | Original Image | Rotated 10 degrees, automatically cropped | 159 | +-------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------+ 160 | | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/orig.png | .. 
image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/rotate_aug_b.png | 161 | +-------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------+ 162 | 163 | This will, of course, result in the image being zoomed in. For smaller rotations of between -5 and 5 degrees, this zoom effect is not particularly drastic. 164 | 165 | .. _shearing: 166 | 167 | Shearing 168 | -------- 169 | 170 | Shearing tilts an image along one of its sides. The can be in the x-axis or y-axis direction. 171 | 172 | Functions available for shearing are: 173 | 174 | - ``shear()`` 175 | 176 | If you shear in the x or y axis, you will normally get images that look as follows: 177 | 178 | +-------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+ 179 | | Original image | Shear (x-axis) 20 degrees | Shear (y-axis) 20 degrees | 180 | +-------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+ 181 | | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/orig.png | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/shear_x.png | .. 
image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/shear_y.png | 182 | +-------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+ 183 | 184 | However, as with rotations, you are left with image that are either larger in size, or are cropped to the original size but contain padding in at the sides of the images. 185 | 186 | Augmentor automatically crops the largest area possible before returning the image, as follows: 187 | 188 | +-------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+ 189 | | Original image | Shear (x-axis) 20 degrees | Shear (y-axis) 20 degrees | 190 | +-------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+ 191 | | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/orig.png | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/shear_x_aug.png | .. 
image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/shear_y_aug.png | 192 | +-------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+ 193 | 194 | You can shear by random amounts, a fixed amount, in random directions, or in a fixed direction. See the auto-generated documentation for more details. 195 | 196 | Cropping 197 | -------- 198 | 199 | Cropping functions which are available are: 200 | 201 | - ``crop_centre()`` 202 | - ``crop_by_size()`` 203 | - ``crop_random()`` 204 | 205 | The ``crop_random()`` function warrants further explanation. Here a region of a size specified by the user is cropped at random from the original image: 206 | 207 | +-------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------+ 208 | | Original image | Random crops | 209 | +-------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------+ 210 | | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/orig.png | .. 
image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/crop.gif | 211 | +-------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------+ 212 | 213 | You could combine this with a resize operation, so that the images returned are the same size as the images of the original, pre-augmented dataset: 214 | 215 | +-------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+ 216 | | Original image | Random crops + resize operation | 217 | +-------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+ 218 | | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/orig.png | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/crop_resize.gif | 219 | +-------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+ 220 | 221 | 222 | Mirroring 223 | --------- 224 | 225 | The following functions are available for mirroring images (translating them through the x any y axes): 226 | 227 | - ``flip_left_right()`` 228 | - ``flip_top_bottom()`` 229 | - ``flip_random()`` 230 | 231 | Of these, ``flip_random()`` can be used in situations where mirroring through both axes may make sense. 
We may, for example, combine random mirroring, with random distortions, to create new data: 232 | 233 | +--------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------+ 234 | | Original image | Random mirroring + random distortions | 235 | +--------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------+ 236 | | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/eight_200px.png | .. image:: https://raw.githubusercontent.com/mdbloice/AugmentorFiles/master/UsageGuide/flip_distort.gif | 237 | +--------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------+ 238 | 239 | 240 | Notes 241 | ----- 242 | 243 | Checkerboard image obtained from WikiMedia Commons and is in the public domain. See https://commons.wikimedia.org/wiki/File:Checkerboard_pattern.svg 244 | 245 | Skin lesion image obtained from the ISIC Archive: 246 | 247 | - Image id: 5436e3adbae478396759f0f1 248 | - Image name: ISIC_0000017.jpg 249 | - Download: https://isic-archive.com:443/api/v1/image/5436e3adbae478396759f0f1/download 250 | 251 | See https://isic-archive.com/#images for further details. 252 | -------------------------------------------------------------------------------- /docs/userguide/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | Here we describe the general usage of Augmentor. 5 | 6 | Getting Started 7 | --------------- 8 | 9 | To use Augmentor, the following general procedure is followed: 10 | 11 | 1. 
You instantiate a :class:`~Augmentor.Pipeline.Pipeline` object pointing to a directory containing your initial image data set. 12 | 2. You define a number of operations to perform on this data set using your :class:`~Augmentor.Pipeline.Pipeline` object. 13 | 3. You execute these operations by calling the :class:`~Augmentor.Pipeline.Pipeline`'s :func:`~Augmentor.Pipeline.Pipeline.sample` method. 14 | 15 | We will go through each of these steps in order in the following 3 sub-sections. 16 | 17 | Step 1: Create a New Pipeline 18 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 19 | 20 | Let us first create an empty pipeline. In other words, to begin any augmentation task, you must first initialise a :class:`~Augmentor.Pipeline.Pipeline` object, that points to a directory where your original image dataset is stored: 21 | 22 | .. code-block:: python 23 | 24 | >>> import Augmentor 25 | >>> p = Augmentor.Pipeline("/path/to/images") 26 | Initialised with 100 images found in selected directory. 27 | 28 | The variable ``p`` now contains a :class:`~Augmentor.Pipeline.Pipeline` object, and has been initialised with a list of images found in the source directory. 29 | 30 | Step 2: Add Operations to the Pipeline 31 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 32 | 33 | Once you have created a :class:`~Augmentor.Pipeline.Pipeline`, ``p``, we can begin by adding operations to ``p``. For example, we shall begin by adding a :func:`~Augmentor.Pipeline.Pipeline.rotate` operation: 34 | 35 | .. code-block:: python 36 | 37 | >>> p.rotate(probability=0.7, max_left_rotation=10, max_right_rotation=10) 38 | 39 | In this case, we have added a :func:`~Augmentor.Pipeline.Pipeline.rotate` operation, that will execute with a probability of 70%, and have defined the maximum range by which an image will be rotated from between -10 and 10 degrees. 40 | 41 | Next, we add a further operation, in this case a :func:`~Augmentor.Pipeline.Pipeline.zoom` operation: 42 | 43 | .. 
code-block:: python 44 | 45 | >>> p.zoom(probability=0.3, min_factor=1.1, max_factor=1.6) 46 | 47 | This time, we have specified that we wish the operation to be applied with a probability of 30%, while the scale should be randomly selected from between 1.1 and 1.6. 48 | 49 | Step 3: Execute and Sample From the Pipeline 50 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 51 | 52 | Once you have added the operations that you require, you can generate new, augmented data by using the :func:`~Augmentor.Pipeline.Pipeline.sample` function and specify the number of images you require, in this case 10,000: 53 | 54 | .. code-block:: python 55 | 56 | >>> p.sample(10000) 57 | 58 | A progress bar will appear providing a number of metrics while your samples are generated. Newly generated, augmented images will by default be saved into a directory named **output**, relative to the directory which contains your initial image data set. 59 | 60 | .. hint:: 61 | 62 | A full list of operations can be found in the :mod:`~Augmentor.Operations` module documentation. 
63 | -------------------------------------------------------------------------------- /notebooks/Augmentor_Keras.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Training a Neural Network using Augmentor and Keras\n", 8 | "\n", 9 | "In this notebook, we will train a simple convolutional neural network on the MNIST dataset using Augmentor to augment images on the fly using a generator.\n", 10 | "\n", 11 | "## Import Required Libraries\n", 12 | "\n", 13 | "We start by making a number of imports:" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "name": "stderr", 23 | "output_type": "stream", 24 | "text": [ 25 | "Using TensorFlow backend.\n" 26 | ] 27 | } 28 | ], 29 | "source": [ 30 | "import Augmentor\n", 31 | "\n", 32 | "import keras\n", 33 | "from keras.models import Sequential\n", 34 | "from keras.layers import Dense, Dropout, Flatten\n", 35 | "from keras.layers import Conv2D, MaxPooling2D\n", 36 | "\n", 37 | "import numpy as np\n", 38 | "\n", 39 | "import matplotlib.pyplot as plt\n", 40 | "%matplotlib inline" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "## Define a Convolutional Neural Network\n", 48 | "\n", 49 | "Once the libraries have been imported, we define a small convolutional neural network. 
See the Keras documentation for details of this network: \n", 50 | "\n", 51 | "It is a three layer deep neural network, consisting of 2 convolutional layers and a fully connected layer:" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 2, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "num_classes = 10\n", 61 | "input_shape = (28, 28, 1)\n", 62 | "\n", 63 | "model = Sequential()\n", 64 | "model.add(Conv2D(32, kernel_size=(3, 3),\n", 65 | " activation='relu',\n", 66 | " input_shape=input_shape))\n", 67 | "model.add(Conv2D(64, (3, 3), activation='relu'))\n", 68 | "model.add(MaxPooling2D(pool_size=(2, 2)))\n", 69 | "model.add(Dropout(0.25))\n", 70 | "model.add(Flatten())\n", 71 | "model.add(Dense(128, activation='relu'))\n", 72 | "model.add(Dropout(0.5))\n", 73 | "model.add(Dense(num_classes, activation='softmax'))" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "Once a network has been defined, you can compile it so that the model is ready to be trained with data:" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 3, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "model.compile(loss=keras.losses.categorical_crossentropy,\n", 90 | " optimizer=keras.optimizers.Adadelta(),\n", 91 | " metrics=['accuracy'])" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "You can view a summary of the network using the `summary()` function:" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 4, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "name": "stdout", 108 | "output_type": "stream", 109 | "text": [ 110 | "_________________________________________________________________\n", 111 | "Layer (type) Output Shape Param # \n", 112 | "=================================================================\n", 113 | "conv2d_1 (Conv2D) (None, 26, 26, 32) 320 \n", 114 | 
"_________________________________________________________________\n", 115 | "conv2d_2 (Conv2D) (None, 24, 24, 64) 18496 \n", 116 | "_________________________________________________________________\n", 117 | "max_pooling2d_1 (MaxPooling2 (None, 12, 12, 64) 0 \n", 118 | "_________________________________________________________________\n", 119 | "dropout_1 (Dropout) (None, 12, 12, 64) 0 \n", 120 | "_________________________________________________________________\n", 121 | "flatten_1 (Flatten) (None, 9216) 0 \n", 122 | "_________________________________________________________________\n", 123 | "dense_1 (Dense) (None, 128) 1179776 \n", 124 | "_________________________________________________________________\n", 125 | "dropout_2 (Dropout) (None, 128) 0 \n", 126 | "_________________________________________________________________\n", 127 | "dense_2 (Dense) (None, 10) 1290 \n", 128 | "=================================================================\n", 129 | "Total params: 1,199,882\n", 130 | "Trainable params: 1,199,882\n", 131 | "Non-trainable params: 0\n", 132 | "_________________________________________________________________\n" 133 | ] 134 | } 135 | ], 136 | "source": [ 137 | "model.summary()" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "## Use Augmentor to Scan Directory for Data\n", 145 | "\n", 146 | "Now we will use Augmentor to scan a directory containing our data that we will eventually feed into the neural network in order to train it. \n", 147 | "\n", 148 | "When you point a pipeline to a directory, it will scan each subdirectory and treat each subdirectory as a class for your machine learning problem. 
\n", 149 | "\n", 150 | "For example, within the directory `mnist`, there are subdirectories for each digit:\n", 151 | "\n", 152 | "```\n", 153 | "mnist/\n", 154 | "├── 0/\n", 155 | "│ ├── 0001.png\n", 156 | "│ ├── 0002.png\n", 157 | "│ ├── ...\n", 158 | "│ └── 5985.png\n", 159 | "├── 1/\n", 160 | "│ ├── 0001.png\n", 161 | "│ ├── 0002.png\n", 162 | "│ ├── ...\n", 163 | "│ └── 6101.png\n", 164 | "├── 2/\n", 165 | "│ ├── 0000.png\n", 166 | "│ ├── 0001.png\n", 167 | "│ ├── ...\n", 168 | "│ └── 5801.png\n", 169 | "│ ...\n", 170 | "├── 9/\n", 171 | "│ ├── 0001.png\n", 172 | "│ ├── 0002.png\n", 173 | "│ ├── ...\n", 174 | "│ └── 6001.png\n", 175 | "└\n", 176 | "```\n", 177 | "\n", 178 | "The directory `0` contains all the images corresponding to the 0 class.\n", 179 | "\n", 180 | "To get the data, we can use `wget` (this may not work under Windows):" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 5, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "name": "stdout", 190 | "output_type": "stream", 191 | "text": [ 192 | "--2018-03-23 15:15:37-- https://rawgit.com/myleott/mnist_png/master/mnist_png.tar.gz\n", 193 | "Resolving rawgit.com (rawgit.com)... 104.18.62.176, 104.18.63.176, 2400:cb00:2048:1::6812:3eb0, ...\n", 194 | "Connecting to rawgit.com (rawgit.com)|104.18.62.176|:443... connected.\n", 195 | "HTTP request sent, awaiting response... 301 Moved Permanently\n", 196 | "Location: https://raw.githubusercontent.com/myleott/mnist_png/master/mnist_png.tar.gz [following]\n", 197 | "--2018-03-23 15:15:37-- https://raw.githubusercontent.com/myleott/mnist_png/master/mnist_png.tar.gz\n", 198 | "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.112.133\n", 199 | "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.112.133|:443... connected.\n", 200 | "HTTP request sent, awaiting response... 
200 OK\n", 201 | "Length: 15683414 (15M) [application/octet-stream]\n", 202 | "Saving to: ‘mnist_png.tar.gz’\n", 203 | "\n", 204 | "100%[======================================>] 15,683,414 9.06MB/s in 1.7s \n", 205 | "\n", 206 | "2018-03-23 15:15:38 (9.06 MB/s) - ‘mnist_png.tar.gz’ saved [15683414/15683414]\n", 207 | "\n" 208 | ] 209 | } 210 | ], 211 | "source": [ 212 | "!wget https://rawgit.com/myleott/mnist_png/master/mnist_png.tar.gz\n", 213 | "!tar -xf mnist_png.tar.gz" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "After the MNIST data has downloaded, we can instantiate a `Pipeline` object in the `training` directory to add the images to the current pipeline:" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 6, 226 | "metadata": {}, 227 | "outputs": [ 228 | { 229 | "name": "stdout", 230 | "output_type": "stream", 231 | "text": [ 232 | "Initialised with 60000 image(s) found.\n", 233 | "Output directory set to mnist_png/training/output." 234 | ] 235 | } 236 | ], 237 | "source": [ 238 | "p = Augmentor.Pipeline(\"mnist_png/training\")" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "## Add Operations to the Pipeline\n", 246 | "\n", 247 | "Now that a pipeline object `p` has been created, we can add operations to the pipeline. 
Below we add several simple operations:" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 7, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "p.flip_top_bottom(probability=0.1)\n", 257 | "p.rotate(probability=0.3, max_left_rotation=5, max_right_rotation=5)" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": {}, 263 | "source": [ 264 | "You can view the status of pipeline using the `status()` function, which shows information regarding the number of classes in the pipeline, the number of images, and what operations have been added to the pipeline:" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 8, 270 | "metadata": {}, 271 | "outputs": [ 272 | { 273 | "name": "stdout", 274 | "output_type": "stream", 275 | "text": [ 276 | "Operations: 2\n", 277 | "\t0: Flip (top_bottom_left_right=TOP_BOTTOM probability=0.1 )\n", 278 | "\t1: RotateRange (max_right_rotation=5.0 max_left_rotation=-5.0 probability=0.3 )\n", 279 | "Images: 60000\n", 280 | "Classes: 10\n", 281 | "\tClass index: 0 Class label: 0 \n", 282 | "\tClass index: 1 Class label: 1 \n", 283 | "\tClass index: 2 Class label: 2 \n", 284 | "\tClass index: 3 Class label: 3 \n", 285 | "\tClass index: 4 Class label: 4 \n", 286 | "\tClass index: 5 Class label: 5 \n", 287 | "\tClass index: 6 Class label: 6 \n", 288 | "\tClass index: 7 Class label: 7 \n", 289 | "\tClass index: 8 Class label: 8 \n", 290 | "\tClass index: 9 Class label: 9 \n", 291 | "Dimensions: 1\n", 292 | "\tWidth: 28 Height: 28\n", 293 | "Formats: 1\n", 294 | "\t PNG\n", 295 | "\n", 296 | "You can remove operations using the appropriate index and the remove_operation(index) function.\n" 297 | ] 298 | } 299 | ], 300 | "source": [ 301 | "p.status()" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "## Creating a Generator\n", 309 | "\n", 310 | "A generator will create images indefinitely, and we can use this 
generator as input into the model created above. The generator is created with a user-defined batch size, which we define here in a variable named `batch_size`. This is used later to define the number of steps per epoch, so it is best to keep it stored as a variable." 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 9, 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [ 319 | "batch_size = 128\n", 320 | "g = p.keras_generator(batch_size=batch_size)" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "The generator can now be used to create augmented data. In Python, generators are invoked using the `next()` function - the Augmentor generators will return images indefinitely, and so `next()` can be called as often as required. \n", 328 | "\n", 329 | "You can view the output of the generator manually:" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 10, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "images, labels = next(g)" 339 | ] 340 | }, 341 | { 342 | "cell_type": "markdown", 343 | "metadata": {}, 344 | "source": [ 345 | "Images, and their labels, are returned in batches of the size defined above by `batch_size`. 
The `image_batch` variable is a tuple, containing the augmentented images and their corresponding labels.\n", 346 | "\n", 347 | "To see the label of the first image returned by the generator you can use the array's index:" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 11, 353 | "metadata": {}, 354 | "outputs": [ 355 | { 356 | "name": "stdout", 357 | "output_type": "stream", 358 | "text": [ 359 | "[0 0 0 0 0 1 0 0 0 0]\n" 360 | ] 361 | } 362 | ], 363 | "source": [ 364 | "print(labels[0])" 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": [ 371 | "Or preview the images using Matplotlib (the image should be a 5, according to the label information above):" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 12, 377 | "metadata": {}, 378 | "outputs": [ 379 | { 380 | "data": { 381 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAADfBJREFUeJzt3X+sVPWZx/HPAxRQgYhlFsHCXra5\nrhqSpZsJ2VjTtGgbq02wGkkxEhrNXqI12Sb9wx/7x6oxUZuWhkTFUCUFZW03aY0YTQtLNhIS09zB\nsF6surhwayFcuFeRAiHpKs/+cY/mVu58zzBzZs5cnvcrubkz5zlnzsPAh3PmnDPna+4uAPFMKrsB\nAOUg/EBQhB8IivADQRF+ICjCDwRF+IGgCD8QFOEHgprSyZXNmTPHe3p6OrlKIJTBwUGNjIxYI/O2\nFH4zu17SOkmTJT3j7o+l5u/p6VGtVmtllQASqtVqw/M2vdtvZpMlPSnp25KukrTSzK5q9vUAdFYr\nn/mXSnrP3fe7+18k/VLS8mLaAtBurYT/Mkl/GvP8YDbtr5hZn5nVzKw2PDzcwuoAFKntR/vdfYO7\nV929WqlU2r06AA1qJfyHJC0Y8/xL2TQAE0Ar4e+X1Gtmi8xsqqTvSdpaTFsA2q3pU33u/rGZ3SPp\ndxo91bfR3d8qrDMAbdXSeX53f1XSqwX1AqCDuLwXCIrwA0ERfiAowg8ERfiBoAg/EBThB4Ii/EBQ\nhB8IivADQRF+ICjCDwRF+IGgCD8QFOEHgiL8QFCEHwiK8ANBEX4gKMIPBEX4gaA6OkQ3xvfaa68l\n68uWLUvWt2zZUre2cOHC5LJXX311so7zF1t+ICjCDwRF+IGgCD8QFOEHgiL8QFCEHwiqpfP8ZjYo\n6YSkTyR97O7VIpo63+zbty9Zv/nmm5N1M0vWb7/99rq1Cy64ILnsunXrkvX3338/We/r60vW58+f\nn6yjPEVc5PMNdx8p4HUAdBC7/UBQrYbfJW0zs91mlt7/A9BVWt3tv8bdD5nZ30jabmbvuPvOsTNk\n/yn0Sf
nXmQPonJa2/O5+KPt9VNKLkpaOM88Gd6+6e7VSqbSyOgAFajr8ZnaRmc389LGkb0naW1Rj\nANqrld3+uZJezE5DTZH07+7+20K6AtB2TYff3fdL+ocCezlvnTp1Klk/fvx4S68/e/bsurWTJ08m\nl807T+/uyfozzzyTrG/fvr1u7corr0wui/biVB8QFOEHgiL8QFCEHwiK8ANBEX4gKG7d3QEvv/xy\nW1//4MGDdWubN29OLnvXXXe1tO6hoaFkfenSsy76/Mzu3buTy15++eVN9YTGsOUHgiL8QFCEHwiK\n8ANBEX4gKMIPBEX4gaA4z98Bzz33XEvLr127NlmfOnVq3dodd9yRXHb69OnJ+lNPPZWs9/f3J+un\nT5+uW8sbHvzAgQPJ+syZM5N1pLHlB4Ii/EBQhB8IivADQRF+ICjCDwRF+IGgOM9fgIGBgWR9ZKS1\nQYzPnDnT9LKTJ09O1letWpWsL1iwIFlfs2ZNsp66X8AHH3yQXDbvGgS0hi0/EBThB4Ii/EBQhB8I\nivADQRF+ICjCDwRleUMwm9lGSd+RdNTdF2fTLpH0K0k9kgYlrXD3Y3krq1arXqvVWmx54sm7//z+\n/ftbev3UEODTpk1r6bUxsVSrVdVqNWtk3ka2/L+QdP3npt0naYe790rakT0HMIHkht/dd0r68HOT\nl0valD3eJOmmgvsC0GbNfuaf6+6Hs8dDkuYW1A+ADmn5gJ+PHjSoe+DAzPrMrGZmteHh4VZXB6Ag\nzYb/iJnNk6Ts99F6M7r7Bnevunu1Uqk0uToARWs2/Fslrc4er5b0UjHtAOiU3PCb2QuSXpf092Z2\n0MzulPSYpG+a2T5J12XPAUwgud/nd/eVdUrXFtwLgA7iCj8gKMIPBEX4gaAIPxAU4QeCIvxAUNy6\nuwPWr1+frK9YsSJZP378eLJ+yy231K1NmdLaX3HeV74feeSRZL23t7dujVtzl4stPxAU4QeCIvxA\nUIQfCIrwA0ERfiAowg8ElXvr7iJFvXV3np07dybry5Yt61AnZ2vg1u7J+q233lq3lndL8/vvvz9Z\n5zqBsxV9624A5yHCDwRF+IGgCD8QFOEHgiL8QFCEHwiK8/wTQH9/f7J+8cUX1609//zzLa374Ycf\nTtYnTWrf9uPYsfSo77NmzWrbuicqzvMDyEX4gaAIPxAU4QeCIvxAUIQfCIrwA0Hlnuc3s42SviPp\nqLsvzqY9KOmfJQ1nsz3g7q/mrYzz/BPP0NBQsv7OO+8k69ddd13T616yZEmy/vjjjyfr114bbxT5\nos/z/0LS9eNM/5m7L8l+coMPoLvkht/dd0r6sAO9AOigVj7z32Nmb5rZRjObXVhHADqi2fCvl/Rl\nSUskHZb003ozmlmfmdXMrDY8PFxvNgAd1lT43f2Iu3/i7mck/VzS0sS8G9y96u7VSqXSbJ8ACtZU\n+M1s3pin35W0t5h2AHRK7vjNZvaCpK9LmmNmByX9m6Svm9kSSS5pUNKaNvYIoA1yw+/uK8eZ/Gwb\nekEXuvTSS5P1vO/U33jjjXVrr7zySnLZPXv2JOt33313sv7uu+8m69FxhR8QFOEHgiL8QFCEHwiK\n8ANBEX4gqNxTfUDKhRdemKz39PS0bd0fffRRsn7gwIG6tUWLFhXdzoTDlh8IivADQRF+ICjCDwRF\n+IGgCD8QFOEHguI8P9rqoYceqlt78sknW3rtGTNmJOvz589v6fXPd2z5gaAIPxAU4QeCIvxAUIQf\nCIrwA0ERfiAozvOjraZPn962154yJf3Pd9q0aW1b9/mALT8QFOEHgiL8QFCEHwiK8ANBEX4gKMIP\nBJV7nt/MFkjaLGmuJJe0wd3Xmdklkn4lqUfSoKQV7n6sfa2iG+XdO//pp5/uUCc4V41s+T+W9CN3\nv0rSP0n6gZldJek+STvcvVfSjuw5gAkiN/zuftjd38gen5D0tqTLJC2X
tCmbbZOkm9rVJIDindNn\nfjPrkfQVSb+XNNfdD2elIY1+LAAwQTQcfjObIenXkn7o7n8eW3N31+jxgPGW6zOzmpnVhoeHW2oW\nQHEaCr+ZfUGjwd/i7r/JJh8xs3lZfZ6ko+Mt6+4b3L3q7tVKpVJEzwAKkBt+MzNJz0p6293Xjilt\nlbQ6e7xa0kvFtwegXRr5Su9XJa2SNGBme7JpD0h6TNJ/mNmdkv4oaUV7Wpz4Tpw4kazfdtttyfrr\nr79eZDvn5MyZM8n66Ce++vL+7K0YGRlJ1vfu3Vu3tnjx4qLbmXByw+/uuyRZnfK1xbYDoFO4wg8I\nivADQRF+ICjCDwRF+IGgCD8QFLfu7oChoaFk/dix9Dehr7jiiqbXferUqWR9YGAgWc87jz96DVh7\nrFmzJlm/9957k/WFCxcW2c55hy0/EBThB4Ii/EBQhB8IivADQRF+ICjCDwTFef4O6O3tTdZ37drV\ntnWfPn06Wd+xY0eyvm3btmR90qT09uOJJ55I1lMeffTRZH3WrFlNvzbY8gNhEX4gKMIPBEX4gaAI\nPxAU4QeCIvxAUJb3fe0iVatVr9VqHVsfEE21WlWtVmvoJgts+YGgCD8QFOEHgiL8QFCEHwiK8ANB\nEX4gqNzwm9kCM/svM/uDmb1lZv+STX/QzA6Z2Z7s54b2twugKI3czONjST9y9zfMbKak3Wa2Pav9\nzN1/0r72ALRLbvjd/bCkw9njE2b2tqTL2t0YgPY6p8/8ZtYj6SuSfp9NusfM3jSzjWY2u84yfWZW\nM7Pa8PBwS80CKE7D4TezGZJ+LemH7v5nSeslfVnSEo3uGfx0vOXcfYO7V929WqlUCmgZQBEaCr+Z\nfUGjwd/i7r+RJHc/4u6fuPsZST+XtLR9bQIoWiNH+03Ss5Ledve1Y6bPGzPbdyXtLb49AO3SyNH+\nr0paJWnAzPZk0x6QtNLMlkhySYOS0uMpA+gqjRzt3yVpvO8Hv1p8OwA6hSv8gKAIPxAU4QeCIvxA\nUIQfCIrwA0ERfiAowg8ERfiBoAg/EBThB4Ii/EBQhB8IivADQXV0iG4zG5b0xzGT5kga6VgD56Zb\ne+vWviR6a1aRvf2tuzd0v7yOhv+slZvV3L1aWgMJ3dpbt/Yl0VuzyuqN3X4gKMIPBFV2+DeUvP6U\nbu2tW/uS6K1ZpfRW6md+AOUpe8sPoCSlhN/Mrjezd83sPTO7r4we6jGzQTMbyEYerpXcy0YzO2pm\ne8dMu8TMtpvZvuz3uMOkldRbV4zcnBhZutT3rttGvO74br+ZTZb0P5K+KemgpH5JK939Dx1tpA4z\nG5RUdffSzwmb2dcknZS02d0XZ9N+LOlDd38s+49ztrvf2yW9PSjpZNkjN2cDyswbO7K0pJskfV8l\nvneJvlaohPetjC3/Uknvuft+d/+LpF9KWl5CH13P3XdK+vBzk5dL2pQ93qTRfzwdV6e3ruDuh939\njezxCUmfjixd6nuX6KsUZYT/Mkl/GvP8oLpryG+XtM3MdptZX9nNjGNuNmy6JA1JmltmM+PIHbm5\nkz43snTXvHfNjHhdNA74ne0ad/9HSd+W9INs97Yr+ehntm46XdPQyM2dMs7I0p8p871rdsTropUR\n/kOSFox5/qVsWldw90PZ76OSXlT3jT585NNBUrPfR0vu5zPdNHLzeCNLqwveu24a8bqM8PdL6jWz\nRWY2VdL3JG0toY+zmNlF2YEYmdlFkr6l7ht9eKuk1dnj1ZJeKrGXv9ItIzfXG1laJb93XTfitbt3\n/EfSDRo94v+/kv61jB7q9PV3kv47+3mr7N4kvaDR3cD/0+ixkTslfVHSDkn7JP2npEu6qLfnJA1I\nelOjQZtXUm/XaHSX/k1Je7KfG8p+7xJ9lfK+cYUfEBQH/ICgCD8QFOEHgiL8QFCEHwiK8ANBEX4g\nKMIPBPX/WkNh2s7INOEAAAAASUVO
RK5CYII=\n", 382 | "text/plain": [ 383 | "" 384 | ] 385 | }, 386 | "metadata": {}, 387 | "output_type": "display_data" 388 | } 389 | ], 390 | "source": [ 391 | "plt.imshow(images[0].reshape(28, 28), cmap=\"Greys\");" 392 | ] 393 | }, 394 | { 395 | "cell_type": "markdown", 396 | "metadata": {}, 397 | "source": [ 398 | "## Train the Network\n", 399 | "\n", 400 | "We train the network by passing the generator, `g`, to the model's fit function. In Keras, if a generator is used we used the `fit_generator()` function as opposed to the standard `fit()` function. Also, the steps per epoch should roughly equal the total number of images in your dataset divided by the `batch_size`.\n", 401 | "\n", 402 | "Training the network over 5 epochs, we get the following output:" 403 | ] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "execution_count": 13, 408 | "metadata": {}, 409 | "outputs": [ 410 | { 411 | "name": "stdout", 412 | "output_type": "stream", 413 | "text": [ 414 | "Epoch 1/5\n", 415 | "468/468 [==============================] - 30s 65ms/step - loss: 0.4860 - acc: 0.8478\n", 416 | "Epoch 2/5\n", 417 | "468/468 [==============================] - 29s 63ms/step - loss: 0.2026 - acc: 0.9392\n", 418 | "Epoch 3/5\n", 419 | "468/468 [==============================] - 29s 61ms/step - loss: 0.1611 - acc: 0.9523\n", 420 | "Epoch 4/5\n", 421 | "468/468 [==============================] - 28s 60ms/step - loss: 0.1405 - acc: 0.9582\n", 422 | "Epoch 5/5\n", 423 | "468/468 [==============================] - 28s 59ms/step - loss: 0.1203 - acc: 0.9645\n" 424 | ] 425 | } 426 | ], 427 | "source": [ 428 | "h = model.fit_generator(g, steps_per_epoch=len(p.augmentor_images)/batch_size, epochs=5, verbose=1)" 429 | ] 430 | }, 431 | { 432 | "cell_type": "markdown", 433 | "metadata": {}, 434 | "source": [ 435 | "## Summary\n", 436 | "\n", 437 | "Using Augmentor with Keras means only that you need to create a generator when you are finished creating your pipeline. 
This has the advantage that no images need to be saved to disk and are augmented on the fly." 438 | ] 439 | } 440 | ], 441 | "metadata": { 442 | "kernelspec": { 443 | "display_name": "Python 2", 444 | "language": "python", 445 | "name": "python2" 446 | }, 447 | "language_info": { 448 | "codemirror_mode": { 449 | "name": "ipython", 450 | "version": 2 451 | }, 452 | "file_extension": ".py", 453 | "mimetype": "text/x-python", 454 | "name": "python", 455 | "nbconvert_exporter": "python", 456 | "pygments_lexer": "ipython2", 457 | "version": "2.7.6" 458 | } 459 | }, 460 | "nbformat": 4, 461 | "nbformat_minor": 2 462 | } 463 | -------------------------------------------------------------------------------- /notebooks/Augmentor_Keras_DataFrame.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Training a Neural Network using Augmentor and Keras\n", 8 | "\n", 9 | "In this notebook, we will train a simple convolutional neural network on the MNIST dataset using Augmentor to augment images on the fly using a generator.\n", 10 | "\n", 11 | "## Import Required Libraries\n", 12 | "\n", 13 | "We start by making a number of imports:" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import os, sys\n", 23 | "sys.path.append('..')" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "name": "stderr", 33 | "output_type": "stream", 34 | "text": [ 35 | "/anaconda3/envs/pyqae_base/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. 
In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 36 | " from ._conv import register_converters as _register_converters\n", 37 | "Using TensorFlow backend.\n" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "import Augmentor\n", 43 | "\n", 44 | "import keras\n", 45 | "from keras.models import Sequential\n", 46 | "from keras.layers import Dense, Dropout, Flatten\n", 47 | "from keras.layers import Conv2D, MaxPooling2D\n", 48 | "\n", 49 | "import numpy as np\n", 50 | "\n", 51 | "import matplotlib.pyplot as plt\n", 52 | "%matplotlib inline" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "## Define a Convolutional Neural Network\n", 60 | "\n", 61 | "Once the libraries have been imported, we define a small convolutional neural network. See the Keras documentation for details of this network: \n", 62 | "\n", 63 | "It is a three layer deep neural network, consisting of 2 convolutional layers and a fully connected layer:" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 3, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "num_classes = 10\n", 73 | "input_shape = (28, 28, 1)\n", 74 | "\n", 75 | "model = Sequential()\n", 76 | "model.add(Conv2D(32, kernel_size=(3, 3),\n", 77 | " activation='relu',\n", 78 | " input_shape=input_shape))\n", 79 | "model.add(Conv2D(64, (3, 3), activation='relu'))\n", 80 | "model.add(MaxPooling2D(pool_size=(2, 2)))\n", 81 | "model.add(Dropout(0.25))\n", 82 | "model.add(Flatten())\n", 83 | "model.add(Dense(128, activation='relu'))\n", 84 | "model.add(Dropout(0.5))\n", 85 | "model.add(Dense(num_classes, activation='softmax'))" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "Once a network has been defined, you can compile it so that the model is ready to be trained with data:" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 4, 98 | "metadata": {}, 99 | "outputs": [], 100 | 
"source": [ 101 | "model.compile(loss=keras.losses.categorical_crossentropy,\n", 102 | " optimizer=keras.optimizers.Adadelta(),\n", 103 | " metrics=['accuracy'])" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "You can view a summary of the network using the `summary()` function:" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 5, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "name": "stdout", 120 | "output_type": "stream", 121 | "text": [ 122 | "_________________________________________________________________\n", 123 | "Layer (type) Output Shape Param # \n", 124 | "=================================================================\n", 125 | "conv2d_1 (Conv2D) (None, 26, 26, 32) 320 \n", 126 | "_________________________________________________________________\n", 127 | "conv2d_2 (Conv2D) (None, 24, 24, 64) 18496 \n", 128 | "_________________________________________________________________\n", 129 | "max_pooling2d_1 (MaxPooling2 (None, 12, 12, 64) 0 \n", 130 | "_________________________________________________________________\n", 131 | "dropout_1 (Dropout) (None, 12, 12, 64) 0 \n", 132 | "_________________________________________________________________\n", 133 | "flatten_1 (Flatten) (None, 9216) 0 \n", 134 | "_________________________________________________________________\n", 135 | "dense_1 (Dense) (None, 128) 1179776 \n", 136 | "_________________________________________________________________\n", 137 | "dropout_2 (Dropout) (None, 128) 0 \n", 138 | "_________________________________________________________________\n", 139 | "dense_2 (Dense) (None, 10) 1290 \n", 140 | "=================================================================\n", 141 | "Total params: 1,199,882\n", 142 | "Trainable params: 1,199,882\n", 143 | "Non-trainable params: 0\n", 144 | "_________________________________________________________________\n" 145 | ] 146 | } 147 | ], 148 | "source": [ 149 | 
"model.summary()" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "## Use Augmentor with a DataFrame\n", 157 | "\n", 158 | "Now we will use Augmentor from a DataFrame to train the model. We can create a DataFrame from the download data using the glob command and then parsing pieces of the path\n", 159 | "\n", 160 | "\n", 161 | "To get the data, we can use `wget` (this may not work under Windows):" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 6, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "if not os.path.exists('mnist_png'):\n", 171 | " !wget https://rawgit.com/myleott/mnist_png/master/mnist_png.tar.gz\n", 172 | " !tar -xf mnist_png.tar.gz" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "After the MNIST data has downloaded, we can instantiate a `Pipeline` object in the `training` directory to add the images to the current pipeline:" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 7, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "data": { 189 | "text/html": [ 190 | "
\n", 191 | "\n", 204 | "\n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | "
pathdata_splitmnist_cat
0mnist_png/training/9/36655.pngtraining9
1mnist_png/training/9/32433.pngtraining9
2mnist_png/training/9/28319.pngtraining9
3mnist_png/training/9/4968.pngtraining9
4mnist_png/training/9/23502.pngtraining9
\n", 246 | "
" 247 | ], 248 | "text/plain": [ 249 | " path data_split mnist_cat\n", 250 | "0 mnist_png/training/9/36655.png training 9\n", 251 | "1 mnist_png/training/9/32433.png training 9\n", 252 | "2 mnist_png/training/9/28319.png training 9\n", 253 | "3 mnist_png/training/9/4968.png training 9\n", 254 | "4 mnist_png/training/9/23502.png training 9" 255 | ] 256 | }, 257 | "execution_count": 7, 258 | "metadata": {}, 259 | "output_type": "execute_result" 260 | } 261 | ], 262 | "source": [ 263 | "from glob import glob\n", 264 | "import pandas as pd\n", 265 | "import os\n", 266 | "image_df = pd.DataFrame(dict(path = glob('mnist_png/*/*/*.png')))\n", 267 | "image_df['data_split'] = image_df['path'].map(lambda x: x.split('/')[-3])\n", 268 | "image_df['mnist_cat'] = image_df['path'].map(lambda x: x.split('/')[-2])\n", 269 | "image_df.head(5)" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 8, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "name": "stdout", 279 | "output_type": "stream", 280 | "text": [ 281 | "Initialised with 60000 image(s) found.\n", 282 | "Output directory set to output." 283 | ] 284 | } 285 | ], 286 | "source": [ 287 | "p = Augmentor.DataFramePipeline(image_df.query('data_split==\"training\"'), \n", 288 | " image_col = 'path', \n", 289 | " category_col = 'mnist_cat')" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "## Add Operations to the Pipeline\n", 297 | "\n", 298 | "Now that a pipeline object `p` has been created, we can add operations to the pipeline. 
Below we add several simple operations:" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 9, 304 | "metadata": {}, 305 | "outputs": [], 306 | "source": [ 307 | "p.flip_top_bottom(probability=0.1)\n", 308 | "p.rotate(probability=0.3, max_left_rotation=5, max_right_rotation=5)" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": {}, 314 | "source": [ 315 | "You can view the status of pipeline using the `status()` function, which shows information regarding the number of classes in the pipeline, the number of images, and what operations have been added to the pipeline:" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 10, 321 | "metadata": {}, 322 | "outputs": [ 323 | { 324 | "name": "stdout", 325 | "output_type": "stream", 326 | "text": [ 327 | "Operations: 2\n", 328 | "\t0: Flip (probability=0.1 top_bottom_left_right=TOP_BOTTOM )\n", 329 | "\t1: RotateRange (probability=0.3 max_left_rotation=-5 max_right_rotation=5 )\n", 330 | "Images: 60000\n", 331 | "Classes: 10\n", 332 | "\tClass index: 0 Class label: 0 \n", 333 | "\tClass index: 1 Class label: 1 \n", 334 | "\tClass index: 2 Class label: 2 \n", 335 | "\tClass index: 3 Class label: 3 \n", 336 | "\tClass index: 4 Class label: 4 \n", 337 | "\tClass index: 5 Class label: 5 \n", 338 | "\tClass index: 6 Class label: 6 \n", 339 | "\tClass index: 7 Class label: 7 \n", 340 | "\tClass index: 8 Class label: 8 \n", 341 | "\tClass index: 9 Class label: 9 \n", 342 | "Dimensions: 1\n", 343 | "\tWidth: 28 Height: 28\n", 344 | "Formats: 1\n", 345 | "\t PNG\n", 346 | "\n", 347 | "You can remove operations using the appropriate index and the remove_operation(index) function.\n" 348 | ] 349 | } 350 | ], 351 | "source": [ 352 | "p.status()" 353 | ] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "metadata": {}, 358 | "source": [ 359 | "## Creating a Generator\n", 360 | "\n", 361 | "A generator will create images indefinitely, and we can use this 
generator as input into the model created above. The generator is created with a user-defined batch size, which we define here in a variable named `batch_size`. This is used later to define the number of steps per epoch, so it is best to keep it stored as a variable." 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 11, 367 | "metadata": {}, 368 | "outputs": [], 369 | "source": [ 370 | "batch_size = 128\n", 371 | "g = p.keras_generator(batch_size=batch_size)" 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": {}, 377 | "source": [ 378 | "The generator can now be used to create augmented data. In Python, generators are invoked using the `next()` function - the Augmentor generators will return images indefinitely, and so `next()` can be called as often as required. \n", 379 | "\n", 380 | "You can view the output of the generator manually:" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": 12, 386 | "metadata": {}, 387 | "outputs": [], 388 | "source": [ 389 | "images, labels = next(g)" 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": {}, 395 | "source": [ 396 | "Images, and their labels, are returned in batches of the size defined above by `batch_size`. 
The `image_batch` variable is a tuple, containing the augmentented images and their corresponding labels.\n", 397 | "\n", 398 | "To see the label of the first image returned by the generator you can use the array's index:" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 13, 404 | "metadata": {}, 405 | "outputs": [ 406 | { 407 | "name": "stdout", 408 | "output_type": "stream", 409 | "text": [ 410 | "[0 0 0 0 0 0 0 1 0 0]\n" 411 | ] 412 | } 413 | ], 414 | "source": [ 415 | "print(labels[0])" 416 | ] 417 | }, 418 | { 419 | "cell_type": "markdown", 420 | "metadata": {}, 421 | "source": [ 422 | "Or preview the images using Matplotlib (the image should be a 5, according to the label information above):" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": 14, 428 | "metadata": {}, 429 | "outputs": [ 430 | { 431 | "data": { 432 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAADm5JREFUeJzt3X+sVPWZx/HPA4IoENQwWCK4lxKz0RCXrhMwcbO6VBtqmiAxNcWkuWsIaKxmmzS6SEwgmkUltqx/rDWXhRRiy49IUTS4W39s1CabxsGYKsvuVuVui1wul2DsbVR+PvvHPZhbvPOdYebMnLk871di7sx5zvfOw8TPPTPzPWe+5u4CEM+YohsAUAzCDwRF+IGgCD8QFOEHgiL8QFCEHwiK8ANBEX4gqAva+WBTp071rq6udj4kEEpvb6+OHDli9ezbVPjNbKGkpySNlfSv7v54av+uri5VKpVmHhJAQrlcrnvfhl/2m9lYSf8i6duSrpG0xMyuafT3AWivZt7zz5P0gbt/5O7HJW2VtCiftgC0WjPhv0LSH4bdP5Bt+zNmttzMKmZWGRgYaOLhAOSpmfCP9KHCV64Pdvcedy+7e7lUKjXxcADy1Ez4D0iaOez+DEkHm2sHQLs0E/63JV1lZrPMbLyk70nalU9bAFqt4ak+dz9pZvdJ+ncNTfVtdPe9uXUGoKWamud3992SdufUC4A24vReICjCDwRF+IGgCD8QFOEHgiL8QFCEHwiK8ANBEX4gKMIPBEX4gaAIPxAU4QeCIvxAUIQfCIrwA0ERfiAowg8ERfiBoAg/EBThB4Ii/EBQhB8IivADQRF+ICjCDwRF+IGgCD8QFOEHgmpqlV4z65U0KOmUpJPuXs6jKQCt11T4M3/n7kdy+D0A2oiX/UBQzYbfJf3KzPaY2fI8GgLQHs2+7L/B3Q+a2TRJr5jZf7v7m8N3yP4oLJekK6+8ssmHA5CXpo787n4w+3lY0k5J80bYp8fdy+5eLpVKzTwcgBw1HH4zm2hmk8/clvQtSe/n1RiA1mrmZf/lknaa2Znf8wt3/7dcugLQcg2H390/kvRXOf
YCoI2Y6gOCIvxAUIQfCIrwA0ERfiAowg8ElcdVfUBVH3/8cdXawMBAcuzcuXPzbudLa9euTda3bt2arL/++uvJ+iWXXHLOPbUbR34gKMIPBEX4gaAIPxAU4QeCIvxAUIQfCIp5fiQdO3YsWV+2bFmy/txzz1Wtff7558mxDz74YLK+aNGiZP2ZZ56pWtuxY0dy7IQJE5L1MWNG/3Fz9P8LADSE8ANBEX4gKMIPBEX4gaAIPxAU4QeCYp7/PODuVWvZugpVDQ4OJut33XVXsv78888n62vWrKlaO3ToUHLspk2bkvVa1+Q3Y/Lkycn6BReM/uhw5AeCIvxAUIQfCIrwA0ERfiAowg8ERfiBoGpOVprZRknfkXTY3edk2y6TtE1Sl6ReSXe4+yeta/P8durUqWT96NGjDY8fO3ZscuyCBQuS9SlTpiTrH374YbI+Y8aMqrXU+QmS9Oijjybr+/fvT9a3bdtWtfbkk08mx65evTpZHz9+fLI+GtRz5P+ZpIVnbVsh6TV3v0rSa9l9AKNIzfC7+5uSzj70LJJ05vSrTZJuy7kvAC3W6Hv+y929T5Kyn9PyawlAO7T8Az8zW25mFTOr1FqbDUD7NBr+fjObLknZz8PVdnT3Hncvu3u5VCo1+HAA8tZo+HdJ6s5ud0t6IZ92ALRLzfCb2RZJ/ynpL83sgJktlfS4pFvM7HeSbsnuAxhFas7zu/uSKqVv5txLWM8++2yyvnTp0mQ9Nc9/0UUXJcc+8MADyfrDDz+crI8bNy5Zb0ata+a7urqS9c2bN1etzZkzJzm2u7s7Wed6fgCjFuEHgiL8QFCEHwiK8ANBEX4gqNE/XzEK1DqtecWK9EWRp0+fTtZTy0XPnz8/Ofb+++9P1ls5lVdLrX/3Qw89lKynLoXevn17cmytKdLzAUd+ICjCDwRF+IGgCD8QFOEHgiL8QFCEHwiKef42ePrpp5P1/v7+pn7/rbfeWrW2fv365NipU6c29djNqPXV3Vu2bEnWN2zYkKw/8cQTVWvlcjk5NgKO/EBQhB8IivADQRF+ICjCDwRF+IGgCD8QFPP8Ofj000+T9XXr1jX1+2fNmpWsp5aivvjii5t67Fb65JP0qu7Lli1L1u+5556G67WWLo+AIz8QFOEHgiL8QFCEHwiK8ANBEX4gKMIPBFVznt/MNkr6jqTD7j4n27Za0jJJZ76QfqW7725Vk50g9R3ya9euTY6tdR7A1Vdfnazv2bMnWe/k75g/efJk1dqNN96YHDt+/Phk/ZFHHknWi1xzYDSo58j/M0kLR9i+zt3nZv+d18EHzkc1w+/ub0qqvvQJgFGpmff895nZb81so5ldmltHANqi0fD/VNJsSXMl9Un6cbUdzWy5mVXMrFJrzToA7dNQ+N29391PuftpSeslzUvs2+PuZXcvl0qlRvsEkLOGwm9m04fdXSzp/XzaAdAu9Uz1bZF0k6SpZnZA0ipJN5nZXEkuqVfS3S3sEUAL1Ay/uy8ZYXP6C9PPQydOnKhae/nll5Njr7/++mT91VdfTdY7eR7/+PHjyfrtt99etXbo0KHk2FrnN0yaNClZRxpn+AFBEX4gKMIPBEX4gaAIPxAU4QeC4qu765S6PLTWMtgTJ05sql6kWsto7927N1l/6aWXqtbWrFmTHDt79uxkHc3hyA8ERfiBoAg/EBThB4Ii/EBQhB8IivADQTHPX6cxY6r/nbzuuuva2Em+as3j79+/P1lfsGBBsj5z5syqtXvvvTc5Fq3FkR8IivADQRF+ICjCDwRF+IGgCD8QFOEHgmKe/zxXax6/r68vWZ8/f36yfuGFFybrb7zxRtXalClTkmPRWhz5gaAIPxAU4QeCIvxAUIQfCIrwA0ERfiComvP8ZjZT0mZJX5N0WlKPuz9lZpdJ2iapS1KvpDvc/ZPWtYpGHD16NFm/9tprk/Vjx44l6zt37kzWZ82alayjOPUc+U9K+pG7Xy3pekk/MLNrJK2Q9Jq7XyXpte
w+gFGiZvjdvc/d38luD0raJ+kKSYskbcp22yTptlY1CSB/5/Se38y6JH1D0m8kXe7ufdLQHwhJ0/JuDkDr1B1+M5skaYekH7r7H89h3HIzq5hZZWBgoJEeAbRAXeE3s3EaCv7P3f2X2eZ+M5ue1adLOjzSWHfvcfeyu5dLpVIePQPIQc3wm5lJ2iBpn7v/ZFhpl6Tu7Ha3pBfybw9Aq9RzSe8Nkr4v6T0zezfbtlLS45K2m9lSSb+X9N3WtIhaBgcHq9YWL17c8FhJ6unpSdZvvvnmZB2dq2b43f3XkqxK+Zv5tgOgXTjDDwiK8ANBEX4gKMIPBEX4gaAIPxAUX909Cnz22WfJ+qpVq6rW3nrrreTYxx57LFnv7u5O1jF6ceQHgiL8QFCEHwiK8ANBEX4gKMIPBEX4gaCY5+8AJ06cSNZ3796drK9bt65qbeHChcmxK1bwpctRceQHgiL8QFCEHwiK8ANBEX4gKMIPBEX4gaCY528Dd0/W9+7dm6zffffdyfqdd95ZtbZhw4bkWMTFkR8IivADQRF+ICjCDwRF+IGgCD8QFOEHgqo5z29mMyVtlvQ1Sacl9bj7U2a2WtIySQPZrivdPX3heVD9/f3J+uLFi5P1adOmJes9PT1VaxMmTEiORVz1nORzUtKP3P0dM5ssaY+ZvZLV1rn7k61rD0Cr1Ay/u/dJ6stuD5rZPklXtLoxAK11Tu/5zaxL0jck/SbbdJ+Z/dbMNprZpVXGLDeziplVBgYGRtoFQAHqDr+ZTZK0Q9IP3f2Pkn4qabakuRp6ZfDjkca5e4+7l929XCqVcmgZQB7qCr+ZjdNQ8H/u7r+UJHfvd/dT7n5a0npJ81rXJoC81Qy/mZmkDZL2uftPhm2fPmy3xZLez789AK1Sz6f9N0j6vqT3zOzdbNtKSUvMbK4kl9QrKX3d6Xns+PHjyfrWrVuT9S+++CJZf/HFF5P1iRMnJuvASOr5tP/XkmyEEnP6wCjGGX5AUIQfCIrwA0ERfiAowg8ERfiBoKzW10rnqVwue6VSadvjAdGUy2VVKpWRpua/giM/EBThB4Ii/EBQhB8IivADQRF+ICjCDwTV1nl+MxuQ9H/DNk2VdKRtDZybTu2tU/uS6K1Refb2F+5e1/fltTX8X3lws4q7lwtrIKFTe+vUviR6a1RRvfGyHwiK8ANBFR3+6utMFa9Te+vUviR6a1QhvRX6nh9AcYo+8gMoSCHhN7OFZvY/ZvaBma0ooodqzKzXzN4zs3fNrNDrj7Nl0A6b2fvDtl1mZq+Y2e+ynyMuk1ZQb6vN7OPsuXvXzG4tqLeZZvYfZrbPzPaa2T9k2wt97hJ9FfK8tf1lv5mNlfS/km6RdEDS25KWuPt/tbWRKsysV1LZ3QufEzazv5X0J0mb3X1Otm2tpKPu/nj2h/NSd//HDulttaQ/Fb1yc7agzPThK0tLuk3S36vA5y7R1x0q4Hkr4sg/T9IH7v6Rux+XtFXSogL66Hju/qako2dtXiRpU3Z7k4b+52m7Kr11BHfvc/d3stuDks6sLF3oc5foqxBFhP8KSX8Ydv+AOmvJb5f0KzPbY2bLi25mBJdny6afWT59WsH9nK3mys3tdNbK0h3z3DWy4nXeigj/SF8x1ElTDje4+19L+rakH2Qvb1GfulZubpcRVpbuCI2ueJ23IsJ/QNLMYfdnSDpYQB8jcveD2c/Dknaq81Yf7j+zSGr283DB/Xypk1ZuHmllaXXAc9dJK14XEf63JV1lZrPMbLyk70naVUAfX2FmE7MPYmRmEyV9S523+vAuSd3Z7W5JLxTYy5/plJWbq60srYKfu05b8bqQk3yyqYx/ljRW0kZ3/6e2NzECM/u6ho720tAipr8osjcz2yLpJg1d9dUvaZWk5yVtl3SlpN9L+q67t/2Dtyq93aShl65frtx85j12m3v7G0lvSXpP0uls80oNvb8u7LlL9LVEBTxvnOEHBMUZfkBQhB8IivADQRF+ICjCDw
RF+IGgCD8QFOEHgvp/c4EhwmoWEhkAAAAASUVORK5CYII=\n", 433 | "text/plain": [ 434 | "
" 435 | ] 436 | }, 437 | "metadata": {}, 438 | "output_type": "display_data" 439 | } 440 | ], 441 | "source": [ 442 | "plt.imshow(images[0].reshape(28, 28), cmap=\"Greys\");" 443 | ] 444 | }, 445 | { 446 | "cell_type": "markdown", 447 | "metadata": {}, 448 | "source": [ 449 | "## Train the Network\n", 450 | "\n", 451 | "We train the network by passing the generator, `g`, to the model's fit function. In Keras, if a generator is used we use the `fit_generator()` function as opposed to the standard `fit()` function. Also, the steps per epoch should roughly equal the total number of images in your dataset divided by the `batch_size`.\n", 452 | "\n", 453 | "Training the network over 5 epochs, we get the following output:" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "metadata": {}, 460 | "outputs": [ 461 | { 462 | "name": "stdout", 463 | "output_type": "stream", 464 | "text": [ 465 | "Epoch 1/5\n", 466 | "156/468 [========>.....................] - ETA: 3:55 - loss: 0.7015 - acc: 0.7725" 467 | ] 468 | } 469 | ], 470 | "source": [ 471 | "h = model.fit_generator(g, steps_per_epoch=len(p.augmentor_images)/batch_size, epochs=5, verbose=1)" 472 | ] 473 | }, 474 | { 475 | "cell_type": "markdown", 476 | "metadata": {}, 477 | "source": [ 478 | "## Summary\n", 479 | "\n", 480 | "Using Augmentor with Keras means only that you need to create a generator when you are finished creating your pipeline. This has the advantage that no images need to be saved to disk and are augmented on the fly." 
481 | ] 482 | } 483 | ], 484 | "metadata": { 485 | "kernelspec": { 486 | "display_name": "Python [conda env:pyqae_base]", 487 | "language": "python", 488 | "name": "conda-env-pyqae_base-py" 489 | }, 490 | "language_info": { 491 | "codemirror_mode": { 492 | "name": "ipython", 493 | "version": 3 494 | }, 495 | "file_extension": ".py", 496 | "mimetype": "text/x-python", 497 | "name": "python", 498 | "nbconvert_exporter": "python", 499 | "pygments_lexer": "ipython3", 500 | "version": "3.6.4" 501 | } 502 | }, 503 | "nbformat": 4, 504 | "nbformat_minor": 2 505 | } 506 | -------------------------------------------------------------------------------- /notebooks/Per_Class_Augmentation_Strategy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Applying a Different Augmentation Strategy Per Class\n", 8 | "\n", 9 | "Let's say we wished to augment the MNIST dataset, but you wished to use a generator to supply a neural network with data. \n", 10 | "\n", 11 | "Ordinarily you could write a pipeline that would augment all the data, regardless of the class. However with MNIST you might want to have different pipelines for each of the 10 different classes. \n", 12 | "\n", 13 | "For example, it would make sense to flip images for the figure 8 both horizontally and vertically and still end up with feasible data. The figure 3 could be flipped vertically but not horizontally. Conversely, the figure 4 should not be flipped either horizontally or vertically. \n", 14 | "\n", 15 | "We can do this by creating 10 different pipelines, and adding or removing the appropriate operations from each pipeline as required.\n", 16 | "\n", 17 | "Augmentor does not support this natively, but it can be performed easily enough, and here we will learn how. 
\n", 18 | "\n", 19 | "First we import the required libraries:" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": { 26 | "collapsed": false 27 | }, 28 | "outputs": [ 29 | { 30 | "name": "stderr", 31 | "output_type": "stream", 32 | "text": [ 33 | "Using Theano backend.\n", 34 | "Using gpu device 0: GeForce GTX TITAN X (CNMeM is enabled with initial size: 90.0% of memory, cuDNN not available)\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "import Augmentor\n", 40 | "import numpy as np\n", 41 | "import os\n", 42 | "import glob\n", 43 | "import random\n", 44 | "import collections\n", 45 | "\n", 46 | "from PIL import Image\n", 47 | "\n", 48 | "import keras\n", 49 | "from keras.models import Sequential\n", 50 | "from keras.layers import Dense, Dropout, Flatten\n", 51 | "from keras.layers import Conv2D, MaxPooling2D\n", 52 | "from keras.datasets import mnist\n", 53 | "\n", 54 | "random.seed(0)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## 1. Point to a Root Directory\n", 62 | "\n", 63 | "Your root directory must contain subdirectories, one for each class in your machine learning classification problem:" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 2, 69 | "metadata": { 70 | "collapsed": true 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "root_directory = \"/home/marcus/Documents/mnist/train/*\"" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## 2. Scan for folders in the root directory\n", 82 | "\n", 83 | "We use `glob.glob()` to scan for all files in the `root_directory` and only choose those that are directories. 
These will be our classes:" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 3, 89 | "metadata": { 90 | "collapsed": false 91 | }, 92 | "outputs": [ 93 | { 94 | "name": "stdout", 95 | "output_type": "stream", 96 | "text": [ 97 | "Folders (classes) found: ['6', '0', '3', '4', '7', '5', '1', '8', '2', '9'] \n" 98 | ] 99 | } 100 | ], 101 | "source": [ 102 | "folders = []\n", 103 | "for f in glob.glob(root_directory):\n", 104 | " if os.path.isdir(f):\n", 105 | " folders.append(os.path.abspath(f))\n", 106 | "\n", 107 | "print(\"Folders (classes) found: %s \" % [os.path.split(x)[1] for x in folders])" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "## 3. Create a pipeline for each class\n", 115 | "\n", 116 | "Now we create a pipeline object for each of the classes. MNIST consists of 10 digits, and each digit represents one class:" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 4, 122 | "metadata": { 123 | "collapsed": false 124 | }, 125 | "outputs": [ 126 | { 127 | "name": "stdout", 128 | "output_type": "stream", 129 | "text": [ 130 | "Folder /home/marcus/Documents/mnist/train/6:\n", 131 | "Initialised with 5918 image(s) found.\n", 132 | "Output directory set to /home/marcus/Documents/mnist/train/6/output.\n", 133 | "----------------------------\n", 134 | "\n", 135 | "Folder /home/marcus/Documents/mnist/train/0:\n", 136 | "Initialised with 5923 image(s) found.\n", 137 | "Output directory set to /home/marcus/Documents/mnist/train/0/output.\n", 138 | "----------------------------\n", 139 | "\n", 140 | "Folder /home/marcus/Documents/mnist/train/3:\n", 141 | "Initialised with 6131 image(s) found.\n", 142 | "Output directory set to /home/marcus/Documents/mnist/train/3/output.\n", 143 | "----------------------------\n", 144 | "\n", 145 | "Folder /home/marcus/Documents/mnist/train/4:\n", 146 | "Initialised with 5842 image(s) found.\n", 147 | "Output directory set to 
/home/marcus/Documents/mnist/train/4/output.\n", 148 | "----------------------------\n", 149 | "\n", 150 | "Folder /home/marcus/Documents/mnist/train/7:\n", 151 | "Initialised with 6265 image(s) found.\n", 152 | "Output directory set to /home/marcus/Documents/mnist/train/7/output.\n", 153 | "----------------------------\n", 154 | "\n", 155 | "Folder /home/marcus/Documents/mnist/train/5:\n", 156 | "Initialised with 5421 image(s) found.\n", 157 | "Output directory set to /home/marcus/Documents/mnist/train/5/output.\n", 158 | "----------------------------\n", 159 | "\n", 160 | "Folder /home/marcus/Documents/mnist/train/1:\n", 161 | "Initialised with 6742 image(s) found.\n", 162 | "Output directory set to /home/marcus/Documents/mnist/train/1/output.\n", 163 | "----------------------------\n", 164 | "\n", 165 | "Folder /home/marcus/Documents/mnist/train/8:\n", 166 | "Initialised with 5851 image(s) found.\n", 167 | "Output directory set to /home/marcus/Documents/mnist/train/8/output.\n", 168 | "----------------------------\n", 169 | "\n", 170 | "Folder /home/marcus/Documents/mnist/train/2:\n", 171 | "Initialised with 5958 image(s) found.\n", 172 | "Output directory set to /home/marcus/Documents/mnist/train/2/output.\n", 173 | "----------------------------\n", 174 | "\n", 175 | "Folder /home/marcus/Documents/mnist/train/9:\n", 176 | "Initialised with 5949 image(s) found.\n", 177 | "Output directory set to /home/marcus/Documents/mnist/train/9/output.\n", 178 | "----------------------------\n", 179 | "\n" 180 | ] 181 | } 182 | ], 183 | "source": [ 184 | "pipelines = {}\n", 185 | "for folder in folders:\n", 186 | " print(\"Folder %s:\" % (folder))\n", 187 | " pipelines[os.path.split(folder)[1]] = (Augmentor.Pipeline(folder))\n", 188 | " print(\"\\n----------------------------\\n\")" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "We can summarise what was scanned:" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 
| "execution_count": 5, 201 | "metadata": { 202 | "collapsed": false 203 | }, 204 | "outputs": [ 205 | { 206 | "name": "stdout", 207 | "output_type": "stream", 208 | "text": [ 209 | "Class 1 has 6742 samples.\n", 210 | "Class 0 has 5923 samples.\n", 211 | "Class 3 has 6131 samples.\n", 212 | "Class 2 has 5958 samples.\n", 213 | "Class 5 has 5421 samples.\n", 214 | "Class 4 has 5842 samples.\n", 215 | "Class 7 has 6265 samples.\n", 216 | "Class 6 has 5918 samples.\n", 217 | "Class 9 has 5949 samples.\n", 218 | "Class 8 has 5851 samples.\n" 219 | ] 220 | } 221 | ], 222 | "source": [ 223 | "for p in pipelines.values():\n", 224 | " print(\"Class %s has %s samples.\" % (p.augmentor_images[0].class_label, len(p.augmentor_images)))" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "## 4. Add operations to the pipelines\n", 232 | "\n", 233 | "Here we will add operations to each of the pipelines. Some operations will be applied to all pipelines, others only to some pipelines.\n", 234 | "\n", 235 | "Here we add a rotate operation to all pipelines (and hence will be applied to all digits):" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 6, 241 | "metadata": { 242 | "collapsed": true 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "for pipeline in pipelines.values():\n", 247 | " pipeline.rotate(probability=0.5, max_left_rotation=5, max_right_rotation=5)" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "Here we add some operations that we only want to apply to certain classes. 
The figure 8 can be flipped horizontally and vertically:" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 7, 260 | "metadata": { 261 | "collapsed": true 262 | }, 263 | "outputs": [], 264 | "source": [ 265 | "pipelines[\"8\"].flip_top_bottom(probability=0.5)\n", 266 | "pipelines[\"8\"].flip_left_right(probability=0.5)" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "While the figure 3 can only be flipped vertically:" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 8, 279 | "metadata": { 280 | "collapsed": true 281 | }, 282 | "outputs": [], 283 | "source": [ 284 | "pipelines[\"3\"].flip_top_bottom(probability=0.5)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "## 5. Define a class label / class integer map\n", 292 | "\n", 293 | "The classes will have string labels associated with them, depending on the name of each class's parent folder. Here you must map the names of each of your classes with the 0-based index (which must correspond to the test data of your dataset).\n", 294 | "\n", 295 | "In the case of MNIST this is quite easy, the samples for the digit 0 were stored in a folder 0 and have the text label 0, and so on:" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 9, 301 | "metadata": { 302 | "collapsed": true 303 | }, 304 | "outputs": [], 305 | "source": [ 306 | "integer_labels = {'0': 0, \n", 307 | " '1': 1, \n", 308 | " '2': 2, \n", 309 | " '3': 3, \n", 310 | " '4': 4, \n", 311 | " '5': 5, \n", 312 | " '6': 6, \n", 313 | " '7': 7, \n", 314 | " '8': 8, \n", 315 | " '9': 9}" 316 | ] 317 | }, 318 | { 319 | "cell_type": "markdown", 320 | "metadata": {}, 321 | "source": [ 322 | "## 6. 
Define pipeline containers to store the pipelines and additional information\n", 323 | "\n", 324 | "Later we will need each pipeline's 0-based integer label as well as its categorical label (depending on the type of neural network you define):" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 10, 330 | "metadata": { 331 | "collapsed": true 332 | }, 333 | "outputs": [], 334 | "source": [ 335 | "PipelineContainer = collections.namedtuple('PipelineContainer', \n", 336 | " 'label label_integer label_categorical pipeline generator')\n", 337 | "\n", 338 | "pipeline_containers = []\n", 339 | "\n", 340 | "for label, pipeline in pipelines.items():\n", 341 | " label_categorical = np.zeros(len(pipelines), dtype=int)\n", 342 | " label_categorical[integer_labels[label]] = 1\n", 343 | " pipeline_containers.append(PipelineContainer(label, \n", 344 | " integer_labels[label], \n", 345 | " label_categorical, \n", 346 | " pipeline, \n", 347 | " pipeline.keras_generator(batch_size=1)))" 348 | ] 349 | }, 350 | { 351 | "cell_type": "markdown", 352 | "metadata": {}, 353 | "source": [ 354 | "## 7. Define a generator function\n", 355 | "\n", 356 | "Neural networks in Keras can be supplied with a generator to supply training data. 
Because we have one generator for each pipeline, we need to create \"generator of generators\":" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 11, 362 | "metadata": { 363 | "collapsed": false 364 | }, 365 | "outputs": [], 366 | "source": [ 367 | "def multi_generator(pipeline_containers, batch_size):\n", 368 | " while True:\n", 369 | " X = []\n", 370 | " y = []\n", 371 | " for i in range(batch_size):\n", 372 | " pipeline_container = random.choice(pipeline_containers)\n", 373 | " image, _ = next(pipeline_container.generator)\n", 374 | " image = image.reshape((28,28,1)) # Or (1, 28, 28) for channels_first, see Keras' docs.\n", 375 | " X.append(image)\n", 376 | " y.append(pipeline_container.label_categorical) # Or label_integer if required by network\n", 377 | " X = np.asarray(X)\n", 378 | " y = np.asarray(y)\n", 379 | " yield X, y" 380 | ] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "metadata": {}, 385 | "source": [ 386 | "## 8. Create the generator object\n", 387 | "\n", 388 | "Create a generator, `g` to pass data randomly from each pipeline (and hence each class) to a neural network:" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": 12, 394 | "metadata": { 395 | "collapsed": true 396 | }, 397 | "outputs": [], 398 | "source": [ 399 | "batch_size = 128\n", 400 | "\n", 401 | "g = multi_generator(pipeline_containers=pipeline_containers, \n", 402 | " batch_size=batch_size) # Here the batch size can be set to any value" 403 | ] 404 | }, 405 | { 406 | "cell_type": "markdown", 407 | "metadata": {}, 408 | "source": [ 409 | "To generate a batch of 128 images and labels, at random from a random pipeline defined above, we can use the `next()` function:" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": 13, 415 | "metadata": { 416 | "collapsed": false 417 | }, 418 | "outputs": [], 419 | "source": [ 420 | "X, y = next(g)" 421 | ] 422 | }, 423 | { 424 | "cell_type": "markdown", 425 | 
"metadata": {}, 426 | "source": [ 427 | "We can confirm that we are receiving images in batches of 128 and that the labels correspond to the images in each pipeline:" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": 14, 433 | "metadata": { 434 | "collapsed": false 435 | }, 436 | "outputs": [ 437 | { 438 | "name": "stdout", 439 | "output_type": "stream", 440 | "text": [ 441 | "128 images returned. 128 labels returned.\n" 442 | ] 443 | } 444 | ], 445 | "source": [ 446 | "print(\"%s images returned. %s labels returned.\") % (np.shape(X)[0], len(y))" 447 | ] 448 | }, 449 | { 450 | "cell_type": "markdown", 451 | "metadata": {}, 452 | "source": [ 453 | "We can use PIL to view the augmented images and cofirm the labels match (note that PIL requires images to be specified differently to how Keras expects data, hence some preprocessing of the data must be performed):" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 15, 459 | "metadata": { 460 | "collapsed": false 461 | }, 462 | "outputs": [ 463 | { 464 | "data": { 465 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAAAAABXZoBIAAABr0lEQVR4nJWSzUtUYRTGf+d675hf\nNM4wQmBhaiChMDIRLXLVInJRLdq56a8o+gdKiLCdgotKdKXUykUtE42ghIqiGqSoRck0XzaTc31n\n7mlxr3fuWC06i5f34cd7zuF5H1H+XVarVG2ef0ARcus5Qf8GgaWrcyWR/UYHaoqB+7v+1YrMUgCj\n7Z+f5qMzFalt5QHlw7aHMRGoSPXR3Y8CkC+YroGe6EupP7m1agAh5tEdb29CFZ7PvEmfAKBSxLOt\nEKrwbXZt8NIRADyIHe0MoWAerjE8igJeuYF6gQk2Kmze+9I2kgGovJv/QeknEkDQ5SynJkE8c+fm\nniW/dgKzbITyqx3Ongf3wY1Sd7LoUg4hVBrwesV99jZbyNyeX60lktpsm0yJbmQ15zY6picWDImu\nuhPCQxdeblWrwLXLGTtuE0/ZYVuVi7r4ye47PXbuGMQsBo9LCEUTV9Jfd1Mnkw4Uv++19ccjC6E9\n4+PGAVRco6p1/I0sQFBwQBFqhYbVGfju/0qQCgEq701/ujfiULQOD5Unzzj76kCCzOPrL1Q9X0hr\nqFX8YPhK/iPxrfUbQ4DLuKOlNzwAAAAASUVORK5CYII=\n", 466 | "text/plain": [ 467 | "" 468 | ] 469 | }, 470 | "execution_count": 15, 471 | "metadata": {}, 472 | "output_type": "execute_result" 473 | } 474 | ], 475 | "source": [ 476 | "image_index = 3 # Take image index 3 from the 
batch\n", 477 | "\n", 478 | "x_array = X[image_index]\n", 479 | "x_array = x_array.reshape((28,28))\n", 480 | "x_array = x_array * 255\n", 481 | "x_array = x_array.astype(np.uint8)\n", 482 | "Image.fromarray(x_array)" 483 | ] 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "metadata": {}, 488 | "source": [ 489 | "The label below should correspond to the image output above:" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": 16, 495 | "metadata": { 496 | "collapsed": false 497 | }, 498 | "outputs": [ 499 | { 500 | "name": "stdout", 501 | "output_type": "stream", 502 | "text": [ 503 | "Image label: 4\n" 504 | ] 505 | } 506 | ], 507 | "source": [ 508 | "print(\"Image label: %s\" % (np.nonzero(y[image_index])[0][0]))" 509 | ] 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "metadata": {}, 514 | "source": [ 515 | "## 9. Train a neural network with the generator\n", 516 | "\n", 517 | "Last, we train a neural network with the differing pipelines for each class.\n", 518 | "\n", 519 | "First we define a neural network:" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": 17, 525 | "metadata": { 526 | "collapsed": false 527 | }, 528 | "outputs": [], 529 | "source": [ 530 | "num_classes = len(pipelines)\n", 531 | "input_shape = (28, 28, 1)\n", 532 | "\n", 533 | "model = Sequential()\n", 534 | "model.add(Conv2D(32, kernel_size=(3, 3),\n", 535 | " activation='relu',\n", 536 | " input_shape=input_shape))\n", 537 | "model.add(Conv2D(64, (3, 3), activation='relu'))\n", 538 | "model.add(MaxPooling2D(pool_size=(2, 2)))\n", 539 | "model.add(Dropout(0.25))\n", 540 | "model.add(Flatten())\n", 541 | "model.add(Dense(128, activation='relu'))\n", 542 | "model.add(Dropout(0.5))\n", 543 | "model.add(Dense(num_classes, activation='softmax'))" 544 | ] 545 | }, 546 | { 547 | "cell_type": "markdown", 548 | "metadata": {}, 549 | "source": [ 550 | "Once a network has been defined, you can compile it so that the model is ready to be 
trained with data:" 551 | ] 552 | }, 553 | { 554 | "cell_type": "code", 555 | "execution_count": 18, 556 | "metadata": { 557 | "collapsed": true 558 | }, 559 | "outputs": [], 560 | "source": [ 561 | "model.compile(loss=keras.losses.categorical_crossentropy,\n", 562 | " optimizer=keras.optimizers.Adadelta(),\n", 563 | " metrics=['accuracy'])" 564 | ] 565 | }, 566 | { 567 | "cell_type": "markdown", 568 | "metadata": {}, 569 | "source": [ 570 | "Using the same batch size as the generator above, we can begin to train the neural network:" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": 19, 576 | "metadata": { 577 | "collapsed": false 578 | }, 579 | "outputs": [ 580 | { 581 | "name": "stdout", 582 | "output_type": "stream", 583 | "text": [ 584 | "Epoch 1/10\n", 585 | "390/390 [==============================] - 31s - loss: 0.4767 - acc: 0.8502 \n", 586 | "Epoch 2/10\n", 587 | "390/390 [==============================] - 31s - loss: 0.1364 - acc: 0.9595 \n", 588 | "Epoch 3/10\n", 589 | "390/390 [==============================] - 30s - loss: 0.1029 - acc: 0.9691 \n", 590 | "Epoch 4/10\n", 591 | "390/390 [==============================] - 30s - loss: 0.0879 - acc: 0.9740 \n", 592 | "Epoch 5/10\n", 593 | "390/390 [==============================] - 30s - loss: 0.0778 - acc: 0.9771 \n", 594 | "Epoch 6/10\n", 595 | "390/390 [==============================] - 30s - loss: 0.0689 - acc: 0.9784 \n", 596 | "Epoch 7/10\n", 597 | "390/390 [==============================] - 30s - loss: 0.0650 - acc: 0.9804 \n", 598 | "Epoch 8/10\n", 599 | "390/390 [==============================] - 31s - loss: 0.0632 - acc: 0.9808 \n", 600 | "Epoch 9/10\n", 601 | "390/390 [==============================] - 30s - loss: 0.0594 - acc: 0.9828 \n", 602 | "Epoch 10/10\n", 603 | "390/390 [==============================] - 30s - loss: 0.0531 - acc: 0.9845 \n" 604 | ] 605 | } 606 | ], 607 | "source": [ 608 | "h = model.fit_generator(g, steps_per_epoch=50000/batch_size, epochs=10, 
verbose=1)" 609 | ] 610 | }, 611 | { 612 | "cell_type": "markdown", 613 | "metadata": {}, 614 | "source": [ 615 | "## Conclusion\n", 616 | "\n", 617 | "Certain tasks may require different augmentation strategies on a class-by-class basis. The procedure above allows you to do this using Augmentor and Keras." 618 | ] 619 | } 620 | ], 621 | "metadata": { 622 | "kernelspec": { 623 | "display_name": "empirical", 624 | "language": "python", 625 | "name": "empirical" 626 | }, 627 | "language_info": { 628 | "codemirror_mode": { 629 | "name": "ipython", 630 | "version": 2 631 | }, 632 | "file_extension": ".py", 633 | "mimetype": "text/x-python", 634 | "name": "python", 635 | "nbconvert_exporter": "python", 636 | "pygments_lexer": "ipython2", 637 | "version": "2.7.6" 638 | } 639 | }, 640 | "nbformat": 4, 641 | "nbformat_minor": 1 642 | } 643 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | Pillow 2 | tqdm 3 | numpy 4 | pytest 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Pillow 2 | tqdm 3 | numpy -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | 4 | [metadata] 5 | license_file = LICENSE.md 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='Augmentor', 5 | packages=['Augmentor'], 6 | version='0.2.12', 7 | description='Image augmentation library for Machine Learning', 8 | long_description='Image augmentation library for Machine Learning', 9 | license='MIT', 
10 | author='Marcus D. Bloice', 11 | author_email='marcus.bloice@medunigraz.at', 12 | url='https://github.com/mdbloice/Augmentor', # URL to GitHub repo 13 | # download_url='https://github.com/mdbloice/Augmentor/tarball/0.1.1', # Get this using git tag 14 | keywords=['image', 'augmentation', 'artificial', 'generation', 'machine', 'learning'], 15 | include_package_data=True, # This will include all files in MANIFEST.in in the package when installing. 16 | classifiers=[ 17 | 'Development Status :: 5 - Production/Stable', 18 | 'Intended Audience :: Developers', 19 | 'Natural Language :: English', 20 | 'License :: OSI Approved :: MIT License', 21 | 'Operating System :: OS Independent', 22 | 'Programming Language :: Python', 23 | 'Programming Language :: Python :: 2', 24 | 'Programming Language :: Python :: 2.7', 25 | 'Programming Language :: Python :: 3', 26 | 'Programming Language :: Python :: 3.5', 27 | 'Programming Language :: Python :: 3.6', 28 | 'Programming Language :: Python :: 3.7', 29 | 'Programming Language :: Python :: 3.8', 30 | 'Programming Language :: Python :: 3.9', 31 | 'Topic :: Software Development :: Libraries :: Python Modules', 32 | ], 33 | install_requires=[ 34 | 'Pillow>=5.2.0', 35 | 'tqdm>=4.9.0', 36 | #'future>=0.16.0', 37 | 'numpy>=1.11.0', 38 | 'futures>=3.2.0; python_version == "2.7"' 39 | ] 40 | # zip_safe=False # Check this later. 
41 | ) 42 | -------------------------------------------------------------------------------- /tests/test_array_fuctions.py: -------------------------------------------------------------------------------- 1 | # Context 2 | import os 3 | import sys 4 | sys.path.insert(0, os.path.abspath('.')) 5 | 6 | # Imports 7 | import Augmentor 8 | import tempfile 9 | import io 10 | import shutil 11 | from PIL import Image 12 | from Augmentor import Operations 13 | 14 | # NO TESTS YET -------------------------------------------------------------------------------- /tests/test_custom_operations.py: -------------------------------------------------------------------------------- 1 | # Context 2 | import os 3 | import sys 4 | sys.path.insert(0, os.path.abspath('.')) 5 | 6 | # Imports 7 | import Augmentor 8 | from Augmentor.Operations import Operation 9 | import tempfile 10 | from PIL import Image 11 | import io 12 | 13 | 14 | # Custom class for testing 15 | class DoubleImageSize(Operation): 16 | def __init__(self, probability, custom_parameter_integer): 17 | Operation.__init__(self, probability) 18 | self.custom_parameter_integer = custom_parameter_integer 19 | 20 | def do(self): 21 | for image in self.images: 22 | pass 23 | 24 | def perform_operation(self, images): 25 | for i in range(self.custom_parameter_integer): 26 | pass 27 | 28 | for image in images: 29 | image = image.resize((image.size[0]*2, image.size[1]*2)) 30 | 31 | return images 32 | 33 | 34 | def test_adding_custom_function(): 35 | width = 80 36 | height = 80 37 | 38 | tmpdir = tempfile.mkdtemp() 39 | tmps = [] 40 | 41 | for i in range(10): 42 | tmps.append(tempfile.NamedTemporaryFile(dir=tmpdir, suffix='.JPEG', delete=False)) 43 | 44 | bytestream = io.BytesIO() 45 | 46 | im = Image.new('RGB', (width, height)) 47 | im.save(bytestream, 'JPEG') 48 | 49 | tmps[i].file.write(bytestream.getvalue()) 50 | tmps[i].flush() 51 | 52 | p = Augmentor.Pipeline(tmpdir) 53 | 54 | assert len(p.augmentor_images) == len(tmps) 55 | 56 | # 
Use the DoubleImageSize custom operation above 57 | # 58 | # First instantiate a new object of the custom operation 59 | double_image_size_operation = DoubleImageSize(probability=1, custom_parameter_integer=5) 60 | 61 | # Add to pipeline 62 | p.add_operation(double_image_size_operation) 63 | 64 | # Executed the pipeline as normal, and your custom operation will be executed 65 | p.sample(10) 66 | 67 | 68 | def test_execute_custom_function(): 69 | pass 70 | -------------------------------------------------------------------------------- /tests/test_datapipeline.py: -------------------------------------------------------------------------------- 1 | from __future__ import (absolute_import, division, 2 | print_function, unicode_literals) 3 | from builtins import * 4 | 5 | import os 6 | import sys 7 | sys.path.insert(0, os.path.abspath('.')) 8 | 9 | import Augmentor 10 | from PIL import Image 11 | import tempfile 12 | import io 13 | import shutil 14 | import glob 15 | import random 16 | import numpy as np 17 | 18 | import pytest 19 | 20 | 21 | @pytest.mark.skip(reason="DataPipeline has not been written to handle this circumstance yet.") 22 | def test_sample_with_no_masks(): 23 | # NOTE: 24 | # --- 25 | # Temporarily disable this test as it will fail currently. 26 | # The DataPipeline class currently does not handle images 27 | # that do not have associated masks. When this functionality 28 | # has been added, this test will be reinstated. 29 | # --- 30 | 31 | # This is to test if the user passes data that does not contain 32 | # any masks, in other words a list of images rather than the 33 | # data structure you have in other examples in this file. 
34 | width = 80 35 | height = 80 36 | 37 | tmpdir = tempfile.mkdtemp() 38 | tmps = [] 39 | 40 | num_of_images = 10 41 | 42 | for i in range(num_of_images): 43 | tmps.append(tempfile.NamedTemporaryFile(dir=tmpdir, suffix='.JPEG', delete=False)) 44 | 45 | bytestream = io.BytesIO() 46 | 47 | im = Image.new('RGB', (width, height)) 48 | im.save(bytestream, 'JPEG') 49 | 50 | tmps[i].file.write(bytestream.getvalue()) 51 | tmps[i].flush() 52 | 53 | # Make our data structures 54 | # Labels 55 | y = [0 if random.random() <= 0.5 else 1 for x in range(0, num_of_images)] 56 | # Image data 57 | images = [np.asarray(x) for x in tmps] 58 | 59 | p = Augmentor.DataPipeline(images) 60 | assert len(p.augmentor_images) == len(glob.glob(os.path.join(tmpdir, "*.JPEG"))) 61 | 62 | p.rotate(probability=1, max_left_rotation=5, max_right_rotation=5) 63 | 64 | sample_size = 100 65 | augmented_images = p.sample(sample_size) 66 | 67 | assert len(augmented_images) == sample_size 68 | 69 | # Close all temporary files which will also delete them automatically 70 | for i in range(len(tmps)): 71 | tmps[i].close() 72 | 73 | # Finally remove the directory (and everything in it) as mkdtemp does 74 | # not delete itself after closing automatically 75 | shutil.rmtree(tmpdir) 76 | 77 | 78 | def test_sample_with_masks(): 79 | width = 80 80 | height = 80 81 | 82 | # Original images 83 | tmpdir = tempfile.mkdtemp() 84 | tmps = [] 85 | 86 | num_of_images = 10 87 | 88 | for i in range(num_of_images): 89 | tmps.append(tempfile.NamedTemporaryFile(dir=tmpdir, prefix=str(i), suffix='.JPEG', delete=False)) 90 | 91 | bytestream = io.BytesIO() 92 | 93 | im = Image.new('RGB', (width, height)) 94 | im.save(bytestream, 'JPEG') 95 | 96 | tmps[i].file.write(bytestream.getvalue()) 97 | tmps[i].flush() 98 | 99 | # Mask images 100 | mask_tmpdir = tempfile.mkdtemp() 101 | mask_tmps = [] 102 | 103 | for i in range(num_of_images): 104 | mask_tmps.append(tempfile.NamedTemporaryFile(dir=mask_tmpdir, prefix=str(i), suffix='.JPEG', 
delete=False)) 105 | 106 | bytestream = io.BytesIO() 107 | 108 | im = Image.new('RGB', (width, height)) 109 | im.save(bytestream, 'JPEG') 110 | 111 | mask_tmps[i].file.write(bytestream.getvalue()) 112 | mask_tmps[i].flush() 113 | 114 | original_image_list = glob.glob(os.path.join(tmpdir, "*.JPEG")) 115 | mask_image_list = glob.glob(os.path.join(mask_tmpdir, "*.JPEG")) 116 | assert len(original_image_list) == len(mask_image_list) 117 | assert len(original_image_list) == num_of_images 118 | assert len(mask_image_list) == num_of_images 119 | 120 | collated_paths = list(zip(original_image_list, mask_image_list)) # list() required as Python 3 returns an iterator 121 | 122 | assert len(collated_paths) == num_of_images 123 | 124 | # Generate our labels and image data structure 125 | # y = [0 if random.random() <= 0.5 else 1 for x in range(0, num_of_images)] # Random list of 0s and 1s 126 | image_class = 0 if random.random() <= 0.5 else 1 127 | y = [image_class] * num_of_images # List of either all 0s or all 1s 128 | assert len(y) == num_of_images 129 | 130 | images = [[np.asarray(Image.open(im)) for im in im_list] for im_list in collated_paths] 131 | assert len(images) == num_of_images 132 | 133 | p = Augmentor.DataPipeline(images, y) 134 | p.rotate(probability=1, max_left_rotation=5, max_right_rotation=5) 135 | 136 | sample_size = 10 137 | augmented_images, augmented_labels = p.sample(sample_size) 138 | 139 | assert len(augmented_images) == sample_size 140 | assert len(augmented_labels) == sample_size 141 | 142 | print(augmented_labels) 143 | for i in range(0, len(augmented_labels)): 144 | assert augmented_labels[i] == image_class 145 | 146 | for im_list in augmented_images: 147 | for im in im_list: 148 | pil_image_from_array = Image.fromarray(im) 149 | assert pil_image_from_array is not None 150 | 151 | # Now without labels 152 | p = Augmentor.DataPipeline(images) 153 | p.zoom_random(probability=1, percentage_area=0.5) 154 | 155 | augmented_images_no_labels = 
p.sample(sample_size) 156 | assert len(augmented_images_no_labels) == sample_size 157 | 158 | for im_list_no_labels in augmented_images_no_labels: 159 | for im in im_list_no_labels: 160 | pil_image_from_array_no_lbl = Image.fromarray(im) 161 | assert pil_image_from_array_no_lbl is not None 162 | 163 | # Close all temporary files which will also delete them automatically 164 | for i in range(len(tmps)): 165 | tmps[i].close() 166 | 167 | for i in range(len(tmps)): 168 | mask_tmps[i].close() 169 | 170 | # Finally remove the directory (and everything in it) as mkdtemp does 171 | # not delete itself after closing automatically 172 | shutil.rmtree(tmpdir) 173 | shutil.rmtree(mask_tmpdir) 174 | -------------------------------------------------------------------------------- /tests/test_distortion.py: -------------------------------------------------------------------------------- 1 | # Context 2 | import os 3 | import sys 4 | sys.path.insert(0, os.path.abspath('.')) 5 | 6 | # Imports 7 | import Augmentor 8 | import tempfile 9 | import io 10 | import shutil 11 | from PIL import Image 12 | from Augmentor import Operations 13 | 14 | from util_funcs import create_colour_temp_image, create_greyscale_temp_image 15 | 16 | 17 | def test_in_memory_distortions(): 18 | tmp, tmpdir = create_colour_temp_image((800, 800), "JPEG") 19 | 20 | r_d = Operations.Distort(probability=1, grid_width=8, grid_height=8, magnitude=8) 21 | tmp_im = [] 22 | tmp_im.append(Image.open(tmp)) 23 | tmp_im = r_d.perform_operation(tmp_im) 24 | 25 | assert tmp_im is not None 26 | assert tmp_im[0].size == (800,800) 27 | 28 | tmp_bw, tmpdir_bw = create_greyscale_temp_image((800, 800), "PNG") 29 | 30 | r_d_bw = Operations.Distort(probability=1, grid_width=8, grid_height=8, magnitude=8) 31 | tmp_im_bw = [] 32 | tmp_im_bw.append(Image.open(tmp_bw)) 33 | tmp_im_bw = r_d_bw.perform_operation(tmp_im_bw) 34 | 35 | assert tmp_im_bw is not None 36 | assert tmp_im_bw[0].size == (800,800) 37 | assert 
isinstance(tmp_im_bw[0], Image.Image) 38 | 39 | tmp.close() 40 | tmp_bw.close() 41 | shutil.rmtree(tmpdir) 42 | shutil.rmtree(tmpdir_bw) 43 | -------------------------------------------------------------------------------- /tests/test_gaussian.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | # Context 4 | import os 5 | import sys 6 | import tempfile 7 | sys.path.insert(0, os.path.abspath('.')) 8 | 9 | import Augmentor 10 | from Augmentor import Operations 11 | 12 | 13 | def test_create_gaussian_distortion_object(): 14 | g = Augmentor.Operations.GaussianDistortion(1, 8, 8, 8, "true", "true", 1.0, 1.0, 1.0, 1.0) 15 | assert g is not None 16 | 17 | 18 | def test_add_gaussian_to_pipeline(): 19 | tmp_dir = tempfile.mkdtemp() 20 | 21 | p = Augmentor.Pipeline(tmp_dir) 22 | p.gaussian_distortion(1, 8, 8, 8, "true", "true") 23 | 24 | assert p is not None 25 | -------------------------------------------------------------------------------- /tests/test_generators.py: -------------------------------------------------------------------------------- 1 | from __future__ import (absolute_import, division, 2 | print_function, unicode_literals) 3 | from builtins import * 4 | 5 | import os 6 | import sys 7 | sys.path.insert(0, os.path.abspath('.')) 8 | 9 | import Augmentor 10 | import tempfile 11 | import io 12 | import shutil 13 | import glob 14 | import random 15 | import numpy as np 16 | 17 | from PIL import Image 18 | 19 | from Augmentor import ImageUtilities 20 | 21 | 22 | def test_image_generator_function(): 23 | 24 | width = 80 25 | height = 80 26 | 27 | tmpdir = tempfile.mkdtemp() 28 | tmps = [] 29 | 30 | for i in range(10): 31 | tmps.append(tempfile.NamedTemporaryFile(dir=tmpdir, suffix='.JPEG', delete=False)) 32 | 33 | bytestream = io.BytesIO() 34 | 35 | im = Image.new('RGB', (width, height)) 36 | im.save(bytestream, 'JPEG') 37 | 38 | tmps[i].file.write(bytestream.getvalue()) 39 | tmps[i].flush() 40 | 41 | p = 
Augmentor.Pipeline(tmpdir) 42 | assert len(p.augmentor_images) == len(tmps) 43 | 44 | p.rotate(probability=0.5, max_left_rotation=5, max_right_rotation=5) 45 | p.flip_left_right(probability=0.333) 46 | p.flip_top_bottom(probability=0.5) 47 | 48 | g = p.image_generator() 49 | 50 | X = next(g) 51 | 52 | assert X is not None 53 | 54 | # Close all temporary files which will also delete them automatically 55 | for i in range(len(tmps)): 56 | tmps[i].close() 57 | 58 | # Finally remove the directory (and everything in it) as mkdtemp does 59 | # not delete itself after closing automatically 60 | # Removing because this causes errors in Windows tests 61 | # shutil.rmtree(tmpdir) 62 | 63 | 64 | def test_keras_generator_from_disk(): 65 | 66 | batch_size = random.randint(1, 50) 67 | width = 80 68 | height = 80 69 | 70 | tmpdir = tempfile.mkdtemp() 71 | tmps = [] 72 | 73 | for i in range(10): 74 | tmps.append(tempfile.NamedTemporaryFile(dir=tmpdir, suffix='.JPEG', delete=False)) 75 | 76 | bytestream = io.BytesIO() 77 | 78 | im = Image.new('RGB', (width, height)) 79 | im.save(bytestream, 'JPEG') 80 | 81 | tmps[i].file.write(bytestream.getvalue()) 82 | tmps[i].flush() 83 | 84 | p = Augmentor.Pipeline(tmpdir) 85 | assert len(p.augmentor_images) == len(tmps) 86 | 87 | p.rotate(probability=0.5, max_left_rotation=5, max_right_rotation=5) 88 | p.flip_left_right(probability=0.333) 89 | p.flip_top_bottom(probability=0.5) 90 | 91 | g = p.keras_generator(batch_size=batch_size, image_data_format="channels_last") 92 | 93 | X, y = next(g) 94 | 95 | assert len(X) == batch_size 96 | assert len(X) == batch_size 97 | assert len(X) == len(y) 98 | 99 | assert np.shape(X) == (batch_size, width, height, 3) 100 | 101 | # Call next() more than the total number of images in the pipeline 102 | for i in range(20): 103 | X, y = next(g) 104 | assert len(X) == batch_size 105 | assert len(X) == batch_size 106 | assert len(X) == len(y) 107 | assert np.shape(X) == (batch_size, width, height, 3) 108 | 109 | g2 
= p.keras_generator(batch_size=batch_size, image_data_format="channels_first") 110 | 111 | X2, y2 = next(g2) 112 | 113 | assert len(X2) == batch_size 114 | assert len(X2) == len(y2) 115 | 116 | assert np.shape(X2) == (batch_size, 3, width, height) 117 | 118 | # Close all temporary files which will also delete them automatically 119 | for i in range(len(tmps)): 120 | tmps[i].close() 121 | 122 | # Finally remove the directory (and everything in it) as mkdtemp does 123 | # not delete itself after closing automatically 124 | shutil.rmtree(tmpdir) 125 | 126 | 127 | def test_generator_with_array_data(): 128 | 129 | batch_size = random.randint(1, 100) 130 | width = 800 131 | height = 800 132 | 133 | image_matrix = np.zeros((100, width, height, 3), dtype='uint8') 134 | labels = np.zeros(100) 135 | 136 | p = Augmentor.Pipeline() 137 | p.rotate(probability=1, max_right_rotation=10, max_left_rotation=10) 138 | 139 | g = p.keras_generator_from_array(image_matrix, labels, batch_size=batch_size, scaled=True) 140 | 141 | X, y = next(g) 142 | 143 | assert len(X) == batch_size 144 | assert len(y) == batch_size 145 | 146 | for i in range(len(y)): 147 | assert y[i] == 0 148 | 149 | for i in range(len(X)): 150 | x_converted = X[i] * 255 151 | x_converted = x_converted.astype("uint8") 152 | im_pil = Image.fromarray(x_converted) 153 | assert im_pil is not None 154 | 155 | image_matrix_2d = np.zeros((100, width, height), dtype='uint8') 156 | labels_2d = np.zeros(100) 157 | 158 | p2 = Augmentor.Pipeline() 159 | p2.rotate(probability=0.1, max_left_rotation=5, max_right_rotation=5) 160 | 161 | g2 = p2.keras_generator_from_array(image_matrix_2d, labels_2d, batch_size=batch_size) 162 | 163 | X2, y2 = next(g2) 164 | 165 | assert len(X2) == batch_size 166 | assert len(y2) == batch_size 167 | 168 | for i in range(len(y2)): 169 | assert y2[i] == 0 170 | 171 | for i in range(len(X2)): 172 | im_pil = Image.fromarray(X2[i].reshape(width, height)) 173 | assert im_pil is not None 174 | 175 | 176 | def 
test_generator(): 177 | 178 | tmpdir = tempfile.mkdtemp() 179 | tmps = [] 180 | 181 | num_of_images = 10 182 | width = 800 183 | height = 800 184 | 185 | for i in range(num_of_images): 186 | tmps.append(tempfile.NamedTemporaryFile(dir=tmpdir, suffix='.JPEG', delete=False)) 187 | 188 | bytestream = io.BytesIO() 189 | 190 | im = Image.new('RGB', (width,height)) 191 | im.save(bytestream, 'JPEG') 192 | 193 | tmps[i].file.write(bytestream.getvalue()) 194 | tmps[i].flush() 195 | 196 | p = Augmentor.Pipeline(tmpdir) 197 | 198 | # Test a generator on the same number of images in the folder. 199 | batch_size = len(p.augmentor_images) 200 | g = p.keras_generator(batch_size=batch_size) 201 | 202 | batch = next(g) 203 | # A tuple should be returned, containing the augmented images and their labels 204 | assert len(batch) == 2 205 | 206 | X = batch[0] 207 | y = batch[1] 208 | 209 | # They should be the same size/length e.g. 100 images and 100 labels 210 | assert len(X) == len(y) 211 | 212 | assert len(X) == batch_size 213 | assert len(y) == batch_size 214 | 215 | # Because we have in this case one class, y should be of shape (batch_size, 1) 216 | assert np.shape(y)[0] == batch_size 217 | assert np.shape(y)[1] == 1 218 | 219 | assert np.shape(X)[0] == batch_size 220 | assert np.shape(X)[1] == width 221 | assert np.shape(X)[2] == height 222 | assert np.shape(X)[3] == 3 # For RGB we should have 3 layers 223 | 224 | # All labels in y should = 0 because we only have one class. 
225 | for label in y: 226 | assert label == 0 227 | 228 | # Close all temporary files which will also delete them automatically 229 | for i in range(len(tmps)): 230 | tmps[i].close() 231 | 232 | # Finally remove the directory (and everything in it) as mkdtemp does 233 | # not delete itself after closing automatically 234 | shutil.rmtree(tmpdir) 235 | 236 | 237 | def test_generator_image_scan(): 238 | 239 | num_of_sub_dirs = random.randint(1, 10) 240 | num_of_im_files = random.randint(1, 10) 241 | 242 | # Test with an absolute path 243 | output_directory = os.path.join(tempfile.mkdtemp(), "output_abs") 244 | 245 | # Make an empty temporary directory 246 | initial_temp_directory = tempfile.mkdtemp() 247 | 248 | sub_dirs = [] 249 | 250 | # Make num_of_sub_dirs subdirectories of this initial directory 251 | for _ in range(num_of_sub_dirs): 252 | sub_dirs.append(tempfile.mkdtemp(dir=initial_temp_directory)) 253 | 254 | tmp_files = [] 255 | image_counter = 0 256 | 257 | # Just to mix things up, we can create different file types 258 | suffix_filetypes = [('.PNG', 'PNG'), 259 | ('.JPEG', 'JPEG'), 260 | #('.GIF', 'GIF'), 261 | ('.JPG', 'JPEG'), 262 | ('.png', 'PNG'), 263 | ('.jpeg', 'JPEG'), 264 | #('.gif', 'GIF'), 265 | ('.jpg', 'JPEG')] 266 | 267 | # Make num_of_im_files images in each sub directory. 
268 | for sub_dir in sub_dirs: 269 | for iterator in range(num_of_im_files): 270 | suffix_filetype = random.choice(suffix_filetypes) 271 | tmp_files.append(tempfile.NamedTemporaryFile(dir=os.path.abspath(sub_dir), suffix=suffix_filetype[0], delete=False)) 272 | im = Image.fromarray(np.uint8(np.random.rand(80, 80, 3) * 255)) 273 | im.save(tmp_files[image_counter].name, suffix_filetype[1]) 274 | image_counter += 1 275 | 276 | p = Augmentor.Pipeline(initial_temp_directory, output_directory=output_directory) 277 | 278 | batch_size = random.randint(1, 1000) 279 | 280 | g = p.keras_generator(batch_size=batch_size) 281 | 282 | X, y = next(g) 283 | 284 | # The number of classes must equal the number of sub directories 285 | assert np.shape(y)[0] == batch_size 286 | assert np.shape(y)[1] == num_of_sub_dirs 287 | assert len(y) == batch_size 288 | 289 | # Call the generator again: this time the output directory will contain the 290 | # the created output_directory directory. 291 | g_2 = p.keras_generator(batch_size=batch_size) 292 | X_2, y_2 = next(g_2) 293 | 294 | assert np.shape(y_2)[0] == batch_size 295 | assert np.shape(y_2)[1] == num_of_sub_dirs 296 | assert len(y_2) == batch_size 297 | 298 | # Clean up 299 | for tmp_file in tmp_files: 300 | tmp_file.close() 301 | 302 | for sub_dir in sub_dirs: 303 | shutil.rmtree(sub_dir) 304 | 305 | shutil.rmtree(os.path.join(initial_temp_directory, output_directory)) 306 | shutil.rmtree(initial_temp_directory) 307 | -------------------------------------------------------------------------------- /tests/test_ground_truth_augmentation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | # Context 4 | import os 5 | import sys 6 | sys.path.insert(0, os.path.abspath('.')) 7 | 8 | # Imports, some may not be needed. 
9 | import Augmentor 10 | import tempfile 11 | import io 12 | import shutil 13 | import glob 14 | import random 15 | import numpy as np 16 | from PIL import Image 17 | 18 | # Used to test the temporary ZoomGroundTruth operation. 19 | from Augmentor.Operations import ZoomGroundTruth 20 | 21 | 22 | def test_loading_ground_truth_images(): 23 | # Create directories for the standard images and the ground truth images. 24 | standard_image_directory = tempfile.mkdtemp() 25 | ground_truth_image_directory = tempfile.mkdtemp(prefix="ground-truth_") 26 | 27 | # Create images in each directory, but with the same names. 28 | # First create a number of image names. 29 | image_names = [] 30 | num_of_images = random.randint(1, 10) 31 | for i in range(num_of_images): 32 | image_names.append("im%s.png" % i) 33 | 34 | # Create random images, one set of 'standard' images 35 | # and another set of ground truth images. 36 | standard_images = [] 37 | ground_truth_images = [] 38 | 39 | for image_name in image_names: 40 | im = Image.fromarray(np.uint8(np.random.rand(80, 80, 3) * 255)) # (80, 80) for Greyscale 41 | im_path = os.path.join(os.path.abspath(standard_image_directory), image_name) 42 | im.save(im_path, 'PNG') 43 | standard_images.append(im_path) 44 | 45 | for image_name in image_names: 46 | im = Image.fromarray(np.uint8(np.random.rand(80, 80, 3) * 255)) # (80, 80) for Greyscale 47 | im_path = os.path.join(os.path.abspath(ground_truth_image_directory), image_name) 48 | im.save(im_path, 'PNG') 49 | ground_truth_images.append(im_path) 50 | 51 | # Create a pipeline, then add the ground truth image directory. 52 | p = Augmentor.Pipeline(standard_image_directory) 53 | assert len(p.augmentor_images) == len(image_names) 54 | 55 | # Add the ground truth directory. 56 | p.ground_truth(ground_truth_image_directory) 57 | 58 | # Check how many were found and make sure the 59 | # count is the same as the number of ground truth 60 | # images we created. 
61 | count = 0 62 | for augmentor_image in p.augmentor_images: 63 | if augmentor_image.ground_truth is not None: 64 | count += 1 65 | 66 | assert count == len(ground_truth_images) 67 | 68 | # Check that each ground truth image is contained 69 | # in the augmentor_images list. 70 | stored_ground_truth_images = [] 71 | for augmentor_image in p.augmentor_images: 72 | if augmentor_image.ground_truth is not None: 73 | stored_ground_truth_images.append(augmentor_image.ground_truth) 74 | 75 | for ground_truth_image in ground_truth_images: 76 | assert ground_truth_image in stored_ground_truth_images 77 | 78 | # Remove the directories that we used entirely 79 | shutil.rmtree(standard_image_directory) 80 | shutil.rmtree(ground_truth_image_directory) 81 | 82 | 83 | def test_zoom_ground_truth_temporary_class_without_ground_truth_images(): 84 | file_ending = "PNG" 85 | 86 | # Create directories for the standard images and the ground truth images. 87 | standard_image_directory = tempfile.mkdtemp() 88 | ground_truth_image_directory = tempfile.mkdtemp(prefix="ground-truth_") 89 | 90 | # Create images in each directory, but with the same names. 91 | # First create a number of image names. 92 | image_names = [] 93 | num_of_images = random.randint(1, 10) 94 | for i in range(num_of_images): 95 | image_names.append("im%s.%s" % (i, file_ending)) 96 | 97 | # Create random images, one set of 'standard' images 98 | # and another set of ground truth images. 
99 | standard_images = [] 100 | ground_truth_images = [] 101 | 102 | for image_name in image_names: 103 | im = Image.fromarray(np.uint8(np.random.rand(80, 80, 3) * 255)) # (80, 80) for Greyscale 104 | im_path = os.path.join(os.path.abspath(standard_image_directory), image_name) 105 | im.save(im_path, file_ending) 106 | standard_images.append(im_path) 107 | 108 | for image_name in image_names: 109 | im = Image.fromarray(np.uint8(np.random.rand(80, 80, 3) * 255)) # (80, 80) for Greyscale 110 | im_path = os.path.join(os.path.abspath(ground_truth_image_directory), image_name) 111 | im.save(im_path, file_ending) 112 | ground_truth_images.append(im_path) 113 | 114 | # Test the functionality using the ZoomGroundTruth test 115 | # operation WITHOUT passing ground truth images. This is 116 | # to test the operation performs as expected with only 117 | # the standard images being passed to the operation 118 | # 119 | # Start by creating a Pipeline object. 120 | p = Augmentor.Pipeline(standard_image_directory) 121 | 122 | # Now add the operation test class manually (as there is no helper 123 | # function for this operation) 124 | p.add_operation(ZoomGroundTruth(probability=1, min_factor=1.1, max_factor=1.4)) 125 | assert len(p.operations) == 1 126 | 127 | # Sample random number of times, generate, confirm presence. 128 | num_of_samples_to_generate = random.randint(1, 100) 129 | p.sample(num_of_samples_to_generate) 130 | generated_files = glob.glob(os.path.join(standard_image_directory, "output/*")) 131 | assert len(generated_files) == num_of_samples_to_generate 132 | 133 | # Remove the directories that we used entirely 134 | shutil.rmtree(standard_image_directory) 135 | shutil.rmtree(ground_truth_image_directory) 136 | 137 | 138 | def test_zoom_ground_truth_temporary_class(): 139 | file_ending = "PNG" 140 | 141 | # Create directories for the standard images and the ground truth images. 
142 | standard_image_directory = tempfile.mkdtemp() 143 | ground_truth_image_directory = tempfile.mkdtemp(prefix="ground-truth_") 144 | 145 | # Create images in each directory, but with the same names. 146 | # First create a number of image names. 147 | image_names = [] 148 | num_of_images = random.randint(1, 10) 149 | for i in range(num_of_images): 150 | image_names.append("im%s.%s" % (i, file_ending)) 151 | 152 | # Create random images, one set of 'standard' images 153 | # and another set of ground truth images. 154 | standard_images = [] 155 | ground_truth_images = [] 156 | 157 | for image_name in image_names: 158 | im = Image.fromarray(np.uint8(np.random.rand(80, 80, 3) * 255)) # (80, 80) for Greyscale 159 | im_path = os.path.join(os.path.abspath(standard_image_directory), image_name) 160 | im.save(im_path, file_ending) 161 | standard_images.append(im_path) 162 | 163 | for image_name in image_names: 164 | im = Image.fromarray(np.uint8(np.random.rand(80, 80, 3) * 255)) # (80, 80) for Greyscale 165 | im_path = os.path.join(os.path.abspath(ground_truth_image_directory), image_name) 166 | im.save(im_path, file_ending) 167 | ground_truth_images.append(im_path) 168 | 169 | # Perform the operation using some ground truth images. 
170 | p = Augmentor.Pipeline(standard_image_directory) 171 | p.ground_truth(ground_truth_image_directory) 172 | 173 | p.add_operation(ZoomGroundTruth(probability=1, min_factor=1.1, max_factor=1.5)) 174 | 175 | num_samples = random.randint(2, 10) 176 | p.sample(num_samples) 177 | 178 | generated_files = glob.glob(os.path.join(standard_image_directory, "output/*")) 179 | assert (num_samples * 2) == len(generated_files) 180 | 181 | # Remove the directories that we used entirely 182 | shutil.rmtree(standard_image_directory) 183 | shutil.rmtree(ground_truth_image_directory) 184 | 185 | 186 | 187 | -------------------------------------------------------------------------------- /tests/test_load.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | # Context 4 | import os 5 | import sys 6 | sys.path.insert(0, os.path.abspath('.')) 7 | 8 | # Imports 9 | import Augmentor 10 | import tempfile 11 | import io 12 | import shutil 13 | import glob 14 | import random 15 | import numpy as np 16 | 17 | from PIL import Image 18 | 19 | from Augmentor import ImageUtilities 20 | 21 | from util_funcs import create_sub_folders 22 | 23 | 24 | def test_initialise_with_no_parameters(): 25 | p = Augmentor.Pipeline() 26 | assert len(p.augmentor_images) == 0 27 | assert isinstance(p, Augmentor.Pipeline) 28 | 29 | 30 | def test_initialise_with_nondefault_output_directory(): 31 | empty_temp_directory = tempfile.mkdtemp() 32 | output_directory = 'out' 33 | p = Augmentor.Pipeline(empty_temp_directory, output_directory=output_directory) 34 | assert os.path.exists(os.path.join(empty_temp_directory, output_directory)) 35 | 36 | 37 | def test_initialise_with_missing_folder(): 38 | with pytest.raises(IOError): 39 | p = Augmentor.Pipeline("/path/does/not/exist") 40 | 41 | 42 | def test_initialise_with_empty_folder(): 43 | empty_temp_directory = tempfile.mkdtemp() 44 | p = Augmentor.Pipeline(empty_temp_directory) 45 | 46 | assert 
os.path.exists(os.path.join(empty_temp_directory, 'output')) 47 | assert len(p.augmentor_images) == 0 48 | 49 | 50 | def test_initialise_with_subfolders(): 51 | 52 | num_of_folders = 10 53 | num_of_images = 10 54 | 55 | temp_directories, temp_files, parent_temp_directory = \ 56 | create_sub_folders(number_of_sub_folders=num_of_folders, number_of_images=num_of_images) 57 | 58 | assert len(temp_directories) == num_of_folders 59 | assert len(temp_files) == num_of_images * num_of_folders 60 | 61 | # Add some images in the root directory, and some folders in the sub directories, 62 | # they should not be found when doing the scan 63 | tmp_not_to_be_found = tempfile.NamedTemporaryFile(dir=parent_temp_directory, suffix='.JPEG', delete=False) 64 | im_not_to_be_found = Image.fromarray(np.uint8(np.random.rand(800, 800) * 255)) 65 | im_not_to_be_found.save(tmp_not_to_be_found.name, "JPEG") 66 | 67 | sub_temp_directory_not_to_be_found = tempfile.mkdtemp(dir=temp_directories[random.randint(0, len(temp_directories)-1)]) 68 | 69 | # TODO: fix 70 | files_found = ImageUtilities.scan_directory_with_classes(parent_temp_directory) 71 | 72 | assert len(files_found.keys()) == num_of_folders 73 | 74 | image_count = 0 75 | for val in files_found.values(): 76 | image_count += len(val) 77 | for image_path in val: 78 | assert os.path.isfile(image_path) 79 | 80 | assert image_count == num_of_folders * num_of_images 81 | 82 | scanned_directories = [] 83 | glob_scanned_files = glob.glob(os.path.join(parent_temp_directory, '*')) 84 | 85 | for glob_scanned_file in glob_scanned_files: 86 | if os.path.isdir(glob_scanned_file): 87 | scanned_directories.append(os.path.split(glob_scanned_file)[1]) 88 | 89 | for key in files_found.keys(): 90 | assert key in scanned_directories 91 | assert os.path.exists(os.path.join(parent_temp_directory, key)) 92 | 93 | # Tidy up and delete temporary files. 
94 | tmp_not_to_be_found.close() 95 | shutil.rmtree(sub_temp_directory_not_to_be_found) 96 | 97 | for temp_file in temp_files: 98 | temp_file.close() 99 | 100 | for temp_directory in temp_directories: 101 | shutil.rmtree(temp_directory) 102 | 103 | shutil.rmtree(parent_temp_directory) 104 | 105 | 106 | def test_initialise_with_ten_images(): 107 | 108 | tmpdir = tempfile.mkdtemp() 109 | tmps = [] 110 | 111 | for i in range(10): 112 | tmps.append(tempfile.NamedTemporaryFile(dir=tmpdir, suffix='.JPEG', delete=False)) 113 | 114 | bytestream = io.BytesIO() 115 | 116 | im = Image.new('RGB', (800, 800)) 117 | im.save(bytestream, 'JPEG') 118 | 119 | tmps[i].file.write(bytestream.getvalue()) 120 | tmps[i].flush() 121 | 122 | p = Augmentor.Pipeline(tmpdir) 123 | assert len(p.augmentor_images) == len(tmps) 124 | 125 | # Check if we can re-read all these images using PIL. 126 | # This will fail for Windows, as you cannot open a file that is already open. 127 | if os.name != "nt": 128 | for i in range(len(tmps)): 129 | im = Image.open(p.augmentor_images[i].image_path) 130 | assert im is not None 131 | 132 | # Check if the paths found during the scan are exactly the paths 133 | # stored by Augmentor after initialisation 134 | for i in range(len(tmps)): 135 | p_paths = [x.image_path for x in p.augmentor_images] 136 | assert tmps[i].name in p_paths 137 | 138 | # Check if all the paths stored by the Pipeline object 139 | # actually exist and are valid paths 140 | for i in range(len(tmps)): 141 | assert os.path.exists(p.augmentor_images[i].image_path) 142 | 143 | # Close all temporary files which will also delete them automatically 144 | for i in range(len(tmps)): 145 | tmps[i].close() 146 | 147 | # Finally remove the directory (and everything in it) as mkdtemp does 148 | # not delete itself after closing automatically 149 | shutil.rmtree(tmpdir) 150 | 151 | 152 | def test_dataframe_initialise_with_ten_images(): 153 | pandas = pytest.importorskip("pandas") 154 | 155 | tmpdir = 
tempfile.mkdtemp() 156 | tmps = [] 157 | 158 | for i in range(10): 159 | tmps.append(tempfile.NamedTemporaryFile(dir=tmpdir, suffix='.JPEG', delete=False)) 160 | 161 | bytestream = io.BytesIO() 162 | 163 | im = Image.new('RGB', (800, 800)) 164 | im.save(bytestream, 'JPEG') 165 | 166 | tmps[i].file.write(bytestream.getvalue()) 167 | tmps[i].flush() 168 | 169 | temp_df = pandas.DataFrame(dict(path = [i.name for i in tmps], 170 | cat_id = [len(i.name) for i in tmps])) 171 | 172 | p = Augmentor.DataFramePipeline(temp_df, 173 | image_col = 'path', 174 | category_col='cat_id') 175 | assert len(p.augmentor_images) == len(tmps) 176 | 177 | # Check if we can re-read all these images using PIL. 178 | # This will fail for Windows, as you cannot open a file that is already open. 179 | if os.name != "nt": 180 | for i in range(len(tmps)): 181 | im = Image.open(p.augmentor_images[i].image_path) 182 | assert im is not None 183 | 184 | # Check if the paths found during the scan are exactly the paths 185 | # stored by Augmentor after initialisation 186 | for i in range(len(tmps)): 187 | p_paths = [x.image_path for x in p.augmentor_images] 188 | assert tmps[i].name in p_paths 189 | 190 | # Check if all the paths stored by the Pipeline object 191 | # actually exist and are valid paths 192 | for i in range(len(tmps)): 193 | assert os.path.exists(p.augmentor_images[i].image_path) 194 | 195 | # Close all temporary files which will also delete them automatically 196 | for i in range(len(tmps)): 197 | tmps[i].close() 198 | 199 | # Finally remove the directory (and everything in it) as mkdtemp does 200 | # not delete itself after closing automatically 201 | shutil.rmtree(tmpdir) 202 | 203 | 204 | def test_class_image_scan(): 205 | # Some constants 206 | num_of_sub_dirs = random.randint(1, 10) 207 | num_of_im_files = random.randint(1, 10) 208 | 209 | output_directory = "some_folder" 210 | 211 | # Make an empty temporary directory 212 | initial_temp_directory = tempfile.mkdtemp() 213 | 214 | 
sub_dirs = [] 215 | 216 | # Make num_of_sub_dirs subdirectories of this initial directory 217 | for _ in range(num_of_sub_dirs): 218 | sub_dirs.append(tempfile.mkdtemp(dir=initial_temp_directory)) 219 | 220 | tmp_files = [] 221 | image_counter = 0 222 | 223 | # Just to mix things up, we can create different file types 224 | suffix_filetypes = [('.PNG', 'PNG'), ('.JPEG', 'JPEG'), ('.GIF', 'GIF'), ('.JPG', 'JPEG'), ('.png', 'PNG'), ('.jpeg', 'JPEG'), ('.gif', 'GIF'), ('.jpg', 'JPEG')] 225 | 226 | # Make num_of_im_files images in each sub directory. 227 | for sub_dir in sub_dirs: 228 | for iterator in range(num_of_im_files): 229 | suffix_filetype = random.choice(suffix_filetypes) 230 | tmp_files.append(tempfile.NamedTemporaryFile(dir=os.path.abspath(sub_dir), suffix=suffix_filetype[0], delete=False)) 231 | im = Image.fromarray(np.uint8(np.random.rand(80, 80, 3) * 255)) 232 | im.save(tmp_files[image_counter].name, suffix_filetype[1]) 233 | image_counter += 1 234 | 235 | # Make a folder within the root directory with the same name as the output directory, 236 | # it should be ignored and all the tests should still run fine 237 | os.mkdir(os.path.join(initial_temp_directory, output_directory)) 238 | 239 | def run(): 240 | p = Augmentor.Pipeline(initial_temp_directory, output_directory=output_directory) 241 | 242 | assert len(p.augmentor_images) == (num_of_sub_dirs * num_of_im_files) 243 | assert len(p.class_labels) == num_of_sub_dirs 244 | 245 | class_label_strings = [x[0] for x in p.class_labels] 246 | for sub_dir in sub_dirs: 247 | assert os.path.basename(sub_dir) in class_label_strings 248 | 249 | unique_class_labels = [x.class_label for x in p.augmentor_images] 250 | unique_class_labels = set(unique_class_labels) 251 | unique_class_labels = list(unique_class_labels) 252 | assert len(unique_class_labels) == num_of_sub_dirs 253 | 254 | for unique_class_label in unique_class_labels: 255 | assert unique_class_label in class_label_strings 256 | 257 | for 
class_label_string in class_label_strings: 258 | assert class_label_string in unique_class_labels 259 | 260 | assert set(class_label_strings) == set(unique_class_labels) 261 | 262 | # Count 263 | labels_int = [x.class_label_int for x in p.augmentor_images] 264 | bins = np.bincount(labels_int) 265 | for bin in bins: 266 | assert bin == num_of_im_files 267 | 268 | labels = [x.class_label for x in p.augmentor_images] 269 | for sub_dir in sub_dirs: 270 | assert labels.count(os.path.basename(sub_dir)) == num_of_im_files 271 | 272 | # Run the tests now, we will repeat later so it's been made into a func. 273 | run() 274 | 275 | # Add some extra images in places where they should not be and re-run the tests. 276 | temp_file_in_root_dir1 = tempfile.NamedTemporaryFile(dir=initial_temp_directory, suffix=".PNG", delete=False) 277 | temp_file_in_root_dir2 = tempfile.NamedTemporaryFile(dir=initial_temp_directory, suffix=".PNG", delete=False) 278 | 279 | # All tests should run exactly as before, those two files above should be ignored. 
280 | run() 281 | 282 | # Sub directories in the sub directories should be ignored, so all tests should pass after 283 | # randomly placing a folder in any of these sud directories 284 | r1 = random.randint(0, len(sub_dirs)-1) 285 | r2 = random.randint(0, len(sub_dirs)-1) 286 | os.mkdir(os.path.join(initial_temp_directory, sub_dirs[r1], output_directory)) 287 | os.mkdir(os.path.join(initial_temp_directory, sub_dirs[r2], "ignore_me")) 288 | temp_to_ignore = tempfile.NamedTemporaryFile(dir=os.path.join(initial_temp_directory, 289 | sub_dirs[r1], output_directory), suffix=".JPEG", delete=False) 290 | im = Image.fromarray(np.uint8(np.random.rand(80, 80, 3) * 255)) 291 | im.save(temp_to_ignore.name, "JPEG") 292 | run() 293 | 294 | # Clean up 295 | for tmp_file in tmp_files: 296 | tmp_file.close() 297 | 298 | temp_file_in_root_dir1.close() 299 | temp_file_in_root_dir2.close() 300 | temp_to_ignore.close() 301 | 302 | shutil.rmtree(os.path.join(initial_temp_directory, sub_dirs[r1], output_directory)) 303 | shutil.rmtree(os.path.join(initial_temp_directory, sub_dirs[r2], "ignore_me")) 304 | 305 | for sub_dir in sub_dirs: 306 | shutil.rmtree(sub_dir) 307 | 308 | shutil.rmtree(os.path.join(initial_temp_directory, output_directory)) 309 | shutil.rmtree(initial_temp_directory) 310 | -------------------------------------------------------------------------------- /tests/test_multi_threading.py: -------------------------------------------------------------------------------- 1 | # Context 2 | import os 3 | import sys 4 | sys.path.insert(0, os.path.abspath('.')) 5 | 6 | # Imports 7 | import Augmentor 8 | import tempfile 9 | import io 10 | import shutil 11 | from PIL import Image 12 | from Augmentor import Operations 13 | import glob 14 | 15 | original_dimensions = (640, 480) 16 | larger_dimensions = (1200, 1000) 17 | smaller_dimensions = (360, 240) 18 | 19 | def test_simple_multi_threading_example(): 20 | 21 | tmpdir = tempfile.mkdtemp() 22 | 23 | n = 100 24 | tmpfiles = [] 25 | 
for i in range(n): 26 | tmpfiles.append(tempfile.NamedTemporaryFile(dir=tmpdir, suffix='.JPEG', delete=False)) 27 | im = Image.new('RGB', original_dimensions) 28 | im.save(tmpfiles[i].name, 'JPEG') 29 | 30 | p = Augmentor.Pipeline(tmpdir) 31 | assert len(p.augmentor_images) == n 32 | 33 | p.resize(probability=1, width=larger_dimensions[0], height=larger_dimensions[1]) 34 | p.sample(n, multi_threaded=True) 35 | 36 | generated_images = glob.glob(os.path.join(tmpdir, "output", "*.JPEG")) 37 | number_of_gen_images = len(generated_images) 38 | 39 | assert number_of_gen_images == n 40 | 41 | for im_path in generated_images: 42 | im_g = Image.open(im_path) 43 | assert im_g.size == larger_dimensions 44 | 45 | # Clean up 46 | for t in tmpfiles: 47 | t.close() 48 | 49 | # Removing because it causes errors on Windows 50 | # shutil.rmtree(tmpdir) 51 | 52 | 53 | def test_all_operations_multi_thread(): 54 | tmpdir = tempfile.mkdtemp() 55 | 56 | n = 100 57 | tmpfiles = [] 58 | for i in range(n): 59 | tmpfiles.append(tempfile.NamedTemporaryFile(dir=tmpdir, suffix='.JPEG', delete=False)) 60 | im = Image.new('RGB', (480, 800)) 61 | im.save(tmpfiles[i].name, 'JPEG') 62 | 63 | p = Augmentor.Pipeline(tmpdir) 64 | assert len(p.augmentor_images) == n 65 | 66 | p.resize(probability=1, width=300, height=300) 67 | p.rotate(probability=1, max_left_rotation=5, max_right_rotation=5) 68 | p.flip_left_right(probability=0.5) 69 | p.flip_top_bottom(probability=0.5) 70 | p.flip_random(probability=0.5) 71 | 72 | p.sample(n, multi_threaded=True) 73 | 74 | generated_images = glob.glob(os.path.join(tmpdir, "output", "*.JPEG")) 75 | number_of_gen_images = len(generated_images) 76 | 77 | assert number_of_gen_images == n 78 | 79 | # Clean up 80 | for t in tmpfiles: 81 | t.close() 82 | 83 | # Removing because it causes errors on Windows 84 | # shutil.rmtree(tmpdir) 85 | 86 | def test_multi_threading_override(): 87 | 88 | tmpdir = tempfile.mkdtemp() 89 | 90 | n = 100 91 | tmpfiles = [] 92 | for i in 
range(n): 93 | tmpfiles.append(tempfile.NamedTemporaryFile(dir=tmpdir, suffix='.JPEG', delete=False)) 94 | im = Image.new('RGB', original_dimensions) 95 | im.save(tmpfiles[i].name, 'JPEG') 96 | 97 | p = Augmentor.Pipeline(tmpdir) 98 | assert len(p.augmentor_images) == n 99 | 100 | p.resize(probability=1, width=larger_dimensions[0], height=larger_dimensions[1]) 101 | p.sample(n, multi_threaded=False) 102 | 103 | generated_images = glob.glob(os.path.join(tmpdir, "output", "*.JPEG")) 104 | number_of_gen_images = len(generated_images) 105 | 106 | assert number_of_gen_images == n 107 | 108 | for im_path in generated_images: 109 | im_g = Image.open(im_path) 110 | assert im_g.size == larger_dimensions 111 | 112 | # Clean up 113 | for t in tmpfiles: 114 | t.close() 115 | 116 | # Removing because it causes errors on Windows 117 | # shutil.rmtree(tmpdir) 118 | -------------------------------------------------------------------------------- /tests/test_pipeline_add_operations.py: -------------------------------------------------------------------------------- 1 | # Context 2 | import os 3 | import sys 4 | sys.path.insert(0, os.path.abspath('.')) 5 | 6 | import pytest 7 | 8 | # Imports 9 | import Augmentor 10 | import Augmentor.Operations 11 | 12 | 13 | def test_add_rotate_operation(): 14 | p = Augmentor.Pipeline() 15 | 16 | assert len(p.augmentor_images) == 0 17 | 18 | with pytest.raises(ValueError): 19 | p.rotate(probability=1, max_left_rotation=50, max_right_rotation=50) 20 | p.rotate(probability=1.1, max_left_rotation=10, max_right_rotation=10) 21 | p.rotate(probability='a string', max_left_rotation=10, max_right_rotation=10) 22 | 23 | assert len(p.operations) == 0 24 | 25 | p.rotate(probability=1, max_left_rotation=15, max_right_rotation=15) 26 | 27 | assert len(p.operations) == 1 28 | assert isinstance(p.operations[0], Augmentor.Operations.Operation) 29 | -------------------------------------------------------------------------------- /tests/test_pipeline_utilities.py: 
# ---- tests/test_pipeline_utilities.py ----
# Context
import os
import sys
sys.path.insert(0, os.path.abspath('.'))

# Imports
import Augmentor


# ---- tests/test_random_color_brightness_contrast.py ----
import os
import sys
sys.path.insert(0, os.path.abspath('.'))

import tempfile
import shutil
from PIL import Image
from Augmentor import Operations


from util_funcs import create_colour_temp_image, create_greyscale_temp_image


def _assert_enhancement_op(make_op):
    """Apply the operation built by ``make_op()`` to one colour image and one
    greyscale image; both results must be PIL images.

    ``make_op`` is a zero-argument callable returning a fresh Operation.
    Temp files and directories created here are cleaned up before returning.
    """
    tmp, tmpdir = create_colour_temp_image((800, 800), "JPEG")

    result = make_op().perform_operation([Image.open(tmp)])
    assert result is not None
    assert isinstance(result[0], Image.Image)

    tmp_bw, tmpdir_bw = create_greyscale_temp_image((800, 800), "PNG")

    # Bug fix: the original tests opened `tmp` (the colour image) a second
    # time here, so the greyscale file was created but never exercised.
    # The greyscale image (tmp_bw) is now actually tested.
    result_bw = make_op().perform_operation([Image.open(tmp_bw)])
    assert result_bw is not None
    assert isinstance(result_bw[0], Image.Image)

    tmp.close()
    tmp_bw.close()
    shutil.rmtree(tmpdir)
    shutil.rmtree(tmpdir_bw)


def test_random_color_in_memory():
    """RandomColor must handle both colour and greyscale input in memory."""
    _assert_enhancement_op(
        lambda: Operations.RandomColor(probability=1, min_factor=0.5, max_factor=1.5))


def test_random_contrast_in_memory():
    """RandomContrast must handle both colour and greyscale input in memory."""
    _assert_enhancement_op(
        lambda: Operations.RandomContrast(probability=1, min_factor=0.5, max_factor=1.5))


def test_random_brightness_in_memory():
    """RandomBrightness must handle both colour and greyscale input in memory."""
    _assert_enhancement_op(
        lambda: Operations.RandomBrightness(probability=1, min_factor=0.5, max_factor=1.5))


# ---- tests/test_resize.py ----
# Context
import os
import sys
sys.path.insert(0, os.path.abspath('.'))

# Imports
import Augmentor
import tempfile
import io
import shutil
from PIL import Image
from Augmentor import Operations
import glob

original_dimensions = (800, 800)
larger_dimensions = (1200, 1200)
smaller_dimensions = (400, 400)


def test_resize_in_memory():
    """Resize an in-memory image both up and down; no pipeline output on disk."""
    tmpdir = tempfile.mkdtemp()
    tmp = tempfile.NamedTemporaryFile(dir=tmpdir, suffix='.JPEG', delete=False)
    im = Image.new('RGB', original_dimensions)
    im.save(tmp.name, 'JPEG')
26 | 27 | resize = Operations.Resize(probability=1, width=larger_dimensions[0], height=larger_dimensions[1], resample_filter="BICUBIC") 28 | 29 | im = [im] 30 | 31 | im_resized = resize.perform_operation(im) 32 | assert im_resized[0].size == larger_dimensions 33 | 34 | resize_smaller = Operations.Resize(probability=1, width=smaller_dimensions[0], height=smaller_dimensions[1], resample_filter="BICUBIC") 35 | im_resized_smaller = resize_smaller.perform_operation(im) 36 | 37 | assert im_resized_smaller[0].size == smaller_dimensions 38 | 39 | tmp.close() 40 | 41 | 42 | def test_resize_save_to_disk(): 43 | tmpdir = tempfile.mkdtemp() 44 | 45 | n = 10 46 | tmpfiles = [] 47 | for i in range(n): 48 | tmpfiles.append(tempfile.NamedTemporaryFile(dir=tmpdir, suffix='.JPEG', delete=False)) 49 | im = Image.new('RGB', original_dimensions) 50 | im.save(tmpfiles[i].name, 'JPEG') 51 | 52 | p = Augmentor.Pipeline(tmpdir) 53 | assert len(p.augmentor_images) == n 54 | 55 | p.resize(probability=1, width=larger_dimensions[0], height=larger_dimensions[1]) 56 | p.sample(n) 57 | 58 | generated_images = glob.glob(os.path.join(tmpdir, "output", "*.JPEG")) 59 | number_of_gen_images = len(generated_images) 60 | 61 | assert number_of_gen_images == n 62 | 63 | for im_path in generated_images: 64 | im_g = Image.open(im_path) 65 | assert im_g.size == larger_dimensions 66 | 67 | # Clean up 68 | for t in tmpfiles: 69 | t.close() 70 | 71 | # Removing because of errors caused in Windows 72 | # shutil.rmtree(tmpdir) 73 | -------------------------------------------------------------------------------- /tests/test_rotate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.insert(0, os.path.abspath('.')) 4 | 5 | import tempfile 6 | import shutil 7 | from PIL import Image 8 | from Augmentor import Operations 9 | 10 | 11 | def rotate_images(tmpdir, rot): 12 | original_dimensions = (800, 800) 13 | 14 | im_tmp = 
tmpdir.mkdir("subfolder").join('test.JPEG') 15 | im = Image.new('RGB', original_dimensions) 16 | im.save(str(im_tmp), 'JPEG') 17 | 18 | r = Operations.Rotate(probability=1, rotation=rot) 19 | im = [im] 20 | im_r = r.perform_operation(im) 21 | 22 | assert im_r is not None 23 | assert im_r[0].size == original_dimensions 24 | 25 | 26 | def test_rotate_images_90(tmpdir): 27 | rotate_images(tmpdir, 90) 28 | 29 | 30 | def test_rotate_images_180(tmpdir): 31 | rotate_images(tmpdir, 180) 32 | 33 | 34 | def test_rotate_images_270(tmpdir): 35 | rotate_images(tmpdir, 270) 36 | 37 | 38 | def test_rotate_images_custom_temp_files(): 39 | 40 | original_dimensions = (800, 800) 41 | 42 | tmpdir = tempfile.mkdtemp() 43 | tmp = tempfile.NamedTemporaryFile(dir=tmpdir, suffix='.JPEG', delete=False) 44 | im = Image.new('RGB', original_dimensions) 45 | im.save(tmp.name, 'JPEG') 46 | 47 | r = Operations.Rotate(probability=1, rotation=90) 48 | im = [im] 49 | im_r = r.perform_operation(im) 50 | 51 | assert im_r is not None 52 | assert im_r[0].size == original_dimensions 53 | 54 | tmp.close() 55 | shutil.rmtree(tmpdir) 56 | -------------------------------------------------------------------------------- /tests/test_torch_transform.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | # Context 4 | import numpy as np 5 | import PIL.Image as Image 6 | 7 | import Augmentor 8 | 9 | 10 | def test_torch_transform(): 11 | torchvision = pytest.importorskip("torchvision") 12 | 13 | red = np.zeros([10, 10, 3], np.uint8) 14 | red[..., 0] = 255 15 | red = Image.fromarray(red) 16 | 17 | p = Augmentor.Pipeline() 18 | 19 | # include multiple transforms to test integration 20 | p.greyscale(probability=1) 21 | p.zoom(probability=1, min_factor=1.0, max_factor=1.0) 22 | p.rotate_random_90(probability=1) 23 | 24 | transforms = torchvision.transforms.Compose([ 25 | p.torch_transform() 26 | ]) 27 | 28 | assert red != transforms(red) 29 | 30 | # assert that 
all operations were correctly applied 31 | result = red 32 | for op in p.operations: 33 | result = op.perform_operation([result])[0] 34 | assert transforms(red) == result 35 | -------------------------------------------------------------------------------- /tests/test_user_operation_parameter_input.py: -------------------------------------------------------------------------------- 1 | import Augmentor 2 | import Augmentor.ImageUtilities 3 | 4 | 5 | def test_user_param_parsing(): 6 | 7 | # Scalar input should return itself, as a integer 8 | scalar_input = Augmentor.ImageUtilities.parse_user_parameter(user_param=5) 9 | assert scalar_input == 5 10 | assert type(scalar_input) == int 11 | 12 | # A float input should return itself as a float. 13 | float_input = Augmentor.ImageUtilities.parse_user_parameter(user_param=1.01) 14 | assert float_input == 1.01 15 | assert type(float_input) == float 16 | 17 | # Lists are interpreted as [from, to, step] and should return a value between from and to 18 | # while respecting the step parameter. 
19 | list_input = Augmentor.ImageUtilities.parse_user_parameter(user_param=[-10, 10, 0.5]) 20 | assert -10 <= list_input <= 10 21 | 22 | list_input = Augmentor.ImageUtilities.parse_user_parameter(user_param=[-10, 10, 0.01]) 23 | assert -10 <= list_input <= 10 24 | 25 | list_input = Augmentor.ImageUtilities.parse_user_parameter(user_param=[-10, 10, 5]) 26 | assert -10 <= list_input <= 10 27 | assert list_input % 5 == 0 28 | 29 | list_input = Augmentor.ImageUtilities.parse_user_parameter(user_param=[-10, 10, 2]) 30 | assert -10 <= list_input <= 10 31 | assert list_input % 2 == 0 32 | 33 | list_input = Augmentor.ImageUtilities.parse_user_parameter(user_param=[2]) 34 | assert 0 <= list_input <= 2 35 | assert list_input in [0, 1, 2] 36 | 37 | list_input = Augmentor.ImageUtilities.parse_user_parameter(user_param=[10, 12]) 38 | assert 10 <= list_input <= 12 39 | assert list_input in [10, 11, 12] 40 | 41 | # Tuples are interpreted as meaning a number of discrete values 42 | tuple_input = Augmentor.ImageUtilities.parse_user_parameter(user_param=(2, 4, 6)) 43 | assert tuple_input in (2, 4, 6) 44 | 45 | tuple_input = Augmentor.ImageUtilities.parse_user_parameter(user_param=(2.1, 4.2, 6.3)) 46 | assert tuple_input in (2.1, 4.2, 6.3) 47 | 48 | tuple_input = Augmentor.ImageUtilities.parse_user_parameter(user_param=(2,)) # Ensure it is interpreted 49 | assert tuple_input == 2 # as a tuple and not a scalar. 
50 | assert type(tuple_input) == int 51 | 52 | tuple_input = Augmentor.ImageUtilities.parse_user_parameter(user_param=(2.2,)) 53 | assert tuple_input == 2.2 54 | assert type(tuple_input) == float 55 | -------------------------------------------------------------------------------- /tests/util_funcs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import tempfile 4 | import io 5 | import shutil 6 | import numpy as np 7 | 8 | from PIL import Image 9 | 10 | 11 | def create_colour_temp_image(size, file_format): 12 | tmpdir = tempfile.mkdtemp() 13 | tmp = tempfile.NamedTemporaryFile(dir=tmpdir, suffix='.JPEG', delete=False) 14 | 15 | im = Image.fromarray(np.uint8(np.random.rand(800, 800, 3) * 255)) 16 | im.save(tmp.name, file_format) 17 | 18 | return tmp, tmpdir 19 | 20 | 21 | def create_greyscale_temp_image(size, file_format): 22 | tmpdir = tempfile.mkdtemp() 23 | tmp = tempfile.NamedTemporaryFile(dir=tmpdir, suffix='.JPEG', delete=False) 24 | 25 | im = Image.fromarray(np.uint8(np.random.rand(800, 800) * 255)) 26 | im.save(tmp.name, file_format) 27 | 28 | return tmp, tmpdir 29 | 30 | 31 | def create_sub_folders(number_of_sub_folders, number_of_images): 32 | 33 | parent_temp_directory = tempfile.mkdtemp() 34 | 35 | temp_directories = [] 36 | temp_files = [] 37 | 38 | for x in range(number_of_sub_folders): 39 | sub_temp_directory = tempfile.mkdtemp(dir=parent_temp_directory) 40 | temp_directories.append(sub_temp_directory) 41 | for y in range(number_of_images): 42 | temp_file = tempfile.NamedTemporaryFile(dir=sub_temp_directory, suffix='.JPEG', delete=False) 43 | im_array = Image.fromarray(np.uint8(np.random.rand(800, 800) * 255)) 44 | im_array.save(temp_file.name, 'JPEG') 45 | temp_files.append(temp_file) 46 | 47 | return temp_directories, temp_files, parent_temp_directory 48 | --------------------------------------------------------------------------------