├── .gitignore ├── .travis.yml ├── AUTHORS.rst ├── CONTRIBUTING.rst ├── DRCODEOWNERS ├── HISTORY.rst ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── docs ├── .gitignore ├── Makefile ├── authors.rst ├── conf.py ├── contributing.rst ├── guides │ └── installation.md ├── history.rst ├── index.rst ├── make.bat ├── markdowns │ └── parameters.md ├── readme.rst └── usage.rst ├── examples └── cats_vs_dogs │ ├── Cats_v_Dogs_Test_Example.ipynb │ └── cats_vs_dogs.csv ├── pic2vec ├── README.md ├── __init__.py ├── build_featurizer.py ├── data_featurizing.py ├── enums.py ├── feature_preprocessing.py ├── image_featurizer.py ├── saved_models │ └── squeezenet_weights_tf_dim_ordering_tf_kernels.h5 └── squeezenet.py ├── requirements.txt ├── requirements_dev.txt ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── build_featurizer_testing │ ├── inceptionv3_test_prediction.npy │ ├── resnet50_test_prediction.npy │ ├── squeezenet_test_prediction.npy │ ├── vgg16_test_prediction.npy │ ├── vgg19_test_prediction.npy │ └── xception_test_prediction.npy ├── data_featurizing_testing │ ├── array_testing │ │ └── check_featurize.npy │ └── csv_testing │ │ ├── featurize_data_check_csv_full │ │ └── featurize_data_check_csv_images ├── feature_preprocessing_testing │ ├── csv_testing │ │ ├── create_csv_check │ │ ├── csv_image_path_check │ │ ├── directory_combined_image_path_test │ │ ├── directory_preprocess_system_test │ │ ├── error_directory_combined_test │ │ ├── error_row │ │ └── url_test │ ├── test_image_arrays │ │ ├── image_test.npy │ │ ├── image_test_grayscale.npy │ │ ├── image_test_isotropic.npy │ │ └── image_test_isotropic_grayscale.npy │ ├── test_images │ │ ├── arendt.bmp │ │ ├── borges.jpg │ │ ├── heidegger.gif │ │ └── sappho.png │ └── test_preprocessing_arrays │ │ ├── arendt.npy │ │ ├── arendt_grayscale.npy │ │ ├── borges.npy │ │ ├── sappho.npy │ │ └── sappho_grayscale.npy ├── image_featurizer_testing │ ├── array_tests │ │ ├── check_prediction_array_inceptionv3.npy │ │ ├── 
check_prediction_array_inceptionv3_mult.npy │ │ ├── check_prediction_array_resnet50.npy │ │ ├── check_prediction_array_resnet50_mult.npy │ │ ├── check_prediction_array_squeezenet.npy │ │ ├── check_prediction_array_squeezenet_mult.npy │ │ ├── check_prediction_array_vgg16.npy │ │ ├── check_prediction_array_vgg16_mult.npy │ │ ├── check_prediction_array_vgg19.npy │ │ ├── check_prediction_array_vgg19_mult.npy │ │ ├── check_prediction_array_xception.npy │ │ └── check_prediction_array_xception_mult.npy │ └── csv_checking │ │ ├── inceptionv3_check_csv.csv │ │ ├── inceptionv3_check_csv_mult.csv │ │ ├── resnet50_check_csv.csv │ │ ├── resnet50_check_csv_mult.csv │ │ ├── squeezenet_check_csv.csv │ │ ├── squeezenet_check_csv_mult.csv │ │ ├── testing_data.csv │ │ ├── vgg16_check_csv.csv │ │ ├── vgg16_check_csv_mult.csv │ │ ├── vgg19_check_csv.csv │ │ ├── vgg19_check_csv_mult.csv │ │ ├── xception_check_csv.csv │ │ └── xception_check_csv_mult.csv ├── test_build_featurizer.py ├── test_data_featurizing.py ├── test_feature_preprocessing.py └── test_image_featurizer.py ├── tox.ini ├── travis_pypi_setup.py └── utils └── create_test_files.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | .pytest_cache 12 | env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | # DS_Store Mac Nightmare 105 | .DS_Store 106 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | # Enable 3.7 without globally enabling sudo and xenial dist for other build jobs 4 | matrix: 5 | include: 6 | - python: 2.7 7 | - python: 3.4 8 | - python: 3.5 9 | - python: 3.6 10 | - python: 3.7 11 | dist: xenial 12 | sudo: true 13 | 14 | # command to install dependencies 15 | install: 16 | - pip install -r requirements.txt 17 | - pip install coveralls pytest-xdist pytest-cov 18 | # Turn off email notifications 19 | notifications: 20 | email: false 21 | # command to run tests 22 | script: 23 | py.test --cov-report= --cov=pic2vec --boxed tests/ -vvvvs 24 | # Coveralls 25 | after_success: 26 | coveralls 27 | 
-------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Credits 3 | ======= 4 | 5 | Development Lead 6 | ---------------- 7 | 8 | * Jett Oristaglio 9 | 10 | Contributors 11 | ------------ 12 | 13 | None yet. Why not be the first? 14 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: shell 2 | 3 | ============ 4 | Contributing 5 | ============ 6 | 7 | Contributions are welcome, and they are greatly appreciated! Every 8 | little bit helps, and credit will always be given. 9 | 10 | You can contribute in many ways: 11 | 12 | Types of Contributions 13 | ---------------------- 14 | 15 | Report Bugs 16 | ~~~~~~~~~~~ 17 | 18 | Report bugs at https://github.com/datarobot/pic2vec/issues. 19 | 20 | If you are reporting a bug, please include: 21 | 22 | * Your operating system name and version. 23 | * Any details about your local setup that might be helpful in troubleshooting. 24 | * Detailed steps to reproduce the bug. 25 | 26 | Fix Bugs 27 | ~~~~~~~~ 28 | 29 | Look through the GitHub issues for bugs. Anything tagged with "bug" 30 | and "help wanted" is open to whoever wants to implement it. 31 | 32 | Implement Features 33 | ~~~~~~~~~~~~~~~~~~ 34 | 35 | Look through the GitHub issues for features. Anything tagged with "enhancement" 36 | and "help wanted" is open to whoever wants to implement it. 37 | 38 | Write Documentation 39 | ~~~~~~~~~~~~~~~~~~~ 40 | 41 | pic2vec could always use more documentation, whether as part of the 42 | official pic2vec docs, in docstrings, or even on the web in blog posts, 43 | articles, and such. 44 | 45 | Submit Feedback 46 | ~~~~~~~~~~~~~~~ 47 | 48 | The best way to send feedback is to file an issue at https://github.com/datarobot/pic2vec/issues. 
49 | 50 | If you are proposing a feature: 51 | 52 | * Explain in detail how it would work. 53 | * Keep the scope as narrow as possible, to make it easier to implement. 54 | * Remember that this is a volunteer-driven project, and that contributions 55 | are welcome :) 56 | 57 | Get Started! 58 | ------------ 59 | 60 | Ready to contribute? Here's how to set up `pic2vec` for local development. 61 | 62 | 1. Fork the `pic2vec` repo on GitHub. 63 | 2. Clone your fork locally:: 64 | 65 | $ git clone git@github.com:your_name_here/pic2vec.git 66 | 67 | 3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: 68 | 69 | $ mkvirtualenv pic2vec 70 | $ cd pic2vec/ 71 | $ python setup.py develop 72 | 73 | 4. Create a branch for local development:: 74 | 75 | $ git checkout -b name-of-your-bugfix-or-feature 76 | 77 | Now you can make your changes locally. 78 | 79 | 5. When you're done making changes, check that your changes pass flake8 and the tests, including testing other Python versions with tox:: 80 | 81 | $ flake8 pic2vec tests 82 | $ python setup.py test or py.test 83 | $ tox 84 | 85 | To get flake8 and tox, just pip install them into your virtualenv. 86 | 87 | 6. Commit your changes and push your branch to GitHub:: 88 | 89 | $ git add . 90 | $ git commit -m "Your detailed description of your changes." 91 | $ git push origin name-of-your-bugfix-or-feature 92 | 93 | 7. Submit a pull request through the GitHub website. 94 | 95 | Pull Request Guidelines 96 | ----------------------- 97 | 98 | Before you submit a pull request, check that it meets these guidelines: 99 | 100 | 1. The pull request should include tests. 101 | 2. If the pull request adds functionality, the docs should be updated. Put 102 | your new functionality into a function with a docstring, and add the 103 | feature to the list in README.md. 104 | 3.
The pull request should work for Python 2.7, 3.4, 3.5, 3.6, and 3.7. Check 105 | https://travis-ci.org/datarobot/pic2vec/pull_requests 106 | and make sure that the tests pass for all supported Python versions. 107 | 108 | Tips 109 | ---- 110 | 111 | To run tests:: 112 | 113 | $ py.test tests 114 | -------------------------------------------------------------------------------- /DRCODEOWNERS: -------------------------------------------------------------------------------- 1 | # This file defines which domain owns what parts of this repository. 2 | # This repository is a central place for defining Jenkins Jobs and Jarvis Suites to orchestrate 3 | # build and test workflows for pull requests and pipelines. As such, it is expected 4 | # that this repository will have many owners and some shared areas (reusable macros, etc.) 5 | # 6 | # The syntax is the same as defined in 7 | # https://help.github.com/articles/about-codeowners/ 8 | # 9 | # Important Rules: 10 | # 1. The last matching pattern in this file takes precedence 11 | # 12 | # 2. Only domains (github team) own code, not individuals 13 | # see list at https://github.com/orgs/datarobot/teams 14 | # 15 | # Default owners for everything in the repo. 16 | # Unless a later match takes precedence, these groups will be requested for 17 | # review when someone opens a pull request. 18 | / @datarobot/core-modeling 19 | -------------------------------------------------------------------------------- /HISTORY.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | History 3 | ======= 4 | 0.101.1 (2019-9-25) 5 | ------------------ 6 | * Limited Keras version to pre-2.3.0 to fix issues from Keras' breaking changes 7 | 8 | 0.101.0 (2019-3-25) 9 | ------------------ 10 | * Updated version of Trafaret to a non-beta version 11 | * Updated keras to 2.2.3 or greater 12 | * This library upgrade changes prediction consistency across past versions of pic2vec.
13 | ResNet50 is the model that has changed the most, due to changes in implementation. 14 | Other models have small floating point changes, but still pass np.testing.assert_allclose tests. 15 | 16 | 0.100.1 (2019-3-24) 17 | ------------------ 18 | * Updated version of Pillow to 5.4.1, in order to support Python 3.7 19 | * Updated the README 20 | 21 | 0.100.0 (2018-12-10) 22 | ------------------ 23 | * Added test coverage and increased error checking 24 | * Changed default csv name 25 | * Changed `image_column_headers` to `image_columns` everywhere 26 | * Updated examples 27 | * Updated version of scipy to 1.1 and numpy to 1.15 28 | 29 | 30 | 0.99.2 (2018-08-01) 31 | ------------------ 32 | * Updated the notebook example 33 | * Some code cleanup 34 | 35 | 0.99.1 (2018-06-20) 36 | ------------------ 37 | * Lots of code cleanup 38 | * Changed new_csv_name argument to new_csv_path everywhere for consistency 39 | * Removed '_full' from the saved csv_name for the full dataframe. Features-only csv still has 40 | '_features_only' in csv name. 41 | * Added '_featurized_' to saved csv names 42 | * Removed new_csv_path as argument to functions that do not actually require it 43 | 44 | 0.99.0 (2018-04-02) 45 | ------------------ 46 | * Added batch processing 47 | * Made pic2vec more programmatic (removed automatic csv-writing, etc.) 48 | * Bound keras to <2.1.5 to remove resnet problem 49 | 50 | 0.9.0 (2017-09-24) 51 | ------------------ 52 | * Fixed Keras backwards compatibility issues (include_top deprecated, require_flatten added) 53 | * Fixed ResNet50 update issues (removed a zero-padding layer, updated weights) 54 | 55 | 0.8.2 (2017-08-14) 56 | ------------------ 57 | * Updated trafaret requirement for PyPi package 58 | * Updated cats vs. 
dogs example 59 | 60 | 0.8.1 (2017-08-07) 61 | ------------------ 62 | * Fixed bugs with robust naming 63 | * Added error message for failed image conversion 64 | 65 | 0.8.0 (2017-08-02) 66 | ------------------ 67 | * Added robust naming options to the generated csv files 68 | 69 | 0.7.1 (2017-08-02) 70 | ------------------ 71 | * Fixed PIL truncated image bug 72 | 73 | 0.7.0 (2017-08-02) 74 | ------------------ 75 | * Fixed bug with CSV badly formed URLs 76 | * Fixed mistake with InceptionV3 preprocessing happening for every model 77 | 78 | 0.6.3 (2017-07-25) 79 | ------------------ 80 | * Added Travis and Coveralls for testing and coverage automation 81 | * Repo went public 82 | * Python 3.x compatibility 83 | 84 | 0.6.2 (2017-07-14) 85 | ------------------ 86 | * Fixed image format recognition. 87 | 88 | 0.6.1 (2017-07-12) 89 | ------------------ 90 | * Directory-only now natural sorted. 91 | 92 | 0.6.0 (2017-07-11) 93 | ------------------ 94 | * Added multi-column support 95 | * Added missing image column to csv 96 | 97 | 0.5.0 (2017-07-06) 98 | ------------------ 99 | * Renamed to pic2vec 100 | * Tests parametrized 101 | 102 | 0.4.3 (2017-07-03) 103 | ------------------ 104 | * Second round of code review- optimized code, better type checking with trafaret 105 | 106 | 0.4.2 (2017-06-30) 107 | ------------------ 108 | * Improved README test examples 109 | 110 | 0.4.1 (2017-06-30) 111 | ------------------ 112 | * Fixed documentation 113 | 114 | 0.4.0 (2017-06-29) 115 | ------------------ 116 | * Added ability to call multiple models, and packaged in SqueezeNet with weights. 
117 | 118 | 0.3.0 (2017-06-26) 119 | ------------------ 120 | * Created installation instructions and readme files, ready for prototype distribution 121 | 122 | 0.2.9(2017-06-25) 123 | ------------------ 124 | * Fixed import problem that prevented generated csvs from saving 125 | 126 | 0.2.8(2017-06-25) 127 | ------------------ 128 | * Fixed variable name bugs 129 | 130 | 0.2.7(2017-06-25) 131 | ------------------ 132 | * Changed image_directory_path to the more manageable image_path 133 | * Made testing module and preprocessing module slightly more robust. 134 | 135 | 0.2.6(2017-06-23) 136 | ------------------ 137 | * Added features-only csv test, and got rid of the column headers in the file 138 | * Added Documentation to data featurization modules 139 | 140 | 0.2.5(2017-06-23) 141 | ------------------ 142 | * 100% test coverage 143 | * Fixed a problem where a combined directory + csv was appending to the wrong 144 | rows when there was a mismatch between the directory and the csv. 145 | 146 | 0.2.4(2017-06-22) 147 | ------------------ 148 | * Fixed more bugs in build_featurizer 149 | 150 | 0.2.3(2017-06-22) 151 | ------------------ 152 | * Fixed build_featurizer troubles with building new csv paths in current directory 153 | 154 | 0.2.2(2017-06-22) 155 | ------------------ 156 | * Full requirements for keras imported 157 | 158 | 0.2.1 (2017-06-22) 159 | ------------------ 160 | * Bug fixes 161 | 162 | 0.2.0 (2017-06-22) 163 | ------------------ 164 | * Second release on PyPI. 165 | * Install keras with tensorflow backend specifically 166 | 167 | 0.1.0 (2017-06-14) 168 | ------------------ 169 | * First release on PyPI. 170 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | BSD License 3 | 4 | Copyright (c) 2017, Jett Oristaglio 5 | All rights reserved.
6 | 7 | Redistribution and use in source and binary forms, with or without modification, 8 | are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, this 14 | list of conditions and the following disclaimer in the documentation and/or 15 | other materials provided with the distribution. 16 | 17 | * Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived from this 19 | software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 25 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 28 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 29 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 30 | OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | 32 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include AUTHORS.rst 2 | include CONTRIBUTING.rst 3 | include HISTORY.rst 4 | include LICENSE 5 | include README.md 6 | 7 | recursive-include tests * 8 | recursive-exclude * __pycache__ 9 | recursive-exclude * *.py[co] 10 | 11 | recursive-include pic2vec/saved_models * 12 | 13 | recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif *.md 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help 2 | .DEFAULT_GOAL := help 3 | define BROWSER_PYSCRIPT 4 | import os, webbrowser, sys 5 | try: 6 | from urllib import pathname2url 7 | except: 8 | from urllib.request import pathname2url 9 | 10 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 11 | endef 12 | export BROWSER_PYSCRIPT 13 | 14 | define PRINT_HELP_PYSCRIPT 15 | import re, sys 16 | 17 | for line in sys.stdin: 18 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 19 | if match: 20 | target, help = match.groups() 21 | print("%-20s %s" % (target, help)) 22 | endef 23 | export PRINT_HELP_PYSCRIPT 24 | BROWSER := python -c "$$BROWSER_PYSCRIPT" 25 | 26 | help: 27 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 28 | 29 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 30 | 31 | 32 | clean-build: ## remove build artifacts 33 | rm -fr build/ 34 | rm -fr dist/ 35 | rm -fr .eggs/ 36 | find . -name '*.egg-info' -exec rm -fr {} + 37 | find . -name '*.egg' -exec rm -f {} + 38 | 39 | clean-pyc: ## remove Python file artifacts 40 | find . -name '*.pyc' -exec rm -f {} + 41 | find . -name '*.pyo' -exec rm -f {} + 42 | find . -name '*~' -exec rm -f {} + 43 | find . 
-name '__pycache__' -exec rm -fr {} + 44 | 45 | clean-test: ## remove test and coverage artifacts 46 | rm -fr .tox/ 47 | rm -f .coverage 48 | rm -fr htmlcov/ 49 | 50 | lint: ## check style with flake8 51 | flake8 pic2vec tests 52 | 53 | test: ## run tests quickly with the default Python 54 | py.test 55 | 56 | 57 | test-all: ## run tests on every Python version with tox 58 | tox 59 | 60 | coverage: ## check code coverage quickly with the default Python 61 | coverage run --source pic2vec -m pytest 62 | coverage report -m 63 | coverage html 64 | $(BROWSER) htmlcov/index.html 65 | 66 | docs: ## generate Sphinx HTML documentation, including API docs 67 | rm -f docs/pic2vec.rst 68 | rm -f docs/modules.rst 69 | sphinx-apidoc -o docs/ pic2vec 70 | $(MAKE) -C docs clean 71 | $(MAKE) -C docs html 72 | $(BROWSER) docs/_build/html/index.html 73 | 74 | servedocs: docs ## compile the docs watching for changes 75 | watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . 76 | 77 | release: clean ## package and upload a release 78 | python setup.py sdist upload 79 | python setup.py bdist_wheel upload 80 | 81 | dist: clean ## builds source and wheel package 82 | python setup.py sdist 83 | python setup.py bdist_wheel 84 | ls -l dist 85 | 86 | install: clean ## install the package to the active Python's site-packages 87 | python setup.py install 88 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/datarobot/pic2vec.svg?branch=master)](https://travis-ci.org/datarobot/pic2vec) [![Coverage Status](https://coveralls.io/repos/github/datarobot/pic2vec/badge.svg?branch=master)](https://coveralls.io/github/datarobot/pic2vec?branch=master) 2 | 3 | Pic2Vec 4 | ================ 5 | 6 | Featurize images using a small, contained pre-trained deep learning network 7 | 8 | 9 | * Free software: BSD license 10 | 11 | 12 | 
Features 13 | -------- 14 | 15 | This is the prototype for image features engineering. Supports Python 2.7, 3.4, 3.5, 3.6, and 3.7 16 | 17 | ``pic2vec`` is a python package that performs automated feature extraction 18 | for image data. It supports feature engineering on new image data, and allows 19 | traditional machine learning algorithms (such as tree-based algorithms) to 20 | train on image data. 21 | 22 | 23 | ## Input Specification 24 | 25 | ### Data Format 26 | 27 | ``pic2vec`` works on image data represented as either: 28 | 1. A directory of image files. 29 | 2. As URL pointers contained in a CSV. 30 | 3. Or as a directory of images with a CSV containing pointers to the image files. 31 | 32 | If no CSV is provided with the directory, it automatically generates a CSV to store the features with the appropriate images. 33 | 34 | Each row of the CSV represents a different image, and image rows can also have columns containing other data about the images as well. Each image's featurized representation will be appended as a series of new columns at the end of the appropriate image row. 35 | 36 | 37 | ### Constraints Specification 38 | The goal of this project was to make the featurizer as easy to use and hard to break as possible. If working properly, it should be resistant to badly-formatted data, such as missing rows or columns in the csv, image mismatches between a CSV and an image directory, and invalid image formats. 39 | 40 | However, for the featurizer to function optimally, it prefers certain constraints: 41 | * The CSV should have no missing columns or rows, and there should be full overlap between images in the CSV and the image directory 42 | 43 | * If checking predictions on a separate test set (such as on Kaggle), the filesystem needs to sort filepaths consistently with the sorting of the test set labels. The order in the CSV (whether generated automatically or passed in) will be considered the canonical order for the feature vectors. 
44 | 45 | The featurizer can only process .png, .jpeg, or .bmp image files. Any other images will be left out of the featurization by being represented by zero vectors in the image batch. 46 | 47 | ## Quick Start 48 | 49 | The following Python code shows a typical usage of `pic2vec`: 50 | 51 | ```python 52 | from pic2vec import ImageFeaturizer 53 | 54 | image_column_name = 'images' 55 | my_csv = 'path/to/data.csv' 56 | my_image_directory = 'path/to/image/directory/' 57 | 58 | my_featurizer = ImageFeaturizer(model='xception', depth=2, autosample=True) 59 | 60 | featurized_df = my_featurizer.featurize(image_column_name, csv_path=my_csv, 61 | image_path=my_image_directory) 62 | 63 | ``` 64 | 65 | ## Examples 66 | 67 | To get started, see the following example: 68 | 69 | 1. [Cats vs. Dogs](examples/cats_vs_dogs/Cats_v_Dogs_Test_Example.ipynb): Dataset from combined directory + CSV 70 | 71 | Examples coming soon: 72 | 2. Hot Dog, Not Hot Dog: Dataset from a CSV with URLs and no image directory 73 | 74 | 75 | ## Installation 76 | 77 | See the [Installation Guide](docs/guides/installation.md) for details. 78 | 79 | ### Installing Keras/Tensorflow 80 | If you run into trouble installing Keras or Tensorflow as a dependency, read the [Keras installation guide](https://keras.io/#installation) and [Tensorflow installation guide](https://www.tensorflow.org/install/) for details about installing Keras/Tensorflow on your machine. 81 | 82 | 83 | ## Using Featurizer Output With DataRobot 84 | ``pic2vec`` generates a flat CSV which is ready for supervised modeling, if the data has been labelled with a variable that 85 | can be used as a target. The images are transformed into a set of regular columns containing numeric data. 86 | Additionally, if unlabelled, it can be used for unsupervised learning (such as anomaly detection).
87 | 88 | 89 | ### Running tests 90 | 91 | To run the unit tests with ``pytest``, run 92 | 93 | ``` 94 | py.test tests 95 | ``` 96 | 97 | 98 | 99 | Credits 100 | --------- 101 | 102 | This package was created with [Cookiecutter](https://github.com/audreyr/cookiecutter) and the [audreyr/cookiecutter-pypackage](https://github.com/audreyr/cookiecutter-pypackage) project template. 103 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | /pic2vec.rst 2 | /pic2vec.*.rst 3 | /modules.rst 4 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 
61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pic2vec.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pic2vec.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/pic2vec" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pic2vec" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 
112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 
163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs/authors.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../AUTHORS.rst 2 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # image_featurizer documentation build configuration file, created by 5 | # sphinx-quickstart on Tue Jul 9 22:26:36 2013. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | 19 | # If extensions (or modules to document with autodoc) are in another 20 | # directory, add these directories to sys.path here. If the directory is 21 | # relative to the documentation root, use os.path.abspath to make it 22 | # absolute, like shown here. 
#sys.path.insert(0, os.path.abspath('.'))

# Get the project root dir, which is the parent dir of this
cwd = os.getcwd()
project_root = os.path.dirname(cwd)

# Insert the project root dir as the first element in the PYTHONPATH.
# This lets us ensure that the source package is imported, and that its
# version is used.
sys.path.insert(0, project_root)

# The package was renamed from image_featurizer to pic2vec; import it under
# the old alias so the version/release lookups below keep working.
import pic2vec as image_featurizer

# -- General configuration ---------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = u'Image Featurizer'
copyright = u"2017, Jett Oristaglio"

# The version info for the project you're documenting, acts as replacement
# for |version| and |release|, also used in various other places throughout
# the built documents.
#
# The short X.Y version.
version = image_featurizer.__version__
# The full version, including alpha/beta/rc tags.
release = image_featurizer.__version__

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
72 | #language = None 73 | 74 | # There are two options for replacing |today|: either, you set today to 75 | # some non-false value, then it is used: 76 | #today = '' 77 | # Else, today_fmt is used as the format for a strftime call. 78 | #today_fmt = '%B %d, %Y' 79 | 80 | # List of patterns, relative to source directory, that match files and 81 | # directories to ignore when looking for source files. 82 | exclude_patterns = ['_build'] 83 | 84 | # The reST default role (used for this markup: `text`) to use for all 85 | # documents. 86 | #default_role = None 87 | 88 | # If true, '()' will be appended to :func: etc. cross-reference text. 89 | #add_function_parentheses = True 90 | 91 | # If true, the current module name will be prepended to all description 92 | # unit titles (such as .. function::). 93 | #add_module_names = True 94 | 95 | # If true, sectionauthor and moduleauthor directives will be shown in the 96 | # output. They are ignored by default. 97 | #show_authors = False 98 | 99 | # The name of the Pygments (syntax highlighting) style to use. 100 | pygments_style = 'sphinx' 101 | 102 | # A list of ignored prefixes for module index sorting. 103 | #modindex_common_prefix = [] 104 | 105 | # If true, keep warnings as "system message" paragraphs in the built 106 | # documents. 107 | #keep_warnings = False 108 | 109 | 110 | # -- Options for HTML output ------------------------------------------- 111 | 112 | # The theme to use for HTML and HTML Help pages. See the documentation for 113 | # a list of builtin themes. 114 | html_theme = 'default' 115 | 116 | # Theme options are theme-specific and customize the look and feel of a 117 | # theme further. For a list of options available for each theme, see the 118 | # documentation. 119 | #html_theme_options = {} 120 | 121 | # Add any paths that contain custom themes here, relative to this directory. 122 | #html_theme_path = [] 123 | 124 | # The name for this set of Sphinx documents. 
If None, it defaults to 125 | # " v documentation". 126 | #html_title = None 127 | 128 | # A shorter title for the navigation bar. Default is the same as 129 | # html_title. 130 | #html_short_title = None 131 | 132 | # The name of an image file (relative to this directory) to place at the 133 | # top of the sidebar. 134 | #html_logo = None 135 | 136 | # The name of an image file (within the static path) to use as favicon 137 | # of the docs. This file should be a Windows icon file (.ico) being 138 | # 16x16 or 32x32 pixels large. 139 | #html_favicon = None 140 | 141 | # Add any paths that contain custom static files (such as style sheets) 142 | # here, relative to this directory. They are copied after the builtin 143 | # static files, so a file named "default.css" will overwrite the builtin 144 | # "default.css". 145 | html_static_path = ['_static'] 146 | 147 | # If not '', a 'Last updated on:' timestamp is inserted at every page 148 | # bottom, using the given strftime format. 149 | #html_last_updated_fmt = '%b %d, %Y' 150 | 151 | # If true, SmartyPants will be used to convert quotes and dashes to 152 | # typographically correct entities. 153 | #html_use_smartypants = True 154 | 155 | # Custom sidebar templates, maps document names to template names. 156 | #html_sidebars = {} 157 | 158 | # Additional templates that should be rendered to pages, maps page names 159 | # to template names. 160 | #html_additional_pages = {} 161 | 162 | # If false, no module index is generated. 163 | #html_domain_indices = True 164 | 165 | # If false, no index is generated. 166 | #html_use_index = True 167 | 168 | # If true, the index is split into individual pages for each letter. 169 | #html_split_index = False 170 | 171 | # If true, links to the reST sources are added to the pages. 172 | #html_show_sourcelink = True 173 | 174 | # If true, "Created using Sphinx" is shown in the HTML footer. 175 | # Default is True. 
176 | #html_show_sphinx = True 177 | 178 | # If true, "(C) Copyright ..." is shown in the HTML footer. 179 | # Default is True. 180 | #html_show_copyright = True 181 | 182 | # If true, an OpenSearch description file will be output, and all pages 183 | # will contain a tag referring to it. The value of this option 184 | # must be the base URL from which the finished HTML is served. 185 | #html_use_opensearch = '' 186 | 187 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 188 | #html_file_suffix = None 189 | 190 | # Output file base name for HTML help builder. 191 | htmlhelp_basename = 'image_featurizerdoc' 192 | 193 | 194 | # -- Options for LaTeX output ------------------------------------------ 195 | 196 | latex_elements = { 197 | # The paper size ('letterpaper' or 'a4paper'). 198 | #'papersize': 'letterpaper', 199 | 200 | # The font size ('10pt', '11pt' or '12pt'). 201 | #'pointsize': '10pt', 202 | 203 | # Additional stuff for the LaTeX preamble. 204 | #'preamble': '', 205 | } 206 | 207 | # Grouping the document tree into LaTeX files. List of tuples 208 | # (source start file, target name, title, author, documentclass 209 | # [howto/manual]). 210 | latex_documents = [ 211 | ('index', 'image_featurizer.tex', 212 | u'Image Featurizer Documentation', 213 | u'Jett Oristaglio', 'manual'), 214 | ] 215 | 216 | # The name of an image file (relative to this directory) to place at 217 | # the top of the title page. 218 | #latex_logo = None 219 | 220 | # For "manual" documents, if this is true, then toplevel headings 221 | # are parts, not chapters. 222 | #latex_use_parts = False 223 | 224 | # If true, show page references after internal links. 225 | #latex_show_pagerefs = False 226 | 227 | # If true, show URL addresses after external links. 228 | #latex_show_urls = False 229 | 230 | # Documents to append as an appendix to all manuals. 231 | #latex_appendices = [] 232 | 233 | # If false, no module index is generated. 
234 | #latex_domain_indices = True 235 | 236 | 237 | # -- Options for manual page output ------------------------------------ 238 | 239 | # One entry per manual page. List of tuples 240 | # (source start file, name, description, authors, manual section). 241 | man_pages = [ 242 | ('index', 'image_featurizer', 243 | u'Image Featurizer Documentation', 244 | [u'Jett Oristaglio'], 1) 245 | ] 246 | 247 | # If true, show URL addresses after external links. 248 | #man_show_urls = False 249 | 250 | 251 | # -- Options for Texinfo output ---------------------------------------- 252 | 253 | # Grouping the document tree into Texinfo files. List of tuples 254 | # (source start file, target name, title, author, 255 | # dir menu entry, description, category) 256 | texinfo_documents = [ 257 | ('index', 'image_featurizer', 258 | u'Image Featurizer Documentation', 259 | u'Jett Oristaglio', 260 | 'image_featurizer', 261 | 'One line description of project.', 262 | 'Miscellaneous'), 263 | ] 264 | 265 | # Documents to append as an appendix to all manuals. 266 | #texinfo_appendices = [] 267 | 268 | # If false, no module index is generated. 269 | #texinfo_domain_indices = True 270 | 271 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 272 | #texinfo_show_urls = 'footnote' 273 | 274 | # If true, do not generate a @detailmenu in the "Top" node's menu. 275 | #texinfo_no_detailmenu = False 276 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/guides/installation.md: -------------------------------------------------------------------------------- 1 | Installation: 2 | ============ 3 | 4 | 5 | 1: Setting Up The Virtual Environment 6 | --------------------------------- 7 | 8 | ### VirtualEnv 9 | To install virtualenv, follow this guide: [virtualenv installation guide](http://sourabhbajaj.com/mac-setup/Python/virtualenv.html) 10 | 11 | Once virtualenv is installed, create a new environment to run pic2vec: 12 | 13 | ```bash 14 | $ virtualenv pic2vec 15 | ``` 16 | Then activate the environment: 17 | 18 | ```bash 19 | $ source pic2vec/bin/activate 20 | ``` 21 | 22 | ### Conda 23 | To install Anaconda, follow this guide: [Anaconda installation guide](https://docs.continuum.io/anaconda/install) 24 | 25 | Once Anaconda is installed, create a new environment to run pic2vec: 26 | 27 | ```bash 28 | $ conda create --name pic2vec 29 | ``` 30 | 31 | When Conda asks for confirmation, type 'y' for 'yes'. 32 | 33 | To activate the environment on OS X or Linux: 34 | 35 | ```bash 36 | $ source activate pic2vec 37 | ``` 38 | 39 | To activate the environment on Windows: 40 | ```bash 41 | $ activate pic2vec 42 | ``` 43 | 44 | 45 | Once in a virtual environment, there are several ways to install the 46 | pic2vec package. 47 | 48 | 49 | 50 | 2: Installing The Pic2Vec Package 51 | ------------------- 52 | 53 | ### Pip Installation 54 | To install pic2vec through pip on OS X or Linux, run this command in your terminal: 55 | 56 | ```bash 57 | $ pip install pic2vec 58 | ``` 59 | To install through pip on Windows, run this command in terminal: 60 | 61 | ```bash 62 | $ python -m pip install pic2vec 63 | ``` 64 | 65 | This is the preferred method to install pic2vec, as it will always install the most recent stable release. 
66 | 67 | If you don't have [pip](https://pip.pypa.io) installed, this [Python installation guide](http://docs.python-guide.org/en/latest/starting/installation/) can guide you through the process. 68 | 69 | 70 | ### Installing From setup.py 71 | The sources for pic2vec can be downloaded from the [Github repo](https://github.com/datarobot/pic2vec). 72 | 73 | You can either clone the public repository: 74 | 75 | ```bash 76 | $ git clone git@github.com:datarobot/pic2vec.git 77 | ``` 78 | Or download the [tarball](https://github.com/datarobot/pic2vec/tarball/master): 79 | 80 | ```bash 81 | $ curl -OL https://github.com/datarobot/pic2vec/tarball/master 82 | ``` 83 | 84 | Once you have a copy of the source, you can build a binary distribution and install it from inside the directory with: 85 | 86 | ```bash 87 | $ python setup.py bdist_wheel 88 | $ cd dist/ 89 | $ pip install pic2vec-{VERSION}-py2.py3-none-any.whl 90 | ``` 91 | Check the dist folder to see what you need to fill in for the {VERSION} section. 92 | 93 | 3: Troubleshooting 94 | --------------- 95 | 96 | 1. If you see an error similar to `TypeError: find_packages() got an unexpected 97 | keyword argument 'include'` then you need to upgrade your setuptools. 98 | 99 | ```bash 100 | pip install -U setuptools 101 | ``` 102 | 103 | 2. If you see an error similar to `No local packages or working download links 104 | found for tensorflow` then you need to upgrade your pip. 105 | 106 | ```bash 107 | pip install -U pip 108 | ``` 109 | 110 | 3. If you have problems with tests or strange runtime exceptions, make sure 111 | your Keras installation isn't configured for Theano use. Open `~/.keras/keras.json` 112 | and check that the `backend` parameter value is set to `tensorflow`. If it is `theano` - 113 | simply remove that file, and on next execution Keras will find your Tensorflow install and create the correct configuration file. 
114 | -------------------------------------------------------------------------------- /docs/history.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../HISTORY.rst 2 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to Image Featurizer's documentation! 2 | ====================================== 3 | 4 | Contents: 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | readme 10 | installation 11 | usage 12 | modules 13 | contributing 14 | authors 15 | history 16 | 17 | Indices and tables 18 | ================== 19 | 20 | * :ref:`genindex` 21 | * :ref:`modindex` 22 | * :ref:`search` 23 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. 
latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 
84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\pic2vec.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\pic2vec.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 
145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 
214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /docs/markdowns/parameters.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/docs/markdowns/parameters.md -------------------------------------------------------------------------------- /docs/readme.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../README.md 2 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | ===== 2 | Usage 3 | ===== 4 | 5 | To use pic2vec in a project:: 6 | 7 | import pic2vec 8 | -------------------------------------------------------------------------------- /pic2vec/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Top-level package for Pic2Vec.""" 4 | 5 | __author__ = """Jett Oristaglio""" 6 | __email__ = 'jettori88@gmail.com' 7 | __version__ = '0.1.0' 8 | 9 | from pic2vec.build_featurizer import (_decapitate_model, _find_pooling_constant, # NOQA 10 | _splice_layer, _downsample_model_features, 11 | _initialize_model, _check_downsampling_mismatch, 12 | build_featurizer) 13 | 14 | from pic2vec.feature_preprocessing import (_create_df_with_image_paths, # NOQA 15 | _find_directory_image_paths, 16 | _find_csv_image_paths, 17 | _find_combined_image_paths, 18 | _image_paths_finder, _convert_single_image, 19 | preprocess_data) 20 | 21 | from pic2vec.data_featurizing import featurize_data, create_features # NOQA 22 | 23 | from pic2vec.squeezenet import SqueezeNet # NOQA 24 | 25 | from pic2vec.image_featurizer import ImageFeaturizer # NOQA 26 | -------------------------------------------------------------------------------- /pic2vec/build_featurizer.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file deals with building the actual featurizer: 3 | 1. Initializing the InceptionV3 model 4 | 2. Decapitating it to the appropriate depth 5 | 3. Downsampling, if desired 6 | 7 | The integrated function is the build_featurizer function, which takes the depth, 8 | a flag signalling downsampling, and the number of features to downsample to. 
9 | """ 10 | 11 | import logging 12 | import os 13 | import warnings 14 | 15 | import trafaret as t 16 | from keras.applications import InceptionV3, ResNet50, VGG16, VGG19, Xception 17 | from keras.engine.topology import InputLayer 18 | from keras.layers import GlobalAvgPool2D, Lambda, average 19 | from keras.models import Model 20 | import keras.backend as K 21 | 22 | from .squeezenet import SqueezeNet 23 | 24 | if K.backend() != 'tensorflow': 25 | logging.warn('Without a tensorflow backend, SqueezeNet and Xception will not be ' 26 | ' available. Please initialize ImageFeaturizer with either vgg16, vgg19, ' 27 | 'resnet50, or inceptionv3.') 28 | 29 | supported_model_types = { 30 | 'squeezenet': { 31 | 'label': 'SqueezeNet', 32 | 'class': SqueezeNet, 33 | 'kwargs': {'weights': None}, 34 | 'depth': {1: 5, 2: 12, 3: 19, 4: 26} 35 | }, 36 | 'inceptionv3': { 37 | 'label': 'InceptionV3', 38 | 'class': InceptionV3, 39 | 'kwargs': {}, 40 | 'depth': {1: 2, 2: 19, 3: 33, 4: 50} 41 | }, 42 | 'vgg16': { 43 | 'label': 'VGG16', 44 | 'class': VGG16, 45 | 'kwargs': {}, 46 | 'depth': {1: 1, 2: 2, 3: 4, 4: 8} 47 | }, 48 | 'vgg19': { 49 | 'label': 'VGG19', 50 | 'class': VGG19, 51 | 'kwargs': {}, 52 | 'depth': {1: 1, 2: 2, 3: 4, 4: 9} 53 | }, 54 | 'resnet50': { 55 | 'label': 'ResNet50', 56 | 'class': ResNet50, 57 | 'kwargs': {}, 58 | 'depth': {1: 2, 2: 5, 3: 13, 4: 23} 59 | }, 60 | 'xception': { 61 | 'label': 'Xception', 62 | 'class': Xception, 63 | 'kwargs': {}, 64 | 'depth': {1: 1, 2: 8, 3: 18, 4: 28} 65 | } 66 | } 67 | 68 | 69 | @t.guard(model_str=t.Enum(*supported_model_types.keys()), 70 | loaded_weights=t.String(allow_blank=True)) 71 | def _initialize_model(model_str, loaded_weights=''): 72 | """ 73 | Initialize the InceptionV3 model with the saved weights, or 74 | if the weight file can't be found, load them automatically through Keras. 
75 | 76 | Parameters: 77 | ---------- 78 | model_str : str 79 | String deciding which model to use for the featurizer 80 | 81 | Returns: 82 | ------- 83 | model : keras.models.Model 84 | The initialized model loaded with pre-trained weights 85 | """ 86 | logging.info('Loading/downloading {model_label} model weights. ' 87 | 'This may take a minute first time.' 88 | .format(model_label=supported_model_types[model_str]['label'])) 89 | 90 | if loaded_weights != '': 91 | model = supported_model_types[model_str]['class'](weights=None) 92 | try: 93 | model.load_weights(loaded_weights) 94 | except IOError as err: 95 | logging.error('Problem loading the custom weights. If not an advanced user, please ' 96 | 'leave loaded_weights unconfigured.') 97 | raise err 98 | else: 99 | model = supported_model_types[model_str]['class'](**supported_model_types 100 | [model_str]['kwargs']) 101 | 102 | if model_str == 'squeezenet': 103 | # Special case for squeezenet - we already have weights for it 104 | this_dir, this_filename = os.path.split(__file__) 105 | model_path = os.path.join(this_dir, 106 | 'saved_models', 107 | 'squeezenet_weights_tf_dim_ordering_tf_kernels.h5') 108 | if not os.path.isfile(model_path): 109 | raise ValueError('Could not find the weights. Download another model' 110 | ' or replace the SqueezeNet weights in the model folder.') 111 | model.load_weights(model_path) 112 | 113 | logging.info('Model successfully initialized.') 114 | return model 115 | 116 | 117 | @t.guard(model=t.Type(Model), depth=t.Int(gte=1)) 118 | def _decapitate_model(model, depth): 119 | """ 120 | Cut off end layers of a model equal to the depth of the desired outputs, 121 | and then remove the links connecting the new outer layer to the old ones. 122 | 123 | Parameters: 124 | ---------- 125 | model: keras.models.Model 126 | The model being decapitated. Note: original model is not changed, method returns new model. 
@t.guard(features=t.Any(), num_pooled_features=t.Int(gte=1))
def _find_pooling_constant(features, num_pooled_features):
    """
    Find the integer pooling constant needed to downsample the given feature
    tensor to the desired number of features.

    Parameters:
    ----------
    features : Tensor
        the layer output being downsampled

    num_pooled_features : int
        the desired number of features to downsample to

    Returns:
    -------
    int
        the integer pooling constant required to correctly splice the layer
        output for downsampling

    Raises:
    ------
    ValueError
        if num_pooled_features is larger than the current feature space, or is
        not an integer divisor of it
    """
    # Use int(...) rather than calling the __int__ dunder directly
    num_features = int(features.shape[-1])

    # Ratio between the current and the requested feature count
    pooling_constant = num_features / float(num_pooled_features)

    # -------------- #
    # ERROR CHECKING #

    if pooling_constant < 1:
        raise ValueError(
            'You can\'t downsample to a number bigger than the original feature space.')

    # Check that the number of downsampled features is an integer divisor of the original output
    if not pooling_constant.is_integer():
        # Store recommended downsample.
        # BUG FIX: `/` produced a float recommendation under Python 3 (this was
        # Python 2 integer division); `//` restores the intended integer value.
        recommended_downsample = num_features // int(pooling_constant)
        raise ValueError('Trying to downsample features to non-integer divisor: '
                         'from {} to {}.\n\n Did you mean to downsample to'
                         ' {}? Regardless, please choose an integer divisor.'
                         .format(num_features, num_pooled_features, recommended_downsample))
    # -------------------------------------------------------- #
    # Cast the pooling constant back to an int from a float if it passes the tests
    return int(pooling_constant)


@t.guard(tensor=t.Any(), number_splices=t.Int(gte=1))
def _splice_layer(tensor, number_splices):
    """
    Splice a layer into a number of even slices through skipping. This downsamples the layer,
    and allows for operations to be performed over neighbors.

    Parameters:
    ----------
    tensor : Tensor
        the layer output being spliced

    number_splices : int
        the number of new layers the original layer is being spliced into.
        NOTE: must be integer divisor of layer

    Returns:
    -------
    list_of_spliced_layers : list of Tensor
        a list of the spliced tensor sections of the original layer, with neighboring nodes
        occupying the same indices across splices

    Raises:
    ------
    ValueError
        if number_splices does not evenly divide the feature size of the layer
    """
    # -------------- #
    # ERROR CHECKING #
    # The number of splices must be an integer divisor of the feature size
    num_features = int(tensor.shape[-1])
    if num_features % number_splices:
        raise ValueError('Number of splices needs to be an integer divisor of'
                         ' the number of features. Tried to split {} features into'
                         ' {} equal parts.'.format(num_features, number_splices))
    # ------------------------------------------ #

    # Split the tensor into equal parts by skipping nodes equal to the number
    # of splices. This allows for merge operations over neighbor features
    return [Lambda(lambda features: features[:, i::number_splices])(tensor)
            for i in range(number_splices)]


@t.guard(features=t.Any(), num_pooled_features=t.Int(gte=1))
def _downsample_model_features(features, num_pooled_features):
    """
    Take in a layer of a model, and downsample the layer to a specified size.

    Parameters:
    ----------
    features : Tensor
        the final layer output being downsampled

    num_pooled_features : int
        the desired number of features to downsample to

    Returns:
    -------
    downsampled_features : Tensor
        a tensor containing the downsampled features with size = (?, num_pooled_features)
    """
    # The pooling constant doubles as the number of splices needed
    pooling_constant = _find_pooling_constant(features, num_pooled_features)
    # Splice the top layer into n layers, where n = pooling constant.
    list_of_spliced_layers = _splice_layer(features, pooling_constant)
    # Element-wise average over the splices performs the downsampling
    return average(list_of_spliced_layers)
def _check_downsampling_mismatch(downsample, num_pooled_features, output_layer_size):
    """
    Reconcile the downsample flag with the requested pooled-feature count.

    A True flag with no size requested triggers automatic downsampling (halving
    the output layer). A requested size with a False flag turns the flag on.

    Parameters:
    ----------
    downsample : bool
        Boolean flagging whether model is being downsampled

    num_pooled_features : int
        the desired number of features to downsample to (0 means unspecified)

    output_layer_size : int
        number of nodes in the output layer being downsampled

    Returns:
    -------
    downsample : bool
        Updated boolean flagging whether model is being downsampled

    num_pooled_features : int
        Updated number of features model output is being downsampled to

    Raises:
    ------
    ValueError
        if automatic downsampling is requested but the layer size is odd
    """
    wants_automatic = downsample and num_pooled_features == 0

    if wants_automatic:
        # Automatic downsampling halves the layer, so the size must be even
        if output_layer_size % 2 != 0:
            raise ValueError('Sorry, no automatic downsampling available for this model.')
        num_pooled_features = output_layer_size // 2
        logging.warning('Automatic downsampling to {}. If you would like to set custom '
                        'downsampling, pass in an integer divisor of {} to '
                        'num_pooled_features.'.format(num_pooled_features, output_layer_size))
    elif num_pooled_features != 0 and not downsample:
        # A size was requested without the flag: honor the size and flip the flag
        logging.info('Downsampling to {}.'.format(num_pooled_features))
        downsample = True

    return downsample, num_pooled_features
@t.guard(depth_of_featurizer=t.Int(gte=1, lte=4),
         downsample=t.Bool,
         num_pooled_features=t.Int(gte=0),
         model_str=t.Enum(*supported_model_types.keys()),
         loaded_model=t.Type(Model) | t.Null)
def build_featurizer(depth_of_featurizer, downsample, num_pooled_features=0,
                     model_str='squeezenet', loaded_model=None):
    """
    Create the full featurizer.

    Initialize the model, decapitate it to the appropriate depth, and check if downsampling
    top-layer featurization. If so, downsample to the desired feature space.

    Parameters:
    ----------
    depth_of_featurizer : int
        How deep to cut the network. Can be 1, 2, 3, or 4.

    downsample : bool
        Boolean flagging whether to perform downsampling

    num_pooled_features : int
        If we downsample, integer determining how small to downsample.
        NOTE: Must be integer divisor of original number of features
        or 0 if we don't want to specify exact number

    model_str : str
        String deciding which model to use for the featurizer

    loaded_model : keras.models.Model, optional
        If specified - use the model for featurizing, instead of creating new one.

    Returns:
    -------
    model : keras.models.Model
        The decapitated, potentially downsampled, pre-trained image featurizer.
        With no downsampling, the output features are equal to the top densely-
        connected layer of the network, which depends on the depth of the model.
        With downsampling, the output is equal to a downsampled average of
        multiple splices of the last densely connected layer.
    """
    # BUILDING INITIAL MODEL #
    if loaded_model is not None:
        model = loaded_model
    else:
        model = _initialize_model(model_str=model_str)

    # DECAPITATING MODEL #
    # Find the right depth from the dictionary and decapitate the model
    model = _decapitate_model(model, supported_model_types[model_str]['depth'][depth_of_featurizer])
    model_output = model.layers[-1].output

    # Add pooling layer to the top of the now-decapitated model as the featurizer,
    # if the output is still spatial (rank > 2)
    if len(model.layers[-1].output_shape) > 2:
        model_output = GlobalAvgPool2D(name='featurizer')(model_output)

    # Save the model output size; int(...) instead of calling __int__ directly
    num_output_features = int(model_output.shape[-1])
    logging.info("Model decapitated.")

    # DOWNSAMPLING FEATURES #
    # Checking that the user's downsampling flag matches the initialization of the downsampling
    (downsample, num_pooled_features) = _check_downsampling_mismatch(downsample,
                                                                     num_pooled_features,
                                                                     num_output_features)

    # If we are downsampling the features, we add a pooling layer to the outputs
    # to bring it to the correct size.
    if downsample:
        model_output = _downsample_model_features(model_output, num_pooled_features)
        logging.info("Model downsampled.")

    # Finally save the model
    model = Model(inputs=model.input, outputs=model_output)
    logging.info("Full featurizer is built.")
    if downsample:
        logging.info("Final layer feature space downsampled to {}".format(num_pooled_features))
    else:
        logging.info("No downsampling. Final layer feature space has size {}"
                     .format(num_output_features))

    return model


@t.guard(model=t.Type(Model), array=t.Type(np.ndarray))
def featurize_data(model, array):
    """
    Given a model and an array, perform error checking and return the prediction
    of the full feature array.

    Parameters:
    ----------
    model : keras.models.Model
        The featurizer model performing predictions

    array : np.ndarray
        The vectorized array of images being converted into features

    Returns:
    --------
    full_feature_array : np.ndarray
        A numpy array containing the featurized images

    Raises:
    ------
    ValueError
        if the array is not a 4D tensor
    """
    # Raise error if the array has the wrong shape
    if len(array.shape) != 4:
        raise ValueError('Image array must be a 4D tensor, with dimensions: '
                         '[batch, height, width, channel]')

    # Perform predictions
    logging.info('Creating feature array.')

    # Workaround so the models do not break after a Keras update — the model
    # must be (re)compiled before predict:
    # https://github.com/keras-team/keras/issues/9394
    model.compile('sgd', 'mse')
    full_feature_array = model.predict(array, verbose=1)

    # Return features
    logging.info('Feature array created successfully.')
    return full_feature_array
def _create_features_df_helper(data_array, full_feature_array, image_column_header):
    """
    Build the combined DataFrame of missing-image flags plus one column per feature.

    Parameters:
    ----------
    data_array : np.ndarray
        The 4D array of vectorized images; an all-zero slice marks a missing image

    full_feature_array : np.ndarray
        The 2D array of generated features, aligned row-for-row with data_array

    image_column_header : str
        Name of the image column, used as the prefix of every generated column

    Returns:
    -------
    df_features_full : pandas.DataFrame
        '{header}_missing' boolean column followed by '{header}_feat_i' columns
    """
    # An image slice that is entirely zeros marks a missing or blank photo
    zeros_index = [np.count_nonzero(array_slice) == 0 for array_slice in data_array]

    # BUG FIX: previously logged len(zeros_index) — the total number of rows —
    # instead of the number of rows actually flagged missing
    logging.info('Number of missing photos: {}'.format(sum(zeros_index)))

    # Create column headers for features, and the features dataframe
    array_column_headers = ['{}_feat_{}'.format(image_column_header, feature) for feature in
                            range(full_feature_array.shape[1])]

    df_features = pd.DataFrame(data=full_feature_array, columns=array_column_headers)

    # Create the missing column
    missing_column_header = ['{}_missing'.format(image_column_header)]
    df_missing = pd.DataFrame(data=zeros_index, columns=missing_column_header)

    # Create the full combined csv+features dataframe
    df_features_full = pd.concat([df_missing, df_features], axis=1)

    return df_features_full
def create_features(data_array, new_feature_array, image_column_header):
    """
    Build the features dataframe (missing-image flag plus one column per feature)
    aligned row-for-row with the vectorized images.

    Parameters:
    -----------
    data_array : np.ndarray
        The images contained in a single 4D array. Used to track missing images.

    new_feature_array : np.ndarray
        The array of generated features

    image_column_header : str
        String containing the name of the image column

    Returns:
    --------
    df_features : pandas.DataFrame
        The full dataframe containing the features appended to the dataframe of the images

    Raises:
    -------
    ValueError
        If either input array does not have the expected number of dimensions.
    """
    # -------------- #
    # ERROR CHECKING #
    # Each entry: (array to validate, expected rank, error-message template)
    expected_shapes = (
        (data_array, 4,
         'Data array must be 4D array, with shape: [batch, height, width, channel].'
         ' Gave feature array of shape: {}'),
        (new_feature_array, 2,
         'Feature array must be 2D array, with shape: [batch, num_features]. '
         'Gave feature array of shape: {}'),
    )
    for array, expected_ndim, message in expected_shapes:
        if len(array.shape) != expected_ndim:
            raise ValueError(message.format(array.shape))
    # --------------------------------------- #

    logging.info('Combining image features with original dataframe.')

    # The helper does the actual dataframe construction
    return _create_features_df_helper(data_array, new_feature_array, image_column_header)
# List of models supported in pic2vec
MODELS = ['squeezenet', 'vgg16', 'vgg19', 'resnet50', 'inceptionv3', 'xception']

# Tolerance for prediction error
# (absolute tolerance for floating-point comparisons of featurized outputs —
# presumably used with np.isclose/np.allclose in tests; confirm against callers)
ATOL = 0.00001
# Maps each supported model name to its Keras preprocessing function and the
# native input size the network was trained on.
# Unnecessary 'size' entry, but leaving in case of future use...
preprocessing_dict = {
    'squeezenet': {
        'preprocess': ka.imagenet_utils.preprocess_input,
        'size': (227, 227)
    },
    'vgg16': {
        'preprocess': ka.vgg16.preprocess_input,
        'size': (224, 224)
    },
    'vgg19': {
        'preprocess': ka.vgg19.preprocess_input,
        'size': (224, 224)
    },
    'resnet50': {
        'preprocess': ka.resnet50.preprocess_input,
        'size': (224, 224)
    },
    'inceptionv3': {
        'preprocess': ka.inception_v3.preprocess_input,
        'size': (299, 299)
    },

    'xception': {
        'preprocess': ka.xception.preprocess_input,
        'size': (299, 299)
    },
}


def _create_df_with_image_paths(list_of_images, image_column_header):
    """
    Take in a list of image names, and return a DataFrame where each
    image name is a new row.

    Parameters:
    ----------
    list_of_images : list of str
        Full paths to images in a directory

    image_column_header : str
        The name of the header for the column of image paths

    Returns:
    -------
    df : pandas.DataFrame
        The dataframe containing the full list of image names.

    """
    df = pd.DataFrame(list_of_images, columns=[image_column_header])
    return df


def natural_key(string_):
    """
    Sort key that orders embedded digit runs numerically ('img10' after 'img2').

    See http://www.codinghorror.com/blog/archives/001018.html
    """
    return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_)]
def _find_directory_image_paths(image_directory):
    """
    Take in a directory and parse which files in it are valid images for
    loading into the featurizer.

    The returned list is sorted to create a deterministic file order for the
    generated csv, regardless of filesystem ordering. The csv then becomes the
    canonical order for all preprocessing, featurizing, and write-back.

    Parameters:
    ----------
    image_directory : str
        The filepath to the directory containing the images
        (expected to end with a path separator — the filename is concatenated
        directly, matching the concatenation done in preprocess_data)

    Returns:
    -------
    list_of_images : list of str
        A sorted list of filenames of each valid image contained in the directory

    """
    image_list = os.listdir(image_directory)

    valid = ['JPEG', 'BMP', 'PNG']
    list_of_images = []

    for fichier in image_list:
        try:
            # BUG FIX: the original called Image.close() on the PIL *module*
            # (an AttributeError silently swallowed by the blanket except),
            # leaking one open file handle per image. The context manager
            # closes each file correctly.
            with Image.open(image_directory + fichier) as image:
                if image.format in valid:
                    list_of_images.append(fichier)
        except Exception:
            # Non-image or unreadable files are simply skipped
            pass

    return sorted(list_of_images, key=natural_key)
def _find_csv_image_paths(csv_path, image_column_header):
    """
    Find the image paths in a csv without an image directory.

    The list does not need to be sorted, as it is already in a set order in the
    csv. The csv is the canonical order for all data preprocessing, featurizing,
    and eventually writing the features back into the csv.

    Parameters:
    ----------
    csv_path : str
        Full path to the csv

    image_column_header : str
        Name of the column containing the image paths

    Returns:
    -------
    list_of_images : list of str
        Full paths to each valid image contained in the csv

    df : pandas.DataFrame
        The dataframe read from the csv

    Raises:
    ------
    ValueError
        if image_column_header is not a column of the csv
    """
    # Create the dataframe from the csv.
    # COMPAT FIX: `error_bad_lines` was deprecated in pandas 1.3 and removed in
    # 2.0. Prefer the modern `on_bad_lines` and fall back for older pandas.
    try:
        df = pd.read_csv(csv_path, on_bad_lines='skip')
    except TypeError:
        df = pd.read_csv(csv_path, error_bad_lines=False)

    # -------------- #
    # ERROR CHECKING #
    # Raise an error if the image column header isn't in the dataframe
    if image_column_header not in df.columns:
        raise ValueError('image_column_header error: {} does not exist as a '
                         'column in the csv file.'.format(image_column_header))
    # -------------- #

    # Create the list of image paths from the column in the dataframe
    list_of_images = df[image_column_header].tolist()

    return list_of_images, df
def _find_combined_image_paths(image_path, csv_path, image_column_header):
    """
    Find the image paths of a csv combined with a directory: take only the overlap
    to avoid errors.

    The csv order is canonical (see _find_csv_image_paths); images listed in the
    csv but absent from the directory are kept as '' placeholders so rows stay
    aligned, and are later vectorized to zeros.

    Parameters:
    ----------
    image_path : str
        Full path to the provided image directory

    csv_path : str
        Full path to the provided csv

    image_column_header : str
        Name of the column in the csv containing image paths

    Returns:
    -------
    list_of_images : list of str
        Full paths to each valid image contained in both the csv and directory

    df : pandas.DataFrame
        The dataframe read from the csv

    Raises:
    ------
    ValueError
        if the csv and directory share no images at all
    """
    # Find the list of image paths in the csv
    csv_list, df = _find_csv_image_paths(csv_path, image_column_header)

    # PERF FIX: membership is tested once per csv row — a set makes each test
    # O(1) instead of an O(n) scan of the directory list.
    directory_set = set(_find_directory_image_paths(image_path))

    # Keep csv order; missing images become '' placeholders
    list_of_images = [path if path in directory_set else '' for path in csv_list]

    # -------------- #
    # ERROR CHECKING #

    # Raise error if there are no shared images between the csv and the directory
    if all(path == '' for path in list_of_images):
        raise ValueError('Something is wrong. There are no shared images in the'
                         ' csv and the image directory. Check formatting or files.')
    # -------------- #

    return list_of_images, df
def _image_paths_finder(image_path, csv_path, image_column_header):
    """
    Resolve the list of image paths (and backing dataframe) from whichever
    inputs were supplied: a directory alone, a csv alone, or both combined.

    Parameters:
    ----------
    image_path : str
        Path to the image directory, if it exists

    csv_path : str
        Path to the csv, if it exists

    image_column_header : str
        Name of column header holding image information

    Returns:
    -------
    list_of_images : list of str
        a list of the paths to all the images being featurized

    df : pandas.DataFrame
        the dataframe backing the featurization
    """
    # CASE 1: directory only — generate a fresh dataframe from the listing
    if csv_path == '':
        list_of_images = _find_directory_image_paths(image_path)
        df = _create_df_with_image_paths(list_of_images,
                                         image_column_header=image_column_header)

    # CASE 2: csv only — pull the paths straight from the column
    elif image_path == '':
        list_of_images, df = _find_csv_image_paths(csv_path, image_column_header)
        logging.info('Found image paths from csv.')

    # CASE 3: both — keep only the overlap, in csv order
    else:
        list_of_images, df = _find_combined_image_paths(image_path, csv_path,
                                                        image_column_header)
        logging.info('Found image paths that overlap between both the directory and the csv.\n')

    return list_of_images, df
def _convert_single_image(image_source, model_str, image_path, target_size=(299, 299),
                          grayscale=False):
    """
    Take in a path to an image (either by URL or in a native directory)
    and convert the image to a preprocessed 4D numpy array, ready to be plugged
    into the featurizer.

    Parameters:
    ----------
    image_source : str
        Flag for either url or directory source for image

    model_str : str
        Name of the model converting the image

    image_path : str
        Either the URL or the full path to the image

    target_size : tuple of ints
        The desired size of the image

    grayscale : bool
        Boolean indicating whether the image is grayscale or not

    Returns:
    -------
    image_array : np.ndarray
        a numpy array that represents the loaded and preprocessed image

    """
    # Retrieve the image, either from a given url or from a directory
    try:
        if image_source == 'url':
            image_file = urlretrieve(image_path)[0]
        elif image_source == 'directory':
            image_file = image_path
    except (IOError, ValueError):
        # Retrieval failed: substitute a zero tensor of the right shape.
        # Channel dimension is 3 for color, 1 for grayscale.
        logging.error('ERROR: Could not load/convert image to numpy array: {}'.format(image_path))
        return np.zeros(target_size + (3 - 2 * grayscale,))

    # Load the image and vectorize it at the target size
    loaded = load_img(image_file, target_size=target_size, grayscale=grayscale)
    image_array = img_to_array(loaded)

    # Add the batch dimension, then apply the model-specific Keras
    # preprocessing function from preprocessing_dict
    image_array = np.expand_dims(image_array, axis=0)
    return preprocessing_dict[model_str]['preprocess'](image_array)


################################################
#  FUNCTION FOR END-TO-END DATA PREPROCESSING  #
################################################

def _find_image_source(image_path):
    """
    Decide where images will be retrieved from: an empty image_path means the
    rows hold URLs; otherwise they are files in a local directory.
    """
    return 'url' if image_path == '' else 'directory'
@t.guard(image_column_header=t.String(allow_blank=False),
         model_str=t.String(allow_blank=False),
         list_of_images=t.List(t.String(allow_blank=True)),
         image_path=t.String(allow_blank=True),
         csv_path=t.String(allow_blank=True),
         target_size=t.Tuple(t.Int, t.Int),
         grayscale=t.Bool)
def preprocess_data(image_column_header,
                    model_str,
                    list_of_images,
                    image_path='',
                    csv_path='',
                    target_size=(299, 299),
                    grayscale=False):
    """
    Receive the data (some combination of image directory + csv), find
    the list of valid images, and then convert each to an array and add
    them to the full batch.

    Parameters:
    ----------
    image_column_header : str
        The name of the column that contains the image paths in the csv

    model_str : str
        The name of the model whose preprocessing function will be applied

    list_of_images : list of str
        The image names (or URLs) to vectorize, in canonical csv order.
        '' entries mark images missing from the directory.

    image_path : str
        The path to the image directory, if it is being passed

    csv_path : str
        The path to the csv, if it is being passed

    target_size : tuple of ints
        The size that the images will be scaled to

    grayscale : bool
        Boolean indicating whether the images are grayscale or not

    Returns:
    -------
    image_data : np.ndarray
        a 4D numpy tensor containing the (full or batched) vectorized images,
        ready to be pushed through the featurizer

    list_of_images : list of str
        the list of image paths in the same order as the batches
        of the numpy tensor. This will allow us to add the
        features to the correct row of the csv.

    Raises:
    ------
    ValueError
        if neither csv nor image directory is given, or model_str is unknown
    TypeError
        if image_path does not point to a directory
    """
    # -------------- #
    # ERROR CHECKING #
    # -------------- #

    # If there is no image directory or csv, then something is wrong.
    if image_path == '' and csv_path == '':
        raise ValueError('Need to load either an image directory or a CSV with'
                         ' URLs, if no image directory included.')
    # Raise an error if the image_path doesn't point to a directory
    if image_path and not os.path.isdir(image_path):
        raise TypeError('image_path must lead to a directory if '
                        'it is initialized. It is where the images are stored.')

    if model_str not in preprocessing_dict.keys():
        # BUG FIX: previously formatted the unbound `dict.keys` method (missing
        # call parentheses), printing a method repr instead of the keys
        raise ValueError('model_str must be one of the following: {}'
                         .format(list(preprocessing_dict.keys())))
    # ------------------------------------------------------ #

    # BUILDING IMAGE PATH LIST #
    num_images = len(list_of_images)

    image_source = _find_image_source(image_path)

    # Set number of grayscale channels (3 if color, 1 if grayscale)
    channels = 3 - (2 * grayscale)

    # Initialize the full batch
    image_data = np.ones((num_images, target_size[0], target_size[1], channels))

    # Create the full image tensor
    logging.info('Converting images.')

    # Progress-report interval, hoisted out of the loop (loop-invariant)
    if num_images < 1000:
        report_step = 100
    elif num_images < 5000:
        report_step = 500
    else:
        report_step = 1000

    # Cache of already-vectorized images: image name -> index of its first slice
    image_dict = {}

    index = 0

    # Iterate through each image in the list of image names
    for image in list_of_images:
        # If the image is in the csv, but not in the directory, set it to all zeros.
        # This allows the featurizer to correctly append features when there is
        # mismatch between the csv and the directory. Otherwise it would lose rows
        if image == '':
            image_data[index, :, :, :] = 0
            index += 1
            continue

        # If the image has already been vectorized before, just copy that slice
        if image in image_dict:
            image_data[index, :, :, :] = image_data[image_dict[image], :, :, :]

        # Otherwise, vectorize the image
        else:
            # Add the index to the dictionary to check in the future
            image_dict[image] = index

            # Append the image path to the image name. If there's none, nothing will change
            image = '{}{}'.format(image_path, image)

            # Place the vectorized image into the image data
            image_data[index, :, :, :] = _convert_single_image(image_source, model_str, image,
                                                               target_size=target_size,
                                                               grayscale=grayscale)

            # Progress report at set intervals
            if not index % report_step:
                logging.info('Converted {} images in batch. Only {} images left to go.'.format(
                    index, num_images - index))

        index += 1

    return image_data, list_of_images
def fire_module(x, fire_id, squeeze=16, expand=64):
    """Build a SqueezeNet 'fire' block: a 1x1 squeeze convolution feeding
    parallel 1x1 and 3x3 expand convolutions, concatenated channel-wise."""
    s_id = 'fire' + str(fire_id) + '/'

    # Channel axis position depends on the backend image data format
    channel_axis = 1 if K.image_data_format() == 'channels_first' else 3

    # Squeeze: 1x1 bottleneck convolution
    squeezed = Activation('relu', name=s_id + relu + sq1x1)(
        Convolution2D(squeeze, (1, 1), padding='valid', name=s_id + sq1x1)(x))

    # Expand: parallel 1x1 and 3x3 convolutions over the squeezed output
    left = Activation('relu', name=s_id + relu + exp1x1)(
        Convolution2D(expand, (1, 1), padding='valid', name=s_id + exp1x1)(squeezed))

    right = Activation('relu', name=s_id + relu + exp3x3)(
        Convolution2D(expand, (3, 3), padding='same', name=s_id + exp3x3)(squeezed))

    return concatenate([left, right], axis=channel_axis, name=s_id + 'concat')
def SqueezeNet(input_tensor=None, input_shape=None,
               weights='imagenet',
               classes=1000):
    """Build SqueezeNet model.

    Constructs the original SqueezeNet architecture (conv1 -> fire2..fire9 ->
    conv10 -> global average pool -> softmax) and optionally loads ImageNet
    weights from the keras-squeezenet release at WEIGHTS_PATH.

    Parameters:
    ----------
    input_tensor : Tensor, optional
        Existing tensor to use as the model input.
    input_shape : tuple, optional
        Shape of the input image; validated/defaulted by _obtain_input_shape.
    weights : str or None
        'imagenet' to load pre-trained weights, None for random initialization.
    classes : int
        Number of output classes; must be 1000 when using ImageNet weights.

    Returns:
    -------
    model : keras.models.Model
        The assembled (and possibly weight-loaded) SqueezeNet model.
    """
    # Validate the weights argument before building anything
    if weights not in {'imagenet', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `imagenet` '
                         '(pre-training on ImageNet).')

    # Pre-trained ImageNet weights require the original 1000-way classifier
    if weights == 'imagenet' and classes != 1000:
        raise ValueError('If using `weights` as imagenet with `include_top`'
                         ' as true, `classes` should be 1000')

    # Normalize/validate the input shape for the current backend data format
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=227,
                                      min_size=48,
                                      data_format=K.image_data_format(),
                                      require_flatten=False)

    # Build or wrap the input tensor
    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # Stem: strided conv + relu + max pool
    x = Convolution2D(64, (3, 3), strides=(2, 2), padding='valid', name='conv1')(img_input)
    x = Activation('relu', name='relu_conv1')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool1')(x)

    # Fire modules 2-3, then downsample
    x = fire_module(x, fire_id=2, squeeze=16, expand=64)
    x = fire_module(x, fire_id=3, squeeze=16, expand=64)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool3')(x)

    # Fire modules 4-5, then downsample
    x = fire_module(x, fire_id=4, squeeze=32, expand=128)
    x = fire_module(x, fire_id=5, squeeze=32, expand=128)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool5')(x)

    # Fire modules 6-9 with dropout before the classifier
    x = fire_module(x, fire_id=6, squeeze=48, expand=192)
    x = fire_module(x, fire_id=7, squeeze=48, expand=192)
    x = fire_module(x, fire_id=8, squeeze=64, expand=256)
    x = fire_module(x, fire_id=9, squeeze=64, expand=256)
    x = Dropout(0.5, name='drop9')(x)

    # Classifier head: 1x1 conv to `classes` channels, pooled to a vector
    x = Convolution2D(classes, (1, 1), padding='valid', name='conv10')(x)
    x = Activation('relu', name='relu_conv10')(x)
    x = GlobalAveragePooling2D()(x)
    out = Activation('softmax', name='loss')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = Model(inputs, out, name='squeezenet')

    # load weights
    if weights == 'imagenet':

        # Downloaded (and cached) from the keras-squeezenet release
        weights_path = get_file('squeezenet_weights_tf_dim_ordering_tf_kernels.h5',
                                WEIGHTS_PATH,
                                cache_subdir='models')
        model.load_weights(weights_path)
        if K.backend() == 'theano':
            # Weights are stored in TF kernel layout; convert for Theano
            layer_utils.convert_all_kernels_in_model(model)

        if K.image_data_format() == 'channels_first':

            if K.backend() == 'tensorflow':
                warnings.warn('You are using the TensorFlow backend, yet you '
                              'are using the Theano '
                              'image data format convention '
                              '(`image_data_format="channels_first"`). '
                              'For best performance, set '
                              '`image_data_format="channels_last"` in '
                              'your Keras config '
                              'at ~/.keras/keras.json.')
    return model
-------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.101.1 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | search = version='{current_version}' 8 | replace = version='{new_version}' 9 | 10 | [bumpversion:file:pic2vec/__init__.py] 11 | search = __version__ = '{current_version}' 12 | replace = __version__ = '{new_version}' 13 | 14 | [bdist_wheel] 15 | universal = 1 16 | 17 | [flake8] 18 | exclude = docs 19 | max-line-length = 100 20 | 21 | [aliases] 22 | test = pytest 23 | # Define setup.py command aliases here 24 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from setuptools import setup, find_packages 5 | 6 | with open('README.md') as readme_file: 7 | readme = readme_file.read() 8 | 9 | with open('HISTORY.rst') as history_file: 10 | history = history_file.read() 11 | 12 | requirements = [ 13 | 'h5py>=2.7.0,<3', 14 | 'scipy>=1.1,<2', 15 | 'numpy>=1.15.4,<2', 16 | 'tensorflow>=1.2.0,<2', 17 | 'keras>=2.2.3,<2.3.0', 18 | 'pandas>=0.20.2,<1', 19 | 'Pillow>=5.4.1,<6', 20 | 'trafaret>=1,<2' 21 | ] 22 | 23 | setup_requirements = [ 24 | 'pytest-runner', 25 | # Put setup requirements (distutils extensions, etc.) 
here 26 | ] 27 | 28 | test_requirements = [ 29 | 'numpy', 30 | 'pytest', 31 | 'keras', 32 | ] 33 | 34 | setup( 35 | name='pic2vec', 36 | version='0.101.1', 37 | description='Featurize images using a decapitated, pre-trained deep learning network', 38 | long_description=readme + '\n\n' + history, 39 | long_description_content_type='text/markdown', 40 | author='Jett Oristaglio', 41 | author_email='jettori88@gmail.com', 42 | url='https://github.com/datarobot/pic2vec', 43 | packages=find_packages(include=['pic2vec']), 44 | include_package_data=True, 45 | package_data={ 46 | 'pic2vec': ['saved_models/squeezenet_weights_tf_dim_ordering_tf_kernels.h5'] 47 | }, 48 | install_requires=requirements, 49 | license='BSD license', 50 | zip_safe=False, 51 | keywords=['image_featurizer', 'featurize', 'pic2vec'], 52 | classifiers=[ 53 | 'Development Status :: 3 - Alpha', 54 | 'Intended Audience :: Developers', 55 | 'License :: OSI Approved :: BSD License', 56 | 'Natural Language :: English', 57 | 'Programming Language :: Python :: 2.7', 58 | 'Programming Language :: Python :: 3.4', 59 | 'Programming Language :: Python :: 3.5', 60 | 'Programming Language :: Python :: 3.6', 61 | 'Programming Language :: Python :: 3.7', 62 | ], 63 | test_suite='tests', 64 | tests_require=test_requirements, 65 | setup_requires=setup_requirements, 66 | ) 67 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Unit test package for image_featurizer.""" 4 | -------------------------------------------------------------------------------- /tests/build_featurizer_testing/inceptionv3_test_prediction.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/build_featurizer_testing/inceptionv3_test_prediction.npy 
-------------------------------------------------------------------------------- /tests/build_featurizer_testing/resnet50_test_prediction.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/build_featurizer_testing/resnet50_test_prediction.npy -------------------------------------------------------------------------------- /tests/build_featurizer_testing/squeezenet_test_prediction.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/build_featurizer_testing/squeezenet_test_prediction.npy -------------------------------------------------------------------------------- /tests/build_featurizer_testing/vgg16_test_prediction.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/build_featurizer_testing/vgg16_test_prediction.npy -------------------------------------------------------------------------------- /tests/build_featurizer_testing/vgg19_test_prediction.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/build_featurizer_testing/vgg19_test_prediction.npy -------------------------------------------------------------------------------- /tests/build_featurizer_testing/xception_test_prediction.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/build_featurizer_testing/xception_test_prediction.npy -------------------------------------------------------------------------------- 
/tests/data_featurizing_testing/array_testing/check_featurize.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/data_featurizing_testing/array_testing/check_featurize.npy -------------------------------------------------------------------------------- /tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_full: -------------------------------------------------------------------------------- 1 | image_missing,image_feat_0,image_feat_1,image_feat_2 2 | False,1.0,2.0,3.0 3 | False,4.0,5.0,6.0 4 | True,0.0,0.0,0.0 5 | False,7.0,8.0,9.0 6 | -------------------------------------------------------------------------------- /tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_images: -------------------------------------------------------------------------------- 1 | image 2 | borges.jpg 3 | arendt.bmp 4 | heidegger.jpg 5 | sappho.png 6 | -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/csv_testing/create_csv_check: -------------------------------------------------------------------------------- 1 | images 2 | arendt.bmp 3 | borges.jpg 4 | sappho.png 5 | -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/csv_testing/csv_image_path_check: -------------------------------------------------------------------------------- 1 | ,cats,images, 2 | ,foo,borges.jpg,dog 3 | ,bar,arendt.bmp,dog 4 | ,,sappho.png,dog 5 | -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/csv_testing/directory_combined_image_path_test: -------------------------------------------------------------------------------- 1 | ,cats,images, 2 | ,foo,heidegger.png,dog 3 | ,bar,arendt.bmp,dog 4 | ,,sappho.png,dog 5 | 
-------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/csv_testing/directory_preprocess_system_test: -------------------------------------------------------------------------------- 1 | ,cats,images, 2 | ,foo,heidegger.png,dog 3 | ,bar,arendt.bmp,dog 4 | ,,sappho.png,dog 5 | ,bar,arendt.bmp,dog 6 | -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/csv_testing/error_directory_combined_test: -------------------------------------------------------------------------------- 1 | ,cats,images, 2 | ,foo,heidegger.png,dog 3 | ,bar,flynn.bmp,dog 4 | ,,pork.png,dog 5 | -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/csv_testing/error_row: -------------------------------------------------------------------------------- 1 | ,cats,images, 2 | ,foo,this_is_an_error,dog 3 | ,bar,https://s3.amazonaws.com/datarobot_public_datasets/images/pic2vec/borges.jpg,dog 4 | ,,https://s3.amazonaws.com/datarobot_public_datasets/images/pic2vec/sappho.png,dog 5 | -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/csv_testing/url_test: -------------------------------------------------------------------------------- 1 | ,cats,images, 2 | ,bar,https://s3.amazonaws.com/datarobot_public_datasets/images/pic2vec/borges.jpg,dog 3 | ,,https://s3.amazonaws.com/datarobot_public_datasets/images/pic2vec/arendt.bmp,dog 4 | ,,https://s3.amazonaws.com/datarobot_public_datasets/images/pic2vec/sappho.png, 5 | -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_image_arrays/image_test.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_image_arrays/image_test.npy -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_image_arrays/image_test_grayscale.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_image_arrays/image_test_grayscale.npy -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_image_arrays/image_test_isotropic.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_image_arrays/image_test_isotropic.npy -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_image_arrays/image_test_isotropic_grayscale.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_image_arrays/image_test_isotropic_grayscale.npy -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_images/arendt.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_images/arendt.bmp -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_images/borges.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_images/borges.jpg -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_images/heidegger.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_images/heidegger.gif -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_images/sappho.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_images/sappho.png -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_preprocessing_arrays/arendt.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_preprocessing_arrays/arendt.npy -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_preprocessing_arrays/arendt_grayscale.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_preprocessing_arrays/arendt_grayscale.npy -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_preprocessing_arrays/borges.npy: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_preprocessing_arrays/borges.npy -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_preprocessing_arrays/sappho.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_preprocessing_arrays/sappho.npy -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_preprocessing_arrays/sappho_grayscale.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_preprocessing_arrays/sappho_grayscale.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_inceptionv3.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_inceptionv3.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_inceptionv3_mult.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_inceptionv3_mult.npy 
-------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_resnet50.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_resnet50.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_resnet50_mult.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_resnet50_mult.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_squeezenet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_squeezenet.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_squeezenet_mult.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_squeezenet_mult.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_vgg16.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_vgg16.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_vgg16_mult.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_vgg16_mult.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_vgg19.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_vgg19.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_vgg19_mult.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_vgg19_mult.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_xception.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_xception.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_xception_mult.npy: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_xception_mult.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/csv_checking/squeezenet_check_csv.csv: -------------------------------------------------------------------------------- 1 | images_missing,images_feat_0,images_feat_1,images_feat_2,images_feat_3,images_feat_4,images_feat_5,images_feat_6,images_feat_7,images_feat_8,images_feat_9,images_feat_10,images_feat_11,images_feat_12,images_feat_13,images_feat_14,images_feat_15,images_feat_16,images_feat_17,images_feat_18,images_feat_19,images_feat_20,images_feat_21,images_feat_22,images_feat_23,images_feat_24,images_feat_25,images_feat_26,images_feat_27,images_feat_28,images_feat_29,images_feat_30,images_feat_31,images_feat_32,images_feat_33,images_feat_34,images_feat_35,images_feat_36,images_feat_37,images_feat_38,images_feat_39,images_feat_40,images_feat_41,images_feat_42,images_feat_43,images_feat_44,images_feat_45,images_feat_46,images_feat_47,images_feat_48,images_feat_49,images_feat_50,images_feat_51,images_feat_52,images_feat_53,images_feat_54,images_feat_55,images_feat_56,images_feat_57,images_feat_58,images_feat_59,images_feat_60,images_feat_61,images_feat_62,images_feat_63,images_feat_64,images_feat_65,images_feat_66,images_feat_67,images_feat_68,images_feat_69,images_feat_70,images_feat_71,images_feat_72,images_feat_73,images_feat_74,images_feat_75,images_feat_76,images_feat_77,images_feat_78,images_feat_79,images_feat_80,images_feat_81,images_feat_82,images_feat_83,images_feat_84,images_feat_85,images_feat_86,images_feat_87,images_feat_88,images_feat_89,images_feat_90,images_feat_91,images_feat_92,images_feat_93,images_feat_94,images_feat_95,images_feat_96,images_feat_97,images_feat_98,images_feat_99,
images_feat_100,images_feat_101,images_feat_102,images_feat_103,images_feat_104,images_feat_105,images_feat_106,images_feat_107,images_feat_108,images_feat_109,images_feat_110,images_feat_111,images_feat_112,images_feat_113,images_feat_114,images_feat_115,images_feat_116,images_feat_117,images_feat_118,images_feat_119,images_feat_120,images_feat_121,images_feat_122,images_feat_123,images_feat_124,images_feat_125,images_feat_126,images_feat_127,images_feat_128,images_feat_129,images_feat_130,images_feat_131,images_feat_132,images_feat_133,images_feat_134,images_feat_135,images_feat_136,images_feat_137,images_feat_138,images_feat_139,images_feat_140,images_feat_141,images_feat_142,images_feat_143,images_feat_144,images_feat_145,images_feat_146,images_feat_147,images_feat_148,images_feat_149,images_feat_150,images_feat_151,images_feat_152,images_feat_153,images_feat_154,images_feat_155,images_feat_156,images_feat_157,images_feat_158,images_feat_159,images_feat_160,images_feat_161,images_feat_162,images_feat_163,images_feat_164,images_feat_165,images_feat_166,images_feat_167,images_feat_168,images_feat_169,images_feat_170,images_feat_171,images_feat_172,images_feat_173,images_feat_174,images_feat_175,images_feat_176,images_feat_177,images_feat_178,images_feat_179,images_feat_180,images_feat_181,images_feat_182,images_feat_183,images_feat_184,images_feat_185,images_feat_186,images_feat_187,images_feat_188,images_feat_189,images_feat_190,images_feat_191,images_feat_192,images_feat_193,images_feat_194,images_feat_195,images_feat_196,images_feat_197,images_feat_198,images_feat_199,images_feat_200,images_feat_201,images_feat_202,images_feat_203,images_feat_204,images_feat_205,images_feat_206,images_feat_207,images_feat_208,images_feat_209,images_feat_210,images_feat_211,images_feat_212,images_feat_213,images_feat_214,images_feat_215,images_feat_216,images_feat_217,images_feat_218,images_feat_219,images_feat_220,images_feat_221,images_feat_222,images_feat_223,images_feat_224,
images_feat_225,images_feat_226,images_feat_227,images_feat_228,images_feat_229,images_feat_230,images_feat_231,images_feat_232,images_feat_233,images_feat_234,images_feat_235,images_feat_236,images_feat_237,images_feat_238,images_feat_239,images_feat_240,images_feat_241,images_feat_242,images_feat_243,images_feat_244,images_feat_245,images_feat_246,images_feat_247,images_feat_248,images_feat_249,images_feat_250,images_feat_251,images_feat_252,images_feat_253,images_feat_254,images_feat_255,images_feat_256,images_feat_257,images_feat_258,images_feat_259,images_feat_260,images_feat_261,images_feat_262,images_feat_263,images_feat_264,images_feat_265,images_feat_266,images_feat_267,images_feat_268,images_feat_269,images_feat_270,images_feat_271,images_feat_272,images_feat_273,images_feat_274,images_feat_275,images_feat_276,images_feat_277,images_feat_278,images_feat_279,images_feat_280,images_feat_281,images_feat_282,images_feat_283,images_feat_284,images_feat_285,images_feat_286,images_feat_287,images_feat_288,images_feat_289,images_feat_290,images_feat_291,images_feat_292,images_feat_293,images_feat_294,images_feat_295,images_feat_296,images_feat_297,images_feat_298,images_feat_299,images_feat_300,images_feat_301,images_feat_302,images_feat_303,images_feat_304,images_feat_305,images_feat_306,images_feat_307,images_feat_308,images_feat_309,images_feat_310,images_feat_311,images_feat_312,images_feat_313,images_feat_314,images_feat_315,images_feat_316,images_feat_317,images_feat_318,images_feat_319,images_feat_320,images_feat_321,images_feat_322,images_feat_323,images_feat_324,images_feat_325,images_feat_326,images_feat_327,images_feat_328,images_feat_329,images_feat_330,images_feat_331,images_feat_332,images_feat_333,images_feat_334,images_feat_335,images_feat_336,images_feat_337,images_feat_338,images_feat_339,images_feat_340,images_feat_341,images_feat_342,images_feat_343,images_feat_344,images_feat_345,images_feat_346,images_feat_347,images_feat_348,images_feat_349,
images_feat_350,images_feat_351,images_feat_352,images_feat_353,images_feat_354,images_feat_355,images_feat_356,images_feat_357,images_feat_358,images_feat_359,images_feat_360,images_feat_361,images_feat_362,images_feat_363,images_feat_364,images_feat_365,images_feat_366,images_feat_367,images_feat_368,images_feat_369,images_feat_370,images_feat_371,images_feat_372,images_feat_373,images_feat_374,images_feat_375,images_feat_376,images_feat_377,images_feat_378,images_feat_379,images_feat_380,images_feat_381,images_feat_382,images_feat_383,images_feat_384,images_feat_385,images_feat_386,images_feat_387,images_feat_388,images_feat_389,images_feat_390,images_feat_391,images_feat_392,images_feat_393,images_feat_394,images_feat_395,images_feat_396,images_feat_397,images_feat_398,images_feat_399,images_feat_400,images_feat_401,images_feat_402,images_feat_403,images_feat_404,images_feat_405,images_feat_406,images_feat_407,images_feat_408,images_feat_409,images_feat_410,images_feat_411,images_feat_412,images_feat_413,images_feat_414,images_feat_415,images_feat_416,images_feat_417,images_feat_418,images_feat_419,images_feat_420,images_feat_421,images_feat_422,images_feat_423,images_feat_424,images_feat_425,images_feat_426,images_feat_427,images_feat_428,images_feat_429,images_feat_430,images_feat_431,images_feat_432,images_feat_433,images_feat_434,images_feat_435,images_feat_436,images_feat_437,images_feat_438,images_feat_439,images_feat_440,images_feat_441,images_feat_442,images_feat_443,images_feat_444,images_feat_445,images_feat_446,images_feat_447,images_feat_448,images_feat_449,images_feat_450,images_feat_451,images_feat_452,images_feat_453,images_feat_454,images_feat_455,images_feat_456,images_feat_457,images_feat_458,images_feat_459,images_feat_460,images_feat_461,images_feat_462,images_feat_463,images_feat_464,images_feat_465,images_feat_466,images_feat_467,images_feat_468,images_feat_469,images_feat_470,images_feat_471,images_feat_472,images_feat_473,images_feat_474,
images_feat_475,images_feat_476,images_feat_477,images_feat_478,images_feat_479,images_feat_480,images_feat_481,images_feat_482,images_feat_483,images_feat_484,images_feat_485,images_feat_486,images_feat_487,images_feat_488,images_feat_489,images_feat_490,images_feat_491,images_feat_492,images_feat_493,images_feat_494,images_feat_495,images_feat_496,images_feat_497,images_feat_498,images_feat_499,images_feat_500,images_feat_501,images_feat_502,images_feat_503,images_feat_504,images_feat_505,images_feat_506,images_feat_507,images_feat_508,images_feat_509,images_feat_510,images_feat_511 2 | False,0.0,0.06430369,5.925252,1.2477938,1.9022676,0.7292365,0.35128284,1.2580512,0.0,1.9784806,6.760506,0.3802457,1.4616345,0.48937023,0.12520242,0.0,0.0,1.1434659,0.030002972,4.420496,0.0,0.08563948,7.4073334,0.011735982,0.2933519,3.811751,0.16320845,0.63225144,0.88222635,0.0,0.19566797,0.005651583,0.0,6.2003555,4.4338775,0.0,1.1506345,0.934765,0.9933638,0.12695538,5.620974,0.014169323,0.19265716,5.561198,0.03258076,0.2505361,1.1689211,2.3166757,1.3553897,0.00996804,1.6288273,0.18410471,6.302098,1.0044285,4.648501,0.9760399,1.512236,2.6010919,0.0,1.1071947,0.2489015,0.7132885,0.50306994,0.3520863,5.2353253,0.26186904,0.5956484,3.5394926,1.4525433,1.7428367,0.12893242,4.7874084,1.0850035,0.6844506,2.8190055,1.8660756,3.3059452,0.86724275,1.912618,0.50218135,7.156048,1.4795531,0.0,0.0,0.5205103,1.2208014,0.013696391,2.7571406,0.0,0.0,0.41602758,5.15148,4.7260184,1.9724343,0.9735211,5.360706,0.0725911,1.1349676,1.6301644,0.77679425,0.357845,0.4446485,1.4450411,2.4311843,0.3848904,0.14403474,1.9345828,2.3822713,5.25369,0.0,0.12637495,0.0,0.0,1.852182,0.0,0.24121365,0.0,0.6686679,0.0,0.082115926,1.0559088,1.1733891,0.0,0.61270684,10.096444,0.115368225,4.761737,0.7631597,1.4007128,2.838534,0.0,2.541959,0.31714454,7.229467,0.30863938,0.004410572,0.12027911,0.0,1.5905406,6.1431565,0.3887413,0.7746064,5.4975033,1.3848671,0.6790592,0.089019366,5.7478013,0.63026905,1.6860456,0.22501561,0.099
98074,0.22472723,0.048187234,4.548613,0.39404544,0.0,3.2012563,2.4875112,4.816038,0.47314155,1.9401855,5.418616,2.3376446,0.581827,2.3534772,6.9106417,2.6489549,2.8527553,4.611235,0.57903886,0.11823338,0.23346938,0.0,2.60903,0.86994904,0.8627086,0.0,1.50256,0.002849428,0.042190213,0.13597369,0.12739834,0.0015194238,1.0115864,0.0,1.1693184,6.5084243,0.3633387,0.0,0.4999057,0.8373399,10.139998,0.032343287,2.3642514,3.5328903,0.19237278,0.118270814,0.0,4.096768,0.114599116,0.0,4.190225,0.56754124,0.8311648,3.7349038,0.0,4.2200136,0.01088281,0.1085515,1.0522821,0.41156217,2.6143224,0.28228495,0.64316785,0.91498697,2.548603,2.2334642,1.2991979,1.6584415,2.0870817,0.19921392,0.34985822,0.11114285,10.224248,1.9509667,0.7822371,0.0,3.3270075,0.11672954,2.2384195,0.46649593,0.16295668,12.06565,3.2309685,0.2596637,0.2021587,1.1646566,0.2846576,3.9792683,0.20739509,6.3545895,2.8161318,0.0,5.4241223,0.0835355,2.7165213,0.5605489,0.0187813,0.22815442,4.255856,0.7698795,0.5028055,0.85259837,0.28411278,0.067203335,0.124913335,5.493942,2.5626507,1.5939229,1.1782578,11.209688,0.09071099,3.4199302,15.100232,6.7983665,0.099231154,0.0,0.0,0.86766124,0.27894226,0.0,0.0,0.069804616,4.351778,0.0,1.0805423,0.123643935,10.690204,0.47816956,0.06645855,0.007546769,0.4814074,0.0,0.19200687,0.0,0.5923009,0.33620328,0.62727875,1.735227,7.182868,3.0014234,2.9397693,5.0955205,0.07466374,0.24726161,2.1152027,0.20864648,0.2795848,3.888554,0.09189236,0.0,0.7846511,0.0,0.0,3.018526,4.898274,0.8444631,0.68039566,1.9294883,1.420852,1.8197484,0.0,0.24696165,1.7744846,0.8634615,0.0,3.5151057,2.5969448,0.3709314,0.4910034,4.815773,1.0811102,0.91170424,0.0,0.0,0.18964833,0.6169176,0.22920837,0.0,0.0,8.854687,4.7849646,0.0,0.69278896,0.0,8.794974,0.029180411,2.7849865,0.10333087,2.3760562,1.0513319,0.83889574,0.0,4.8695283,0.0,0.37816083,0.5963372,0.05220283,3.0231662,3.277741,10.861541,4.5583177,0.12875788,8.092003,5.8995795,0.11318067,0.2302447,5.80469,1.355816,0.035333663,2.4274058,0.0,1.4589256,0.0,0.008
337674,0.056794185,0.67818326,0.23275617,0.30492723,0.0,0.03888062,0.0,0.98070127,3.5245311,0.064472236,0.0,0.7144263,0.0,2.7053547,0.048378047,0.86141956,1.2893332,0.51345825,7.620836,0.005830832,1.7384351,0.9546961,4.7781973,0.74109584,9.040736,0.0,1.2037519,3.9157329,0.40743956,1.8311182,1.0261803,5.236738,3.1455948,2.2358575,0.0,10.966049,0.02050886,2.3198588,0.0057365606,0.9877181,0.73059785,0.899798,0.32366183,4.0271564,2.915965,7.71047,0.0,0.47765186,0.0,0.0,0.9394298,1.5252643,1.6366545,2.6790133,0.0069701695,0.81947315,0.0,1.9211565,1.3513644,0.0,0.0,3.9908652,0.39159593,0.009902612,0.0,2.215494,0.0,0.30989146,0.13854943,0.012708846,1.362082,0.0,0.18787599,2.0075576,3.638248,3.0248704,2.8847232,0.0,0.55640036,0.0,0.05954851,0.0,0.74581194,0.0,3.1318593,0.02099697,0.39839393,0.6900185,1.6036885,0.61432284,6.5374627,0.029315297,1.8489829,6.2222,0.08512894,2.288661,0.7297279,4.5417633,0.0,0.5599782,0.0,0.0,0.2541982,0.0,2.8425996,0.0,0.18618032,0.37754142,0.38933957,0.6385549,3.9901986,9.413614,0.13278776,1.2800857,1.119599,0.0,3.3959885,1.4220761,0.0,0.82575494,0.42068207,0.0,2.6942632,4.9376574,0.52622277,1.6726253,0.93129253,0.10429599,1.0813674,0.20501657,2.1741478,1.2829067,3.2710037,1.595327,1.1216038,0.09035005,2.256902,6.348547,3.22828,0.28881666,1.7531539,0.35831764,11.005119,0.36145508,0.0,0.15231328,0.39823815 3 | 
False,0.0,1.5659888,0.9795387,4.2150273,2.7514215,0.46990424,0.942106,3.0819037,0.7333554,5.240792,3.3389344,1.3541185,1.3338892,0.6663938,0.5487696,0.0,1.3918678,1.1537352,1.1469694,1.137218,0.5315949,1.1478186,0.56462586,0.27681875,0.28172,0.62803036,0.004717092,1.6738908,1.0735782,0.012021471,0.50643593,0.0,0.034927443,0.54222625,1.45291,0.17807603,3.4017353,0.84574264,0.12286,2.1603284,4.9478965,2.4018497,0.55619097,1.0727917,0.09784732,0.7986277,1.6876371,0.3141875,1.413382,2.2161605,2.9333303,6.4016137,4.293491,1.4717547,4.914748,0.14770664,0.9234845,2.9636667,1.0482793,2.9261591,0.0,2.4522955,2.087722,1.2340851,1.597116,0.3951955,0.8834233,2.8590105,0.97714967,3.5808282,0.0,7.9844494,1.293921,5.227065,0.49527055,0.72197825,6.802121,0.52109516,1.6139073,0.28607228,0.4759791,0.06677796,0.12506889,0.1515753,1.1581132,0.21002053,1.2206664,3.208175,0.3185406,0.0,0.5200459,3.624943,3.6210797,2.3661478,0.30301082,9.536854,0.1775064,2.4361207,0.28820038,0.85999274,0.29853684,2.2857106,2.4229867,1.10337,0.5563773,0.505385,0.27907002,3.902735,0.36712894,0.18612202,1.299055,0.35058552,0.23118384,1.1703097,0.39608738,2.1284676,0.87798685,0.62168443,0.0,0.98764026,2.0116363,6.818782,0.09717489,1.5212868,7.931019,0.2340589,2.1220703,1.5370493,1.235108,3.6840394,1.8885325,0.27170816,0.28961405,3.5587764,1.2335792,0.0906846,0.1226984,0.0,0.3691413,1.9427574,1.3072993,2.6549468,1.7671379,0.1919094,1.1802415,2.9149354,1.9638197,0.9867413,4.286894,0.31699765,1.4402238,1.7048683,0.81744355,2.78236,1.9817716,0.45829806,1.6875836,0.7051146,12.437517,1.2465167,0.15638705,3.2915783,3.7153172,0.00037445623,2.6755934,1.5846614,1.3520594,2.0066643,3.5690994,3.3009062,0.21823698,2.5437348,0.29772404,5.157602,4.0000124,1.0213175,0.53319913,0.13398926,0.084509924,1.249874,0.25117996,0.6318734,0.23545043,3.5526493,0.0,0.73910636,0.52041,0.50401884,0.28475827,0.40722707,0.0,1.3661029,6.9649043,1.6108668,0.7728189,1.3216536,2.8952522,0.5173574,3.2593768,0.38281688,0.4525214,0.7768256,5.78295
66,2.2125843,1.6979096,0.062168002,1.7114139,0.0,0.13494396,0.017418,0.8761093,2.5021544,1.8988813,0.83362275,6.610062,0.31433657,0.6561014,1.3047681,2.398137,1.0416564,1.2648318,0.48655927,2.6824732,5.979489,0.7534528,0.7870135,0.5019257,0.6378011,1.108926,0.12475406,0.0059041358,0.0,13.54958,1.3913872,1.0996466,0.12400829,3.754011,1.2099261,2.57349,9.399678,1.8044401,5.5763397,0.30568036,4.084769,1.0212308,0.22334807,2.3704414,1.3341213,4.1379967,2.544452,2.829277,1.1347294,5.1544514,2.3694015,1.7976842,1.2571592,0.045517392,1.8260677,0.37298885,0.13971496,3.489783,0.2172307,0.34820753,0.2074296,1.1966202,0.119053856,0.54089326,0.62653285,0.12743191,0.29860196,1.173416,0.018981354,0.10971679,0.92240083,0.04589878,0.16339752,0.023559462,3.935342,2.422161,0.43081057,0.07294032,0.648422,0.07987221,3.6668785,0.0,1.038127,1.8494443,1.1980474,0.8329206,0.6722754,3.5810244,4.5582757,0.0,0.123638526,0.0,4.8873186,0.15728831,0.0,0.9070376,4.103295,2.358815,0.0,0.0,0.0,4.4455,0.25732,1.3554612,1.1573128,0.0,0.2879894,0.7066783,1.2678046,0.083180964,0.0,4.7302194,0.009526005,1.6318493,0.22732513,0.087628685,0.406337,3.676663,2.0678732,0.43814883,1.3990653,0.8248821,0.0,0.1336438,0.06987791,0.0,1.1978002,2.7286007,0.35513905,0.0,0.18630114,0.0,3.9574082,0.0,0.051355492,0.0,0.9697243,0.5038651,0.0,0.0,0.041237272,0.021493886,0.82063997,0.056277256,0.9544746,3.667481,5.2713146,0.18067496,2.1729138,2.1666937,5.877317,3.7127397,0.22856998,0.8399865,2.3051767,2.4559593,2.6168334,3.1110601,0.0,0.23745692,0.0,0.81211483,0.0,0.43130538,1.1038702,0.18849887,0.0,0.0,0.22280723,0.4506722,0.278354,3.8670344,0.27257487,0.0,0.034663774,8.918831,1.1115226,1.5357469,0.9361244,0.5266539,0.03094228,0.0,2.7048798,0.4964706,8.367421,0.1095097,4.103294,1.1361473,4.6312456,1.6571227,0.0,0.9858088,0.98663265,1.2326255,18.273752,0.106127754,0.0,10.132444,7.3140254,13.0637665,0.0,0.54206985,0.838631,0.66567606,0.0,0.67223907,0.8846641,3.690928,0.0,1.4550297,0.0,0.0,0.3765201,0.124779105,0.57198584,0.
06769558,0.05194254,2.3791409,0.0,1.0119655,0.9034338,0.0,1.0168633,2.3887832,1.5080258,0.80849445,12.042129,0.9865963,0.0,0.0,0.035975855,4.0963764,0.32174584,0.020602569,0.06682827,4.939119,2.6347206,0.0,3.7265346,0.23705861,0.016764328,0.3655856,4.803521,0.20734458,0.13272938,0.047159385,6.9744997,0.0,0.0,2.8244588,0.0,0.48884863,2.9610062,0.056871142,0.037993588,2.6344845,1.8345705,5.6257424,1.9977221,2.0507917,5.414329,5.937831,0.17503321,0.6394768,8.901436,1.3387647,0.082600646,0.0,0.50507915,0.067801915,0.28612956,0.28773987,0.0009720632,9.509638,2.8477495,0.25585645,8.762377,0.0,1.0540804,1.1605431,10.134096,0.0,0.062995434,1.7530464,3.8067064,2.5039506,0.27154094,1.2373917,0.23179904,0.5515947,1.9493744,2.6501715,0.41633463,1.2599964,0.23946893,0.7492251,0.0,0.1563604,1.931312,6.6867003,0.15866813,0.03338933,0.18354198,3.7537577,1.3273323,0.51614493,0.0,0.45749947,0.04283701 4 | False,0.46112114,2.1335888,1.8241048,3.7351613,0.36953086,1.5293108,2.1601055,2.860932,5.1433387,0.28087416,3.7314804,0.3581663,0.091056935,0.003450069,6.037664,0.30612454,0.571893,1.5549864,0.7488508,10.657478,0.7966928,1.4103812,0.42179903,2.5708578,3.5698192,0.34951842,0.020087833,2.7209818,1.200906,2.6880674,3.3300486,0.08889564,0.4264115,1.9196259,2.153557,0.08119932,1.6303731,0.52323073,0.07305842,4.9688535,2.0254424,0.68028307,3.0709374,6.11421,0.015177524,0.0,1.4988465,0.1980059,0.16609585,1.0889233,1.5761944,3.9454832,9.707764,0.44393125,1.1122248,0.061205845,0.79469895,5.656348,1.6394545,4.451493,3.022262,0.44380793,1.9415722,0.0,0.56014353,1.6221204,0.0,0.652658,0.5011004,1.7223866,0.0021700696,5.4243283,0.69782317,4.4391007,0.0,0.3059343,1.3109063,0.030020507,2.2842977,0.2192384,0.9240655,0.04335497,0.0,0.3255787,2.90101,0.49511334,0.8484899,2.643336,0.7712128,0.286951,6.749533,3.020384,2.1298018,0.22571988,2.5373983,0.81481665,2.4230592,2.378148,1.2698388,0.47444567,0.12968856,2.7633832,0.6543257,1.1585423,0.40002802,0.97725105,2.413279,1.6619308,0.040581264,0.0,2.06137
06,0.540403,0.7375542,1.8673608,2.4644318,1.8475541,0.36726853,0.5496293,0.8709604,0.9798058,0.78019166,2.4870117,0.109242216,0.33667758,0.60807127,0.63456565,4.31613,7.425311,1.6955297,3.8750064,0.0,1.5589504,1.6156995,0.7165218,1.11664,0.29832062,0.61164075,0.06982304,4.015175,6.425628,0.84591556,3.3241649,2.3341613,2.5000346,1.9833434,0.0,1.1131458,2.7022448,2.1387928,0.3581163,0.9521044,0.12547801,0.39652684,5.151849,0.9570625,2.0943348,0.9473914,5.9737678,7.1045117,3.8365586,0.011891012,7.778302,5.3617887,0.16430879,0.09030357,1.3474579,0.9955001,1.5114158,0.96471924,3.8530567,0.19509202,0.8279502,0.0,3.236872,2.087074,1.0473642,2.5557902,0.12543155,0.27207533,2.0083,0.0,2.331559,2.0011897,1.253072,0.7369683,0.0,3.8982956,0.0,0.08348985,5.4233794,0.7739589,2.419749,2.9180753,7.94402,3.5744588,0.47413462,4.6088147,1.874471,5.8800855,0.6224164,4.213573,0.6233635,1.9506603,0.1731375,2.1621401,0.09919829,3.030944,0.70585084,1.572602,2.2551274,0.7554109,0.48613983,0.011739742,0.0,4.6881547,2.9190836,3.6938877,0.8468688,1.9456602,1.380586,0.1180727,0.08764773,0.6397943,12.080511,0.040237863,2.866631,0.5557986,3.6685414,1.9213517,1.4565915,0.026319146,0.44326526,9.5060625,2.3732884,0.0796136,0.48028627,0.96748525,1.3613396,5.516922,4.1135426,11.771184,4.3025475,0.028500572,0.8796897,4.3576713,2.7081985,0.52707255,0.0,0.20732209,4.192343,0.0011305375,0.95325685,1.8918582,2.6601713,0.16889462,0.23276515,0.0,2.1605675,0.39623815,2.9520898,12.896067,6.8731117,0.051315077,0.37148917,15.056107,0.63663244,0.0,3.582499,0.0,1.6672244,1.2374235,0.100534566,0.3597434,9.280933,2.664385,0.14248258,0.63993657,11.7247,0.71052706,1.1474961,0.0,0.08538999,0.19263059,6.153173,0.0,0.11669634,1.5751977,0.32482773,1.5950497,0.1672559,2.7469232,0.56592935,0.3329131,1.0864764,1.5494595,13.196359,0.024275294,0.35035914,2.1386523,0.0,0.0,0.85830605,0.23369512,0.0,1.1287621,0.14058255,0.46435067,0.5499137,5.0106955,3.0786238,1.4176838,0.75445217,0.6832051,0.70677876,2.7583437,3.7263772,13.6837
74,2.5339792,1.268514,1.1097827,1.3701215,4.8274655,0.86168194,2.4353619,0.22178395,0.0018151865,0.028063435,0.18894655,0.0,1.4722105,4.137401,2.1483555,0.0,0.4736592,0.0,14.141827,0.8513631,0.33892128,0.0,0.34540272,4.4188547,1.0770575,2.5925288,0.33556724,0.0039636865,5.5812016,1.2223202,0.013105068,1.0221738,4.018209,0.5878394,4.442058,0.57266945,8.175587,2.5914788,0.0,0.3444457,9.432242,3.55173,0.095683545,0.3256337,0.0029114995,0.0,0.27841532,0.045093197,1.3534012,0.65515906,0.5296211,1.1997733,0.0,0.0,0.0,1.2729379,0.829383,0.2642946,4.731426,2.6540146,0.01989702,9.20689,0.5719824,0.07834564,0.14849903,0.514003,7.3862925,1.1155077,3.555706,3.9550712,4.4527264,3.5506215,5.755327,3.5920541,0.044972666,2.1465847,0.024345243,2.2532241,0.45542118,3.2165465,5.4006534,0.46695796,0.0,18.149057,1.4007834,4.6173263,1.1946156,0.0,1.2031934,3.549035,7.8613353,2.3801801,2.3311281,5.995833,2.5379546,10.0461445,0.22278792,0.7071859,0.20086314,1.9559548,4.94324,8.251469,0.40846524,0.96566707,0.0,3.3403916,0.12761915,0.16489993,0.21057117,4.756474,1.182665,1.3473257,4.2571015,2.1792533,1.1000628,3.8973396,3.8577735,0.17022908,1.943259,0.011295123,0.05758635,6.0523705,2.6862469,5.957788,1.2351397,2.718024,0.07126258,2.2447028,0.8245993,5.0258527,0.15583189,4.517081,2.8265631,0.0,1.6555821,1.4330915,1.2179266,1.3872147,1.2570627,0.4305941,4.3483677,3.3759675,1.6939688,5.180839,2.5947,3.4261672,0.0,4.666795,0.0,0.0,10.839356,2.3502908,2.4827735,0.43250966,0.07325214,2.6113148,1.4340428,0.4222705,0.2289919,3.2848847,0.25043586,1.1298453,4.430055,3.0049922,5.276356,1.8888803,2.1993637,0.0,1.827844,9.044594,10.208017,8.133966,0.8523788,3.3054745,0.21871397,2.0441742,4.3706307,4.0342016,0.21408726,4.178865,1.5641925,0.0,4.0655494,1.3194618,3.9595556,14.100289,2.7498658,0.28389466,0.30229512,1.483878,4.3026614,0.0,2.7556431,0.9810146,0.32740286 5 | -------------------------------------------------------------------------------- 
/tests/image_featurizer_testing/csv_checking/squeezenet_check_csv_mult.csv: -------------------------------------------------------------------------------- 1 | images_1_missing,images_1_feat_0,images_1_feat_1,images_1_feat_2,images_1_feat_3,images_1_feat_4,images_1_feat_5,images_1_feat_6,images_1_feat_7,images_1_feat_8,images_1_feat_9,images_1_feat_10,images_1_feat_11,images_1_feat_12,images_1_feat_13,images_1_feat_14,images_1_feat_15,images_1_feat_16,images_1_feat_17,images_1_feat_18,images_1_feat_19,images_1_feat_20,images_1_feat_21,images_1_feat_22,images_1_feat_23,images_1_feat_24,images_1_feat_25,images_1_feat_26,images_1_feat_27,images_1_feat_28,images_1_feat_29,images_1_feat_30,images_1_feat_31,images_1_feat_32,images_1_feat_33,images_1_feat_34,images_1_feat_35,images_1_feat_36,images_1_feat_37,images_1_feat_38,images_1_feat_39,images_1_feat_40,images_1_feat_41,images_1_feat_42,images_1_feat_43,images_1_feat_44,images_1_feat_45,images_1_feat_46,images_1_feat_47,images_1_feat_48,images_1_feat_49,images_1_feat_50,images_1_feat_51,images_1_feat_52,images_1_feat_53,images_1_feat_54,images_1_feat_55,images_1_feat_56,images_1_feat_57,images_1_feat_58,images_1_feat_59,images_1_feat_60,images_1_feat_61,images_1_feat_62,images_1_feat_63,images_1_feat_64,images_1_feat_65,images_1_feat_66,images_1_feat_67,images_1_feat_68,images_1_feat_69,images_1_feat_70,images_1_feat_71,images_1_feat_72,images_1_feat_73,images_1_feat_74,images_1_feat_75,images_1_feat_76,images_1_feat_77,images_1_feat_78,images_1_feat_79,images_1_feat_80,images_1_feat_81,images_1_feat_82,images_1_feat_83,images_1_feat_84,images_1_feat_85,images_1_feat_86,images_1_feat_87,images_1_feat_88,images_1_feat_89,images_1_feat_90,images_1_feat_91,images_1_feat_92,images_1_feat_93,images_1_feat_94,images_1_feat_95,images_1_feat_96,images_1_feat_97,images_1_feat_98,images_1_feat_99,images_1_feat_100,images_1_feat_101,images_1_feat_102,images_1_feat_103,images_1_feat_104,images_1_feat_105,images_1_feat_106,images
_1_feat_107,images_1_feat_108,images_1_feat_109,images_1_feat_110,images_1_feat_111,images_1_feat_112,images_1_feat_113,images_1_feat_114,images_1_feat_115,images_1_feat_116,images_1_feat_117,images_1_feat_118,images_1_feat_119,images_1_feat_120,images_1_feat_121,images_1_feat_122,images_1_feat_123,images_1_feat_124,images_1_feat_125,images_1_feat_126,images_1_feat_127,images_1_feat_128,images_1_feat_129,images_1_feat_130,images_1_feat_131,images_1_feat_132,images_1_feat_133,images_1_feat_134,images_1_feat_135,images_1_feat_136,images_1_feat_137,images_1_feat_138,images_1_feat_139,images_1_feat_140,images_1_feat_141,images_1_feat_142,images_1_feat_143,images_1_feat_144,images_1_feat_145,images_1_feat_146,images_1_feat_147,images_1_feat_148,images_1_feat_149,images_1_feat_150,images_1_feat_151,images_1_feat_152,images_1_feat_153,images_1_feat_154,images_1_feat_155,images_1_feat_156,images_1_feat_157,images_1_feat_158,images_1_feat_159,images_1_feat_160,images_1_feat_161,images_1_feat_162,images_1_feat_163,images_1_feat_164,images_1_feat_165,images_1_feat_166,images_1_feat_167,images_1_feat_168,images_1_feat_169,images_1_feat_170,images_1_feat_171,images_1_feat_172,images_1_feat_173,images_1_feat_174,images_1_feat_175,images_1_feat_176,images_1_feat_177,images_1_feat_178,images_1_feat_179,images_1_feat_180,images_1_feat_181,images_1_feat_182,images_1_feat_183,images_1_feat_184,images_1_feat_185,images_1_feat_186,images_1_feat_187,images_1_feat_188,images_1_feat_189,images_1_feat_190,images_1_feat_191,images_1_feat_192,images_1_feat_193,images_1_feat_194,images_1_feat_195,images_1_feat_196,images_1_feat_197,images_1_feat_198,images_1_feat_199,images_1_feat_200,images_1_feat_201,images_1_feat_202,images_1_feat_203,images_1_feat_204,images_1_feat_205,images_1_feat_206,images_1_feat_207,images_1_feat_208,images_1_feat_209,images_1_feat_210,images_1_feat_211,images_1_feat_212,images_1_feat_213,images_1_feat_214,images_1_feat_215,images_1_feat_216,images_1_feat_217,images_1
_feat_218,images_1_feat_219,images_1_feat_220,images_1_feat_221,images_1_feat_222,images_1_feat_223,images_1_feat_224,images_1_feat_225,images_1_feat_226,images_1_feat_227,images_1_feat_228,images_1_feat_229,images_1_feat_230,images_1_feat_231,images_1_feat_232,images_1_feat_233,images_1_feat_234,images_1_feat_235,images_1_feat_236,images_1_feat_237,images_1_feat_238,images_1_feat_239,images_1_feat_240,images_1_feat_241,images_1_feat_242,images_1_feat_243,images_1_feat_244,images_1_feat_245,images_1_feat_246,images_1_feat_247,images_1_feat_248,images_1_feat_249,images_1_feat_250,images_1_feat_251,images_1_feat_252,images_1_feat_253,images_1_feat_254,images_1_feat_255,images_2_missing,images_2_feat_0,images_2_feat_1,images_2_feat_2,images_2_feat_3,images_2_feat_4,images_2_feat_5,images_2_feat_6,images_2_feat_7,images_2_feat_8,images_2_feat_9,images_2_feat_10,images_2_feat_11,images_2_feat_12,images_2_feat_13,images_2_feat_14,images_2_feat_15,images_2_feat_16,images_2_feat_17,images_2_feat_18,images_2_feat_19,images_2_feat_20,images_2_feat_21,images_2_feat_22,images_2_feat_23,images_2_feat_24,images_2_feat_25,images_2_feat_26,images_2_feat_27,images_2_feat_28,images_2_feat_29,images_2_feat_30,images_2_feat_31,images_2_feat_32,images_2_feat_33,images_2_feat_34,images_2_feat_35,images_2_feat_36,images_2_feat_37,images_2_feat_38,images_2_feat_39,images_2_feat_40,images_2_feat_41,images_2_feat_42,images_2_feat_43,images_2_feat_44,images_2_feat_45,images_2_feat_46,images_2_feat_47,images_2_feat_48,images_2_feat_49,images_2_feat_50,images_2_feat_51,images_2_feat_52,images_2_feat_53,images_2_feat_54,images_2_feat_55,images_2_feat_56,images_2_feat_57,images_2_feat_58,images_2_feat_59,images_2_feat_60,images_2_feat_61,images_2_feat_62,images_2_feat_63,images_2_feat_64,images_2_feat_65,images_2_feat_66,images_2_feat_67,images_2_feat_68,images_2_feat_69,images_2_feat_70,images_2_feat_71,images_2_feat_72,images_2_feat_73,images_2_feat_74,images_2_feat_75,images_2_feat_76,images_2
_feat_77,images_2_feat_78,images_2_feat_79,images_2_feat_80,images_2_feat_81,images_2_feat_82,images_2_feat_83,images_2_feat_84,images_2_feat_85,images_2_feat_86,images_2_feat_87,images_2_feat_88,images_2_feat_89,images_2_feat_90,images_2_feat_91,images_2_feat_92,images_2_feat_93,images_2_feat_94,images_2_feat_95,images_2_feat_96,images_2_feat_97,images_2_feat_98,images_2_feat_99,images_2_feat_100,images_2_feat_101,images_2_feat_102,images_2_feat_103,images_2_feat_104,images_2_feat_105,images_2_feat_106,images_2_feat_107,images_2_feat_108,images_2_feat_109,images_2_feat_110,images_2_feat_111,images_2_feat_112,images_2_feat_113,images_2_feat_114,images_2_feat_115,images_2_feat_116,images_2_feat_117,images_2_feat_118,images_2_feat_119,images_2_feat_120,images_2_feat_121,images_2_feat_122,images_2_feat_123,images_2_feat_124,images_2_feat_125,images_2_feat_126,images_2_feat_127,images_2_feat_128,images_2_feat_129,images_2_feat_130,images_2_feat_131,images_2_feat_132,images_2_feat_133,images_2_feat_134,images_2_feat_135,images_2_feat_136,images_2_feat_137,images_2_feat_138,images_2_feat_139,images_2_feat_140,images_2_feat_141,images_2_feat_142,images_2_feat_143,images_2_feat_144,images_2_feat_145,images_2_feat_146,images_2_feat_147,images_2_feat_148,images_2_feat_149,images_2_feat_150,images_2_feat_151,images_2_feat_152,images_2_feat_153,images_2_feat_154,images_2_feat_155,images_2_feat_156,images_2_feat_157,images_2_feat_158,images_2_feat_159,images_2_feat_160,images_2_feat_161,images_2_feat_162,images_2_feat_163,images_2_feat_164,images_2_feat_165,images_2_feat_166,images_2_feat_167,images_2_feat_168,images_2_feat_169,images_2_feat_170,images_2_feat_171,images_2_feat_172,images_2_feat_173,images_2_feat_174,images_2_feat_175,images_2_feat_176,images_2_feat_177,images_2_feat_178,images_2_feat_179,images_2_feat_180,images_2_feat_181,images_2_feat_182,images_2_feat_183,images_2_feat_184,images_2_feat_185,images_2_feat_186,images_2_feat_187,images_2_feat_188,images_2_feat_1
89,images_2_feat_190,images_2_feat_191,images_2_feat_192,images_2_feat_193,images_2_feat_194,images_2_feat_195,images_2_feat_196,images_2_feat_197,images_2_feat_198,images_2_feat_199,images_2_feat_200,images_2_feat_201,images_2_feat_202,images_2_feat_203,images_2_feat_204,images_2_feat_205,images_2_feat_206,images_2_feat_207,images_2_feat_208,images_2_feat_209,images_2_feat_210,images_2_feat_211,images_2_feat_212,images_2_feat_213,images_2_feat_214,images_2_feat_215,images_2_feat_216,images_2_feat_217,images_2_feat_218,images_2_feat_219,images_2_feat_220,images_2_feat_221,images_2_feat_222,images_2_feat_223,images_2_feat_224,images_2_feat_225,images_2_feat_226,images_2_feat_227,images_2_feat_228,images_2_feat_229,images_2_feat_230,images_2_feat_231,images_2_feat_232,images_2_feat_233,images_2_feat_234,images_2_feat_235,images_2_feat_236,images_2_feat_237,images_2_feat_238,images_2_feat_239,images_2_feat_240,images_2_feat_241,images_2_feat_242,images_2_feat_243,images_2_feat_244,images_2_feat_245,images_2_feat_246,images_2_feat_247,images_2_feat_248,images_2_feat_249,images_2_feat_250,images_2_feat_251,images_2_feat_252,images_2_feat_253,images_2_feat_254,images_2_feat_255 2 | 
False,0.032151844,3.5865228,1.315752,0.804667,0.9892403,3.570376,0.9755024,0.06260121,0.57173294,2.2252495,0.04281974,3.7095346,2.0525513,0.39772993,0.44111317,0.10065977,3.1001778,2.2169387,1.0426998,0.56015956,2.8175716,2.8769276,0.14155844,1.7427983,0.6826789,0.906466,3.653263,2.8122704,2.056664,0.55359733,0.481095,0.42757812,2.7485971,2.0675704,1.59769,2.4581704,0.88472706,2.3425405,2.0865939,1.2073997,4.3178005,0.0,0.87065583,1.3854185,0.0,2.7837539,3.3492265,3.1671135,0.6037793,1.2034793,0.40124676,1.9381127,0.2644626,2.158427,2.626845,0.06318747,0.926091,0.120606825,0.33433396,0.041057963,1.1146489,0.30635342,5.105906,2.7624483,2.1196234,1.2709795,3.7733057,0.15652497,0.060139555,3.8668485,0.58167386,3.4411852,0.38403928,3.1890352,0.95553064,0.16235399,2.2984002,0.19702272,2.8443837,2.64459,3.6794007,1.4597358,4.6320596,2.750855,2.5951371,0.17585137,1.304515,0.86632884,0.75128,0.02251982,0.13168602,0.50655293,0.5846592,3.4358816,0.24995285,5.4886694,1.1982974,1.8626316,0.059135407,2.1056836,2.0951126,0.699353,1.8674519,2.1154482,0.5804168,1.5129423,0.4627264,1.7317951,1.7663311,1.8727616,0.27453607,5.1676955,1.366602,1.6635038,1.1775745,0.3147263,7.648309,0.2309112,0.7246571,2.0933318,4.5853605,2.7120612,1.4000283,0.2896651,2.2420053,0.6363425,0.56835556,0.09605834,4.0282965,1.3860903,5.6501994,9.260081,3.448799,0.0,0.57330173,0.0,2.2107913,0.54027116,5.406924,0.27231407,0.2444771,0.096003436,0.29615045,0.481741,4.4590473,2.9705963,2.585092,1.1812321,0.24411564,1.9902232,0.39232555,0.0,3.9584,0.76242936,1.6751702,0.9098742,1.0107231,0.43173075,3.0560253,0.4309674,2.9484415,0.45585212,0.094824165,0.42306298,0.0,6.8198256,0.34639448,4.397487,1.4070835,1.2396935,0.9451138,2.4347641,0.18908042,0.32427,3.1504536,7.7099295,4.11038,3.00638,3.0174673,0.6955748,1.2137029,0.7294628,0.03256593,0.45546973,0.15246361,0.01944031,2.2526162,0.032236118,0.35721314,1.3768663,1.0753764,4.0671473,0.872133,2.8664467,4.890916,0.60187596,2.1615863,1.4286492,4.1911664,1.1179287,5.49
3279,1.1627977,0.859158,0.6117299,3.4715607,3.855235,0.23882593,0.4697149,1.5809593,1.3429917,0.40973657,1.6362605,0.0,2.1912305,0.004951306,1.107747,0.22422045,0.68739545,0.09393799,2.8229027,2.9547968,0.27820018,0.029774254,0.37290597,1.5659297,0.20969544,1.1468534,3.5758927,0.93914914,3.1536644,1.5091945,2.2708817,0.2799891,0.1270991,1.4212998,0.09309016,0.3834405,2.3143768,4.773201,1.1998423,1.6979942,0.71103805,0.62321854,1.3471316,2.73194,1.3019589,0.5928317,1.1895822,2.2769551,1.3584654,1.1736261,4.7884135,1.0209852,5.6817183,0.18072754,0.2752757,False,0.7829944,2.597283,1.6106628,2.0120049,2.9870737,2.3465264,1.0001415,0.2743848,1.2728014,1.1420937,0.8397067,0.4207223,0.45487517,0.839304,0.54279983,0.25321797,0.28857684,0.815493,2.123739,1.1415942,3.674873,0.81449133,0.4482375,1.0009123,1.8147713,4.667472,2.8826227,2.5312274,1.9435756,1.9872192,1.2261478,1.6609036,0.99615574,1.8712169,2.2789888,3.9922247,3.260493,0.6086244,3.6616082,0.9499898,0.27137852,0.13832209,0.6840669,2.2144208,0.1592703,2.0724945,2.9936137,4.9199324,1.3068136,0.57409656,1.2921237,1.7631783,0.53088117,2.0909026,0.27662548,0.8248203,0.7007468,1.2622775,0.7498356,0.49382013,4.415209,0.80923086,4.082539,1.8295598,2.4595737,1.0801203,1.9241952,0.6621319,0.0613492,1.1559494,1.981123,0.97952366,2.0475883,1.4752805,2.3019457,1.572546,1.7999018,1.2200348,1.1963491,6.842017,1.7239827,1.8578459,2.1301274,1.6793618,3.4350028,1.3809859,2.727663,2.510665,0.3335942,0.667192,0.4415267,1.8940499,0.36955318,0.5122144,0.34599268,0.68305147,4.2878857,1.0472362,1.7063048,1.8210968,0.6146735,3.9977703,0.8800388,0.85570693,0.07618098,1.6891319,1.3662521,3.4621994,0.9804348,1.7198968,0.8756955,4.3309813,0.77023315,0.56986344,0.61684006,0.0029520679,7.4704833,0.61182743,2.4819684,5.986584,3.6903899,2.1952245,0.6222894,1.8522813,3.3412242,1.9820032,3.7619264,1.5274217,0.93579257,0.2563519,1.8535069,0.27781856,0.65783703,0.58371305,0.21301693,0.5961987,0.5160588,0.10464815,1.9794508,1.4264858,0.36068118,1.87337
53,0.5190635,1.5237458,0.75259805,4.06965,0.061819263,2.4436593,0.07864416,2.5051663,1.1794075,0.0,2.35141,1.256387,0.1439947,0.98724145,0.041590482,2.3698726,0.9295872,0.24698284,2.8722682,0.91860706,0.41244105,0.10176086,0.5989001,1.5418699,0.09315057,1.9787041,0.025677746,0.48486215,0.25193256,0.020618636,0.42106694,0.5053759,4.4693975,1.1767944,4.022005,1.9706548,1.5725816,2.5363965,1.5555301,0.11872846,0.40605742,0.7675878,0.094249435,0.111403614,0.3645131,2.0698047,0.017331887,5.015177,1.2359357,0.27879807,1.3524399,4.431946,2.1064017,2.8836966,0.82856137,0.9862207,9.753189,0.053063877,8.723235,6.5318832,0.6903504,0.33283803,0.77845156,1.845464,0.72751486,0.18826005,0.34838247,0.059819058,1.1895704,0.95769966,0.5084317,1.9484046,6.4253116,0.49329814,0.017987927,2.2090611,0.04371542,3.7869196,1.8632673,0.12691146,2.5845535,0.17003697,3.5108294,0.0,1.4122294,1.7249274,0.047432363,2.2345276,3.8117323,3.7325604,3.056432,4.7704563,0.71068263,0.25253958,0.17696574,0.14435597,6.178694,4.5091166,0.5270402,5.64732,0.031497717,2.7798765,1.3877457,0.73459536,1.2504846,1.5332531,0.7497327,0.37461254,1.0438362,3.4226842,0.10846566,2.540545,0.25807247,0.25016823 3 | 
False,1.2973549,2.779633,0.9494208,2.5105188,2.7121065,2.0448234,0.0472535,3.1718943,1.0634396,5.7031646,1.103537,1.4963284,1.9596689,1.3705348,1.9444867,1.7094721,1.1730187,1.1173782,1.0768019,2.520956,1.3528627,4.5925736,0.007588762,0.8484262,0.6275096,2.7608387,5.0758476,0.58671534,3.2255235,3.0454736,1.733035,0.9707861,1.0911319,0.326329,1.1117435,2.7132492,2.568462,0.15296715,0.6704634,1.2517681,0.48371023,0.16278934,1.6980617,1.745913,0.52908194,4.8849587,1.1777608,1.6761075,2.4006038,0.87214226,1.4465358,0.906434,0.6886395,2.0376048,0.020290632,1.3008869,1.3024576,2.155993,0.4584489,0.9253831,1.6336017,0.2229599,0.62131846,5.870721,2.785268,0.7794752,1.1661106,0.7074803,0.3407319,5.220402,2.08504,2.417098,0.9916717,1.9076953,1.2484546,0.5387912,2.7741878,1.5256987,3.4605796,5.4705353,3.8950965,2.7630486,0.7188807,1.253458,2.4088879,0.5115211,1.618436,1.5672191,1.3406109,1.1401877,1.1657795,1.6271309,0.36848414,1.9491478,2.7534347,1.596854,5.4310474,2.0242968,3.241643,3.251251,2.4184682,1.061899,1.1306692,1.8683975,1.9138647,0.62077534,0.005869871,3.8036191,2.2703784,1.6631231,0.10286021,6.3601527,1.4534345,2.11217,1.6889715,0.2347922,5.9396753,0.27994993,1.1644124,4.8152323,8.036865,0.45409513,3.532935,0.26353627,2.1998327,0.47719368,2.2760148,0.2008299,1.0802838,1.6741639,9.884589,0.21140212,7.8463697,1.7912495,0.8336122,0.66897905,4.8203382,1.4034338,6.182318,0.9290116,0.042694993,3.1729019,0.05834817,0.9500127,0.8811528,1.6564263,0.70969474,7.372909,0.18731722,1.0693262,0.42915303,0.11684756,0.63467234,0.5071322,4.0446596,1.086068,0.69499195,3.2423606,8.108876,1.1891484,3.0987935,1.6485219,0.11179957,0.10850499,0.73610526,3.1428783,0.2368296,7.0709133,0.5951422,0.17270136,2.747956,1.464048,2.7925825,0.6177126,2.5201914,2.5149488,4.374128,1.2957394,4.888344,1.8237067,0.1642726,0.13920766,0.6992472,0.59239006,0.59988666,0.0,1.0511605,2.4978602,1.3369558,4.8894362,0.113422334,3.9501476,2.3356068,4.203899,4.652974,1.8185134,1.085465,1.3543227,4.3086,0.23347898
,9.77492,2.905971,0.6015967,5.705185,2.3556542,4.266894,5.134466,0.45402452,3.4495974,4.329967,0.48283353,1.7340055,0.18773556,2.9695694,2.8022137,1.6396581,3.8775566,1.0567441,0.034440737,4.3693085,3.596464,1.3946433,1.534651,2.5908422,3.671822,0.82779104,1.3255091,1.3221387,2.3894808,2.5349681,3.8877697,1.7130836,2.3333974,5.419678,2.416532,0.2528809,2.0226789,0.3256312,1.7676603,2.7799501,4.140674,2.044122,0.913922,9.626306,4.4931726,1.7620943,3.2074025,2.1241446,2.8715286,2.0327747,2.6395087,8.425077,0.29309487,2.8932698,1.3778216,0.6542087,True,0.0,0.13000804,0.0,0.0,0.002037752,0.0,0.0,0.0,0.0,0.06111543,0.0,6.702346,1.4267697,3.938085,0.7040212,0.0,0.0,1.0116627,0.0,4.539293,0.0,1.1017358,2.827432,0.0,0.0,0.0,1.3913443,0.33522812,0.0,3.5802555,0.0,0.0,3.273904,0.0,0.0,0.108908735,0.44315967,0.0,0.048082244,1.808021,2.3631117,0.93263125,0.25839823,0.0,0.0,1.5541325,4.5023203,2.4522083,0.5370079,0.0,3.1309,0.0,0.0,0.00025149493,0.0,0.0,1.598844,0.0,0.0,0.0,3.6755223,1.5220081,0.17540026,4.2726383,0.0,0.0,1.0369809,1.8130282,1.1478732,0.070069,0.0,0.0,0.37161943,0.0,2.6920598,1.8049885,0.011324564,0.1839397,0.0,0.0,0.14800237,3.2332604,0.0,0.0,0.0,5.175657,0.3173036,1.5077751,3.795833,0.0,0.0,0.0,0.002702519,0.60783273,7.8407364,0.6542296,0.011128779,0.0,0.0,3.0155509,0.0,0.1969728,0.2911376,1.9353493,0.0,0.0,0.31412342,0.06156551,0.0,0.0,2.501566,0.0,3.511222,0.54997545,0.0,6.5137396,4.60137,3.5791976,3.2295272,0.030881694,0.0,0.0,0.0,0.044022877,1.2871318,1.7206844,1.423317,0.00012220179,0.0,0.018707046,0.0,0.063907124,7.7610717,1.8263488,0.0,2.0830753,4.024913,3.679568,0.26151466,1.841901,0.09919314,0.19268763,0.28817415,0.12974192,0.14141828,4.436533,0.0,0.46092474,0.10102605,2.2220867,0.0,6.958406,0.1396151,2.0149689,5.698128,0.35020724,4.1179953,0.033450875,0.0048411437,0.0,0.876317,0.9527694,0.6799472,0.0,1.1751864,0.009888711,0.41615263,4.371997,0.0,0.0029504623,6.440926,9.185331,6.884625,5.5525703,0.0,0.3216756,0.16928916,0.34024408,0.0,0.1840653,0.0004
7650983,9.906516e-05,0.0,0.17830971,0.40046695,3.0138555,0.0,0.3414072,2.1366544,2.0524268,0.059395626,0.0,0.7721342,0.0024322374,0.030976947,0.0,1.9868054,0.045436963,0.0,0.0,0.009095855,0.695086,0.00048830017,6.8302617,1.7030824,0.011468042,0.0,2.9505427,0.16068885,0.0001173876,0.0,0.18750118,0.036273684,0.0,0.018228091,4.979061,2.84645,3.6533518,0.023549175,0.7373421,2.2333076,3.1135373,0.0,7.839826,0.09752249,4.4298525,1.2061259,0.0,0.09014438,1.6528475,0.0586081,0.0,4.721425,0.1638548,0.0016590456,0.16397965,0.085280664,0.0,0.5875555,0.020818662,0.019216344,4.4206805,0.69836605,0.0,0.021653343,0.042921916,0.02777873,0.0011217439,0.4927751,0.0,0.0,0.28798205,0.0,9.798935,0.27450448,0.045702275 4 | True,0.0,0.13000804,0.0,0.0,0.002037752,0.0,0.0,0.0,0.0,0.06111543,0.0,6.702346,1.4267697,3.938085,0.7040212,0.0,0.0,1.0116627,0.0,4.539293,0.0,1.1017358,2.827432,0.0,0.0,0.0,1.3913443,0.33522812,0.0,3.5802555,0.0,0.0,3.273904,0.0,0.0,0.108908735,0.44315967,0.0,0.048082244,1.808021,2.3631117,0.93263125,0.25839823,0.0,0.0,1.5541325,4.5023203,2.4522083,0.5370079,0.0,3.1309,0.0,0.0,0.00025149493,0.0,0.0,1.598844,0.0,0.0,0.0,3.6755223,1.5220081,0.17540026,4.2726383,0.0,0.0,1.0369809,1.8130282,1.1478732,0.070069,0.0,0.0,0.37161943,0.0,2.6920598,1.8049885,0.011324564,0.1839397,0.0,0.0,0.14800237,3.2332604,0.0,0.0,0.0,5.175657,0.3173036,1.5077751,3.795833,0.0,0.0,0.0,0.002702519,0.60783273,7.8407364,0.6542296,0.011128779,0.0,0.0,3.0155509,0.0,0.1969728,0.2911376,1.9353493,0.0,0.0,0.31412342,0.06156551,0.0,0.0,2.501566,0.0,3.511222,0.54997545,0.0,6.5137396,4.60137,3.5791976,3.2295272,0.030881694,0.0,0.0,0.0,0.044022877,1.2871318,1.7206844,1.423317,0.00012220179,0.0,0.018707046,0.0,0.063907124,7.7610717,1.8263488,0.0,2.0830753,4.024913,3.679568,0.26151466,1.841901,0.09919314,0.19268763,0.28817415,0.12974192,0.14141828,4.436533,0.0,0.46092474,0.10102605,2.2220867,0.0,6.958406,0.1396151,2.0149689,5.698128,0.35020724,4.1179953,0.033450875,0.0048411437,0.0,0.876317,0.9527694,0.6799
472,0.0,1.1751864,0.009888711,0.41615263,4.371997,0.0,0.0029504623,6.440926,9.185331,6.884625,5.5525703,0.0,0.3216756,0.16928916,0.34024408,0.0,0.1840653,0.00047650983,9.906516e-05,0.0,0.17830971,0.40046695,3.0138555,0.0,0.3414072,2.1366544,2.0524268,0.059395626,0.0,0.7721342,0.0024322374,0.030976947,0.0,1.9868054,0.045436963,0.0,0.0,0.009095855,0.695086,0.00048830017,6.8302617,1.7030824,0.011468042,0.0,2.9505427,0.16068885,0.0001173876,0.0,0.18750118,0.036273684,0.0,0.018228091,4.979061,2.84645,3.6533518,0.023549175,0.7373421,2.2333076,3.1135373,0.0,7.839826,0.09752249,4.4298525,1.2061259,0.0,0.09014438,1.6528475,0.0586081,0.0,4.721425,0.1638548,0.0016590456,0.16397965,0.085280664,0.0,0.5875555,0.020818662,0.019216344,4.4206805,0.69836605,0.0,0.021653343,0.042921916,0.02777873,0.0011217439,0.4927751,0.0,0.0,0.28798205,0.0,9.798935,0.27450448,0.045702275,True,0.0,0.13000804,0.0,0.0,0.002037752,0.0,0.0,0.0,0.0,0.06111543,0.0,6.702346,1.4267697,3.938085,0.7040212,0.0,0.0,1.0116627,0.0,4.539293,0.0,1.1017358,2.827432,0.0,0.0,0.0,1.3913443,0.33522812,0.0,3.5802555,0.0,0.0,3.273904,0.0,0.0,0.108908735,0.44315967,0.0,0.048082244,1.808021,2.3631117,0.93263125,0.25839823,0.0,0.0,1.5541325,4.5023203,2.4522083,0.5370079,0.0,3.1309,0.0,0.0,0.00025149493,0.0,0.0,1.598844,0.0,0.0,0.0,3.6755223,1.5220081,0.17540026,4.2726383,0.0,0.0,1.0369809,1.8130282,1.1478732,0.070069,0.0,0.0,0.37161943,0.0,2.6920598,1.8049885,0.011324564,0.1839397,0.0,0.0,0.14800237,3.2332604,0.0,0.0,0.0,5.175657,0.3173036,1.5077751,3.795833,0.0,0.0,0.0,0.002702519,0.60783273,7.8407364,0.6542296,0.011128779,0.0,0.0,3.0155509,0.0,0.1969728,0.2911376,1.9353493,0.0,0.0,0.31412342,0.06156551,0.0,0.0,2.501566,0.0,3.511222,0.54997545,0.0,6.5137396,4.60137,3.5791976,3.2295272,0.030881694,0.0,0.0,0.0,0.044022877,1.2871318,1.7206844,1.423317,0.00012220179,0.0,0.018707046,0.0,0.063907124,7.7610717,1.8263488,0.0,2.0830753,4.024913,3.679568,0.26151466,1.841901,0.09919314,0.19268763,0.28817415,0.12974192,0.14141828,4.4365
33,0.0,0.46092474,0.10102605,2.2220867,0.0,6.958406,0.1396151,2.0149689,5.698128,0.35020724,4.1179953,0.033450875,0.0048411437,0.0,0.876317,0.9527694,0.6799472,0.0,1.1751864,0.009888711,0.41615263,4.371997,0.0,0.0029504623,6.440926,9.185331,6.884625,5.5525703,0.0,0.3216756,0.16928916,0.34024408,0.0,0.1840653,0.00047650983,9.906516e-05,0.0,0.17830971,0.40046695,3.0138555,0.0,0.3414072,2.1366544,2.0524268,0.059395626,0.0,0.7721342,0.0024322374,0.030976947,0.0,1.9868054,0.045436963,0.0,0.0,0.009095855,0.695086,0.00048830017,6.8302617,1.7030824,0.011468042,0.0,2.9505427,0.16068885,0.0001173876,0.0,0.18750118,0.036273684,0.0,0.018228091,4.979061,2.84645,3.6533518,0.023549175,0.7373421,2.2333076,3.1135373,0.0,7.839826,0.09752249,4.4298525,1.2061259,0.0,0.09014438,1.6528475,0.0586081,0.0,4.721425,0.1638548,0.0016590456,0.16397965,0.085280664,0.0,0.5875555,0.020818662,0.019216344,4.4206805,0.69836605,0.0,0.021653343,0.042921916,0.02777873,0.0011217439,0.4927751,0.0,0.0,0.28798205,0.0,9.798935,0.27450448,0.045702275 5 | -------------------------------------------------------------------------------- /tests/image_featurizer_testing/csv_checking/testing_data.csv: -------------------------------------------------------------------------------- 1 | images_1,images_2,derp 2 | arendt.bmp,borges.jpg,4 3 | sappho.png,,3 4 | ,, 5 | -------------------------------------------------------------------------------- /tests/test_build_featurizer.py: -------------------------------------------------------------------------------- 1 | """Test the build_featurizer code.""" 2 | import os 3 | import random 4 | import warnings 5 | import logging 6 | 7 | import keras.backend as K 8 | import numpy as np 9 | import pytest 10 | from keras.layers import Dense, Activation, Input 11 | from keras.layers.merge import add 12 | from keras.models import Sequential, Model 13 | 14 | from pic2vec.build_featurizer import (_decapitate_model, _find_pooling_constant, 15 | _splice_layer, 
_downsample_model_features, 16 | _initialize_model, _check_downsampling_mismatch, 17 | build_featurizer) 18 | 19 | from pic2vec.squeezenet import SqueezeNet 20 | from pic2vec.enums import MODELS, ATOL 21 | 22 | random.seed(5102020) 23 | 24 | # Create tensor for splicing 25 | SPLICING_TENSOR = K.constant(3, shape=(3, 12)) 26 | 27 | # Create featurization for finding the pooling constant 28 | POOLING_FEATURES = K.constant(2, shape=(3, 60)) 29 | 30 | # Path to checking prediction arrays for each model in _initialize_model 31 | INITIALIZED_MODEL_TEST_ARRAY = 'tests/build_featurizer_testing/{}_test_prediction.npy' 32 | 33 | 34 | 35 | @pytest.fixture(scope='module') 36 | def check_model(): 37 | # Building the checking model 38 | input_layer = Input(shape=(100, )) 39 | layer = Dense(40)(input_layer) 40 | layer = Activation('relu')(layer) 41 | layer = Dense(20)(layer) 42 | layer = Activation('relu')(layer) 43 | layer = Dense(10)(layer) 44 | layer = Activation('relu')(layer) 45 | layer = Dense(5)(layer) 46 | output_layer = Activation('softmax')(layer) 47 | 48 | check_model = Model(inputs=input_layer, outputs=output_layer) 49 | 50 | return check_model 51 | 52 | 53 | def test_decapitate_model_lazy_input(): 54 | """Test an error is raised when the model has a lazy input layer initialization""" 55 | # Raise warning when model has lazy input layer initialization 56 | error_model = Sequential([ 57 | Dense(40, input_shape=(100,)), 58 | Dense(20), 59 | Activation('softmax')]) 60 | 61 | with warnings.catch_warnings(record=True) as warning_check: 62 | _decapitate_model(error_model, 1) 63 | assert len(warning_check) == 1 64 | assert "depth issues" in str(warning_check[-1].message) 65 | 66 | 67 | def test_decapitate_model_too_deep(check_model): 68 | """Test error raised when model is decapitated too deep""" 69 | # Check for Value Error when passed a depth >= (# of layers in network) - 1 70 | with pytest.raises(ValueError): 71 | _decapitate_model(check_model, 8) 72 | 73 | 74 | def 
test_decapitate_model(check_model): 75 | """ 76 | This test creates a toy network, and checks that it calls the right errors 77 | and checks that it decapitates the network correctly: 78 | """ 79 | # Create test model 80 | test_model = _decapitate_model(check_model, 5) 81 | 82 | # Make checks for all of the necessary features: the model outputs, the 83 | # last layer, the last layer's connections, and the last layer's shape 84 | assert test_model.layers[-1] == test_model.layers[3] 85 | assert test_model.layers[3].outbound_nodes == [] 86 | assert test_model.outputs == [test_model.layers[3].output] 87 | assert test_model.layers[-1].output_shape == (None, 20) 88 | 89 | 90 | def test_splice_layer_bad_split(): 91 | """Check error with bad split on the tensor""" 92 | with pytest.raises(ValueError): 93 | _splice_layer(SPLICING_TENSOR, 5) 94 | 95 | 96 | def test_splice_layer(): 97 | """Test method splices tensors correctly""" 98 | # Create spliced and added layers via splicing function 99 | list_of_spliced_layers = _splice_layer(SPLICING_TENSOR, 3) 100 | # Add each of the layers together 101 | x = add(list_of_spliced_layers) 102 | # Create the spliced and added layers by hand 103 | check_layer = K.constant(9, shape=(3, 4)) 104 | # Check the math 105 | assert np.allclose(K.eval(check_layer), K.eval(x), atol=ATOL) 106 | 107 | 108 | def test_find_pooling_constant_upsample(): 109 | """Test error when trying to upsample""" 110 | with pytest.raises(ValueError): 111 | _find_pooling_constant(POOLING_FEATURES, 120) 112 | 113 | 114 | def test_find_pooling_constant_bad_divisor(): 115 | """Test error when trying to downsample to a non-divisor of the features""" 116 | with pytest.raises(ValueError): 117 | _find_pooling_constant(POOLING_FEATURES, 40) 118 | 119 | with pytest.raises(ValueError): 120 | _find_pooling_constant(POOLING_FEATURES, 0) 121 | 122 | 123 | def test_find_pooling_constant(): 124 | """Test that pooling constant given correct answer with good inputs""" 125 | assert 
_find_pooling_constant(POOLING_FEATURES, 6) == 10 126 | 127 | 128 | def test_downsample_model_features(): 129 | """ 130 | Test creates a toy numpy array, and checks that the method 131 | correctly downsamples the array into a hand-checked tensor 132 | """ 133 | # Create the spliced and averaged tensor via downsampling function 134 | array = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 135 | [11, 12, 13, 14, 15, 16, 17, 18, 19, 20], 136 | [21, 22, 23, 24, 25, 26, 27, 28, 29, 30] 137 | ]) 138 | tensor = K.variable(array) 139 | 140 | x = _downsample_model_features(tensor, 5) 141 | 142 | # Create the spliced and averaged tensor by hand 143 | check_array = np.array([[1.5, 3.5, 5.5, 7.5, 9.5], 144 | [11.5, 13.5, 15.5, 17.5, 19.5], 145 | [21.5, 23.5, 25.5, 27.5, 29.5] 146 | ]) 147 | check_tensor = K.variable(check_array) 148 | # Check that they are equal: that it returns the correct tensor 149 | assert np.allclose(K.eval(check_tensor), K.eval(x), atol=ATOL) 150 | 151 | 152 | def test_check_downsampling_mismatch_bad_num_features(): 153 | """Raises error with autodownsampling an odd number of features""" 154 | with pytest.raises(ValueError): 155 | _check_downsampling_mismatch(True, 0, 2049) 156 | 157 | 158 | def test_check_downsampling_mismatch_autosample(): 159 | """Test method correctly autosamples""" 160 | # Testing automatic downsampling 161 | assert _check_downsampling_mismatch(True, 0, 2048) == (True, 1024) 162 | 163 | 164 | def test_check_downsampling_mismatch_no_sample(): 165 | """Test method correctly returns with no sampling""" 166 | # Testing no downsampling 167 | assert _check_downsampling_mismatch(False, 0, 2048) == (False, 0) 168 | 169 | 170 | def test_check_downsampling_mismatch_manual_sample(): 171 | """Test method correctly returns with manual sampling""" 172 | # Testing manual downsampling 173 | assert _check_downsampling_mismatch(False, 512, 2048) == (True, 512) 174 | 175 | 176 | def check_model_equal(model1, model2): 177 | """Check whether two models are 
equal""" 178 | # Testing models are the same from loaded weights and downloaded from keras 179 | assert len(model1.layers) == len(model2.layers) 180 | 181 | for layer in range(len(model1.layers)): 182 | for array in range(len(model1.layers[layer].get_weights())): 183 | assert np.allclose(model1.layers[layer].get_weights()[array], 184 | model2.layers[layer].get_weights()[array], atol=ATOL) 185 | 186 | 187 | def test_initialize_model_weights_not_found(): 188 | """Test error raised when the model can't find weights to load""" 189 | error_weight = 'htraenoytinutroppodnocesaevahtondideduti/losfosraeyderdnuhenootdenmednocsecar' 190 | try: 191 | assert not os.path.isfile(error_weight) 192 | except AssertionError: 193 | logging.error('Whoops, that mirage exists. ' 194 | 'Change error_weight to a file path that does not exist.') 195 | 196 | with pytest.raises(IOError): 197 | _initialize_model('squeezenet', error_weight) 198 | 199 | 200 | def test_initialize_model_bad_weights(): 201 | """ 202 | Test error raised when the model finds the weights file, 203 | but it's not the right format 204 | """ 205 | bad_weights_file = open('bad_weights_test', 'w') 206 | bad_weights_file.write('this should fail') 207 | bad_weights_file.close() 208 | error_weight = 'bad_weights_test' 209 | 210 | try: 211 | with pytest.raises(IOError): 212 | _initialize_model('squeezenet', error_weight) 213 | finally: 214 | os.remove(error_weight) 215 | 216 | 217 | def test_initialize_model_wrong_weights(): 218 | """Test error raised when weights exist but don't match model""" 219 | squeeze_weight_path = 'pic2vec/saved_models/squeezenet_weights_tf_dim_ordering_tf_kernels.h5' 220 | assert os.path.isfile(squeeze_weight_path) 221 | 222 | with pytest.raises(ValueError): 223 | _initialize_model('vgg16', squeeze_weight_path) 224 | 225 | 226 | INITIALIZE_MODEL_CASES = [ 227 | ('squeezenet', [67], (1, 227, 227, 3)), 228 | ('vgg16', [23], (1, 224, 224, 3)), 229 | ('vgg19', [26], (1, 224, 224, 3)), 230 | ('resnet50', 
[176, 177], (1, 224, 224, 3)), 231 | ('inceptionv3', [313], (1, 299, 299, 3)), 232 | ('xception', [134], (1, 299, 299, 3)), 233 | ] 234 | 235 | 236 | @pytest.mark.parametrize('model_str, expected_layers, test_size', 237 | INITIALIZE_MODEL_CASES, ids=MODELS) 238 | def test_initialize_model(model_str, expected_layers, test_size): 239 | """Test the initializations of each model""" 240 | model = _initialize_model(model_str) 241 | 242 | if model_str == 'squeezenet': 243 | try: 244 | model_downloaded_weights = SqueezeNet() 245 | except Exception: 246 | raise AssertionError('Problem loading SqueezeNet weights.') 247 | check_model_equal(model, model_downloaded_weights) 248 | 249 | # Versions of Keras 2.1.5 and later sometimes use different numbers of layers for these models, 250 | # without changing any behavior for predictions. 251 | # This checks that the model uses at least one of the expected numbers of layers. 252 | assert len(model.layers) in expected_layers 253 | 254 | # Create the test image to be predicted on 255 | blank_image = np.zeros(test_size) 256 | 257 | # Pre-checked prediction 258 | existing_test_array = np.load(INITIALIZED_MODEL_TEST_ARRAY.format(model_str)) 259 | 260 | generated_test_array = model.predict_on_batch(blank_image) 261 | 262 | # Check that each model predicts correctly to see if weights were correctly loaded 263 | assert np.allclose(generated_test_array, existing_test_array, atol=ATOL) 264 | del model 265 | 266 | 267 | FEATURIZER_MODEL_DICT = dict.fromkeys(MODELS) 268 | FEAT_CASES = [ # squeezenet 269 | (1, False, 128, 128, 'squeezenet'), (1, False, 0, 512, 'squeezenet'), 270 | (1, True, 0, 256, 'squeezenet'), (2, True, 0, 256, 'squeezenet'), 271 | (2, False, 128, 128, 'squeezenet'), (2, False, 0, 512, 'squeezenet'), 272 | (3, False, 96, 96, 'squeezenet'), (3, False, 0, 384, 'squeezenet'), 273 | (3, True, 0, 192, 'squeezenet'), (4, True, 0, 192, 'squeezenet'), 274 | (4, False, 96, 96, 'squeezenet'), (4, False, 0, 384, 'squeezenet'), 275 | 276 
| # vgg16 277 | (1, False, 1024, 1024, 'vgg16'), (1, False, 0, 4096, 'vgg16'), 278 | (1, True, 0, 2048, 'vgg16'), (2, True, 0, 2048, 'vgg16'), 279 | (2, False, 1024, 1024, 'vgg16'), (2, False, 0, 4096, 'vgg16'), 280 | (3, False, 128, 128, 'vgg16'), (3, False, 0, 512, 'vgg16'), 281 | (3, True, 0, 256, 'vgg16'), (4, True, 0, 256, 'vgg16'), 282 | (4, False, 128, 128, 'vgg16'), (4, False, 0, 512, 'vgg16'), 283 | 284 | # vgg19 285 | (1, False, 1024, 1024, 'vgg19'), (1, False, 0, 4096, 'vgg19'), 286 | (1, True, 0, 2048, 'vgg19'), (2, True, 0, 2048, 'vgg19'), 287 | (2, False, 1024, 1024, 'vgg19'), (2, False, 0, 4096, 'vgg19'), 288 | (3, False, 128, 128, 'vgg19'), (3, False, 0, 512, 'vgg19'), 289 | (3, True, 0, 256, 'vgg19'), (4, True, 0, 256, 'vgg19'), 290 | (4, False, 128, 128, 'vgg19'), (4, False, 0, 512, 'vgg19'), 291 | 292 | # resnet50 293 | (1, False, 512, 512, 'resnet50'), (1, False, 0, 2048, 'resnet50'), 294 | (1, True, 0, 1024, 'resnet50'), (2, True, 0, 1024, 'resnet50'), 295 | (2, False, 512, 512, 'resnet50'), (2, False, 0, 2048, 'resnet50'), 296 | (3, False, 512, 512, 'resnet50'), (3, False, 0, 2048, 'resnet50'), 297 | (3, True, 0, 1024, 'resnet50'), (4, True, 0, 1024, 'resnet50'), 298 | (4, False, 512, 512, 'resnet50'), (4, False, 0, 2048, 'resnet50'), 299 | 300 | # inceptionv3 301 | (1, False, 512, 512, 'inceptionv3'), (1, False, 0, 2048, 'inceptionv3'), 302 | (1, True, 0, 1024, 'inceptionv3'), (2, True, 0, 1024, 'inceptionv3'), 303 | (2, False, 512, 512, 'inceptionv3'), (2, False, 0, 2048, 'inceptionv3'), 304 | (3, False, 512, 512, 'inceptionv3'), (3, False, 0, 2048, 'inceptionv3'), 305 | (3, True, 0, 1024, 'inceptionv3'), (4, True, 0, 640, 'inceptionv3'), 306 | (4, False, 320, 320, 'inceptionv3'), (4, False, 0, 1280, 'inceptionv3'), 307 | 308 | # xception 309 | (1, False, 512, 512, 'xception'), (1, False, 0, 2048, 'xception'), 310 | (1, True, 0, 1024, 'xception'), (2, True, 0, 512, 'xception'), 311 | (2, False, 256, 256, 'xception'), (2, False, 0, 1024, 
'xception'), 312 | (3, False, 182, 182, 'xception'), (3, False, 0, 728, 'xception'), 313 | (3, True, 0, 364, 'xception'), (4, True, 0, 364, 'xception'), 314 | (4, False, 182, 182, 'xception'), (4, False, 0, 728, 'xception') 315 | ] 316 | 317 | 318 | @pytest.mark.parametrize('depth, autosample, sample_size, expected_size, model_str', FEAT_CASES) 319 | def test_build_featurizer(depth, autosample, sample_size, expected_size, model_str): 320 | """Test all of the model iterations""" 321 | if FEATURIZER_MODEL_DICT[model_str] is None: 322 | FEATURIZER_MODEL_DICT[model_str] = _initialize_model(model_str) 323 | 324 | model = build_featurizer(depth, autosample, sample_size, 325 | model_str=model_str, loaded_model=FEATURIZER_MODEL_DICT[model_str]) 326 | assert model.layers[-1].output_shape == (None, expected_size) 327 | del model 328 | 329 | 330 | if __name__ == '__main__': 331 | test_decapitate_model() 332 | test_splice_layer() 333 | test_find_pooling_constant() 334 | test_downsample_model_features() 335 | test_initialize_model() 336 | test_build_featurizer() 337 | -------------------------------------------------------------------------------- /tests/test_data_featurizing.py: -------------------------------------------------------------------------------- 1 | """Test data_featurizing module""" 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | from keras.layers import Conv2D, Dense, Flatten 6 | from keras.models import Sequential 7 | 8 | from pic2vec.enums import ATOL 9 | from pic2vec.data_featurizing import (featurize_data, 10 | create_features, 11 | _create_features_df_helper) 12 | 13 | np.random.seed(5102020) 14 | 15 | # The paths to the test csvs 16 | CHECK_CSV_IMAGES_PATH = 'tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_images' 17 | CHECK_CSV_FULL_PATH = 'tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_full' 18 | CHECK_CSV_FEATURES_ONLY_PATH = ('tests/data_featurizing_testing/csv_testing/' 19 | 
'featurize_data_check_csv_features_only') 20 | 21 | 22 | # The mock array being treated as the vectorized data 23 | check_data_temp = np.ones((4, 2, 2, 2)) 24 | check_data_temp[2] = np.zeros((2, 2, 2)) 25 | CHECK_DATA = check_data_temp 26 | 27 | # The mock array being treated as the "full featurized data" 28 | CHECK_ARRAY = np.array([[1., 2., 3.], 29 | [4., 5., 6.], 30 | [0., 0., 0.], 31 | [7., 8., 9.] 32 | ]) 33 | 34 | # Create model 35 | MODEL = Sequential([ 36 | Conv2D(5, (3, 3), input_shape=(5, 5, 3), activation='relu'), 37 | Flatten(), 38 | Dense(5) 39 | ]) 40 | 41 | 42 | def test_featurize_data_bad_array(): 43 | """Test errors with a badly formatted array""" 44 | error_array = np.ones((5, 5, 10)) 45 | 46 | with pytest.raises(ValueError): 47 | featurize_data(MODEL, error_array) 48 | 49 | 50 | def test_featurize_data(): 51 | """ 52 | Test that the featurize_data model correctly outputs the features of a toy 53 | network on a toy tensor 54 | """ 55 | # Create the checked array 56 | init_array = np.ones((5, 5, 5, 3)) 57 | 58 | for i in range(5): 59 | init_array[i] = init_array[i] * i 60 | 61 | # Check the prediction vs. the saved array 62 | check_array = np.load('tests/data_featurizing_testing/array_testing/check_featurize.npy') 63 | assert np.allclose(featurize_data(MODEL, init_array), check_array, atol=ATOL) 64 | 65 | 66 | def test_create_features_bad_feature_array(): 67 | """ 68 | Test that the model raises an error when a bad array 69 | is passed in (i.e. wrong shape) 70 | """ 71 | # An error array with the wrong size 72 | error_feature_array = np.zeros((4, 3, 2)) 73 | with pytest.raises(ValueError): 74 | create_features(CHECK_DATA, error_feature_array, 75 | 'image') 76 | 77 | 78 | def test_features_to_csv_bad_data_array(): 79 | """Raise error when a bad data array is passed (i.e. 
wrong shape)""" 80 | # An error array with the wrong size 81 | error_array = np.zeros((4, 3, 2)) 82 | with pytest.raises(ValueError): 83 | create_features(error_array, CHECK_ARRAY, 'image') 84 | 85 | 86 | def test_create_features_df_helper(): 87 | """Test that the correct full array is created to be passed to the create_features function""" 88 | full_df_test = _create_features_df_helper(CHECK_DATA, CHECK_ARRAY, 'image') 89 | assert full_df_test.equals(pd.read_csv(CHECK_CSV_FULL_PATH)) 90 | 91 | 92 | def test_features_to_csv(): 93 | """Test that the model creates the correct csvs from a toy array, csv, and image list""" 94 | # Create the test 95 | full_test_dataframe = create_features(CHECK_DATA, CHECK_ARRAY, 96 | 'image') 97 | 98 | # Assert that the dataframes returned are correct 99 | assert full_test_dataframe.equals(pd.read_csv(CHECK_CSV_FULL_PATH)) 100 | -------------------------------------------------------------------------------- /tests/test_feature_preprocessing.py: -------------------------------------------------------------------------------- 1 | """Test feature_preprocessing module""" 2 | import logging 3 | import os 4 | import random 5 | import pandas as pd 6 | import numpy as np 7 | import pytest 8 | 9 | from pic2vec.enums import ATOL 10 | 11 | from pic2vec.feature_preprocessing import (_create_df_with_image_paths, 12 | _find_directory_image_paths, 13 | _find_csv_image_paths, 14 | _find_combined_image_paths, 15 | _image_paths_finder, _convert_single_image, 16 | preprocess_data, 17 | natural_key) 18 | 19 | # Initialize seed to cut out any randomness (such as in image interpolation, etc) 20 | random.seed(5102020) 21 | 22 | # List of images used in testing 23 | IMAGE_LIST_SINGLE = ['arendt.bmp', 'borges.jpg', 'sappho.png'] 24 | 25 | # Shared paths 26 | IMAGE_PATH = 'tests/feature_preprocessing_testing/test_images/' 27 | CSV_PATH = 'tests/feature_preprocessing_testing/csv_testing/' 28 | IMAGE_ARRAY_PATH = 
'tests/feature_preprocessing_testing/test_image_arrays/' 29 | URL_PATH = '{}url_test'.format(CSV_PATH) 30 | TEST_ARRAY = 'tests/feature_preprocessing_testing/test_preprocessing_arrays/{}.npy' 31 | 32 | # Column headers 33 | IMG_COL_HEAD = 'images' 34 | NEW_IMG_COL_HEAD = 'new_images' 35 | 36 | # Image lists for directory and url 37 | URL_LIST = ['https://s3.amazonaws.com/datarobot_public_datasets/images/pic2vec/borges.jpg', 38 | 'https://s3.amazonaws.com/datarobot_public_datasets/images/pic2vec/arendt.bmp', 39 | 'https://s3.amazonaws.com/datarobot_public_datasets/images/pic2vec/sappho.png' 40 | ] 41 | 42 | # Preprocessing paths 43 | DIRECTORY_CSV_PATH_PREPROCESS = '{}directory_preprocess_system_test'.format(CSV_PATH) 44 | ERROR_NEW_CSV_PATH_PREPROCESS = '{}generated_error_preprocess_system_test'.format(CSV_PATH) 45 | NEW_CSV_PATH_PREPROCESS = '{}generated_preprocess_system_test'.format(CSV_PATH) 46 | COMBINED_LIST_PREPROCESS = ['', 'arendt.bmp', 'sappho.png', 'arendt.bmp'] 47 | ERROR_ROW_CSV = '{}error_row'.format(CSV_PATH) 48 | 49 | # Loading image arrays 50 | arendt_array = np.load(TEST_ARRAY.format('arendt')) 51 | borges_array = np.load(TEST_ARRAY.format('borges')) 52 | sappho_array = np.load(TEST_ARRAY.format('sappho')) 53 | arendt_grayscale_array = np.load(TEST_ARRAY.format('arendt_grayscale')) 54 | sappho_grayscale_array = np.load(TEST_ARRAY.format('sappho_grayscale')) 55 | 56 | # Test arrays for build_featurizer 57 | DIRECTORY_ARRAYS = [arendt_array, borges_array, sappho_array] 58 | CSV_ARRAYS = [borges_array, arendt_array, sappho_array] 59 | COMBINED_ARRAYS = [np.zeros((borges_array.shape)), arendt_array, sappho_array, arendt_array] 60 | GRAYSCALE_ARRAYS = [np.zeros((arendt_grayscale_array.shape)), arendt_grayscale_array, 61 | sappho_grayscale_array, arendt_grayscale_array] 62 | BATCH_ARRAYS_DIR = [arendt_array, borges_array] 63 | 64 | # ---- TESTING ---- # 65 | 66 | 67 | def test_create_df_with_image_paths(): 68 | """Test method creates csv correctly from 
list of images""" 69 | df = _create_df_with_image_paths(IMAGE_LIST_SINGLE, IMG_COL_HEAD) 70 | 71 | assert pd.read_csv('{}create_csv_check'.format(CSV_PATH)).equals(df) 72 | 73 | 74 | def test_natural_sort(): 75 | """Test the natural sort function""" 76 | unsorted_alphanumeric = ['1.jpg', '10.jpg', '2.jpg', '15.jpg', '20.jpg', '5.jpg'] 77 | natural_sort = ['1.jpg', '2.jpg', '5.jpg', '10.jpg', '15.jpg', '20.jpg'] 78 | assert natural_sort == sorted(unsorted_alphanumeric, key=natural_key) 79 | 80 | 81 | def test_find_directory_image_paths(): 82 | """ 83 | Test method returns a sorted list of valid image files 84 | to be fed into the featurizer from a directory. 85 | """ 86 | test_image_paths = _find_directory_image_paths(IMAGE_PATH) 87 | 88 | assert test_image_paths == IMAGE_LIST_SINGLE 89 | 90 | 91 | def test_find_csv_image_paths(): 92 | """Test method correctly finds image paths in the csv, and in the right order""" 93 | check_image_paths = ['borges.jpg', 'arendt.bmp', 'sappho.png'] 94 | test_image_paths, df = _find_csv_image_paths('{}csv_image_path_check'.format(CSV_PATH), 95 | IMG_COL_HEAD) 96 | 97 | with pytest.raises(ValueError): 98 | _find_csv_image_paths('{}csv_image_path_check'.format(CSV_PATH), 'Error Column') 99 | 100 | assert test_image_paths == check_image_paths 101 | assert pd.read_csv('{}csv_image_path_check'.format(CSV_PATH)).equals(df) 102 | 103 | 104 | def test_find_combined_image_paths(): 105 | """Test that method only returns images that overlap between directory and csv""" 106 | check_image_paths = ['', 'arendt.bmp', 'sappho.png'] 107 | 108 | invalid_csv_image_path = 'heidegger.png' 109 | invalid_directory_image_path = 'borges.jpg' 110 | 111 | test_path, df = _find_combined_image_paths(IMAGE_PATH, 112 | '{}directory_combined_image_path_test' 113 | .format(CSV_PATH), IMG_COL_HEAD) 114 | 115 | with pytest.raises(ValueError): 116 | _find_combined_image_paths(IMAGE_PATH, 117 | '{}error_directory_combined_test'.format(CSV_PATH), 118 | IMG_COL_HEAD) 119 
| 120 | assert invalid_csv_image_path not in test_path 121 | assert invalid_directory_image_path not in test_path 122 | 123 | assert check_image_paths == test_path 124 | assert pd.read_csv('{}directory_combined_image_path_test'.format(CSV_PATH)).equals(df) 125 | 126 | 127 | CONVERT_IMAGE_CASES = [ 128 | ('url', URL_LIST[0]), 129 | ('directory', '{}borges.jpg'.format(IMAGE_PATH)) 130 | ] 131 | 132 | 133 | @pytest.mark.parametrize('grayscale', [None, True], ids=['RGB', 'grayscale']) 134 | @pytest.mark.parametrize('size', [(299, 299), (299, 467)], ids=['scaled', 'isotropic']) 135 | @pytest.mark.parametrize('image_source,image_path', CONVERT_IMAGE_CASES, ids=['url', 'directory']) 136 | def test_convert_single_image(image_source, image_path, size, grayscale): 137 | """Test converting images from url and directory with options for size and grayscale""" 138 | iso = '' 139 | gscale = '' 140 | 141 | if size != (299, 299): 142 | iso = '_isotropic' 143 | if grayscale is not None: 144 | gscale = '_grayscale' 145 | 146 | check_array = np.load('{path}image_test{isotropic}{grayscale}.npy' 147 | .format(path=IMAGE_ARRAY_PATH, 148 | isotropic=iso, 149 | grayscale=gscale)) 150 | 151 | converted_image = _convert_single_image(image_source, 'xception', image_path, size, grayscale) 152 | 153 | assert np.allclose(check_array, converted_image, atol=ATOL) 154 | 155 | 156 | PATHS_FINDER_CASES = [ 157 | (IMAGE_PATH, '', NEW_IMG_COL_HEAD, IMAGE_LIST_SINGLE), 158 | 159 | ('', URL_PATH, IMG_COL_HEAD, URL_LIST), 160 | 161 | (IMAGE_PATH, '{}directory_combined_image_path_test'.format(CSV_PATH), 162 | IMG_COL_HEAD, ['', 'arendt.bmp', 'sappho.png']) 163 | ] 164 | 165 | 166 | @pytest.mark.parametrize('image_path, csv_path, image_column_header, check_images', 167 | PATHS_FINDER_CASES, ids=['directory_only', 'csv_only', 'combined']) 168 | def test_image_paths_finder(image_path, csv_path, image_column_header, check_images): 169 | """ 170 | Test the correct image paths returns for all three cases: 
directory only, 171 | csv only, and combined csv + directory 172 | """ 173 | # check the new csv doesn't already exist 174 | # generated image lists 175 | case, df = _image_paths_finder(image_path, csv_path, image_column_header) 176 | 177 | # Check the image lists match 178 | assert case == check_images 179 | 180 | 181 | def test_preprocess_data_no_input(): 182 | """Raise error if no csv or directory is passed""" 183 | with pytest.raises(ValueError): 184 | preprocess_data(IMG_COL_HEAD, 'xception', ['']) 185 | 186 | 187 | def test_preprocess_data_fake_dir(): 188 | """Raise an error if the image_path doesn't point to a real directory""" 189 | error_dir = 'egaugnalymgnidnatsrednufoerusuoyera/emdaerohwuoy/' 190 | try: 191 | assert not os.path.isdir(error_dir) 192 | except AssertionError: 193 | logging.error('Whoops, that labyrinth exists. ' 194 | 'Change error_dir to a directory path that does not exist.') 195 | with pytest.raises(TypeError): 196 | preprocess_data(IMG_COL_HEAD, 'xception', list_of_images=IMAGE_LIST_SINGLE, 197 | image_path=error_dir) 198 | 199 | assert not os.path.isfile(ERROR_NEW_CSV_PATH_PREPROCESS) 200 | 201 | 202 | @pytest.mark.xfail 203 | def test_preprocess_data_fake_csv(): 204 | """Raise an error if the csv_path doesn't point to a file""" 205 | error_file = 'rehtonaybtmaerdecnaraeppaeremasawootehtahtdootsrednueh' 206 | try: 207 | assert not os.path.isfile(error_file) 208 | except AssertionError: 209 | logging.error( 210 | 'Whoops, that dreamer exists. 
change to error_file to a file path that does not exist.') 211 | with pytest.raises(TypeError): 212 | preprocess_data(IMG_COL_HEAD, 'xception', csv_path=error_file, 213 | list_of_images=IMAGE_LIST_SINGLE) 214 | 215 | assert not os.path.isfile(ERROR_NEW_CSV_PATH_PREPROCESS) 216 | 217 | 218 | def test_preprocess_data_invalid_url_or_dir(): 219 | """Raise an error if the image in the column is an invalid path""" 220 | preprocess_data(IMG_COL_HEAD, 'xception', list_of_images=IMAGE_LIST_SINGLE, 221 | csv_path=ERROR_ROW_CSV) 222 | 223 | 224 | def test_preprocess_data_invalid_model_str(): 225 | """Raise an error if the model_str is not a valid model""" 226 | with pytest.raises(ValueError): 227 | preprocess_data(IMG_COL_HEAD, 'derp', [''], csv_path=DIRECTORY_CSV_PATH_PREPROCESS) 228 | 229 | 230 | def compare_preprocessing(case, csv_path, check_arrays, image_list): 231 | """Compare a case from a full preprocessing step with the expected values of that case""" 232 | # Check correct number of images vectorized 233 | assert len(case[0]) == len(check_arrays) 234 | 235 | for image in range(len(check_arrays)): 236 | # Check all data vectors correctly generated 237 | assert np.allclose(case[0][image], check_arrays[image], atol=ATOL) 238 | 239 | # csv path correctly returned as non-existent, and correct image list returned 240 | assert case[1] == image_list 241 | 242 | 243 | @pytest.mark.xfail 244 | def test_preprocess_data_grayscale(): 245 | # Ensure the new csv doesn't already exist 246 | if os.path.isfile(ERROR_NEW_CSV_PATH_PREPROCESS): 247 | os.remove(ERROR_NEW_CSV_PATH_PREPROCESS) 248 | 249 | # Create the full (data, csv_path, image_list) for each of the three cases 250 | preprocessed_case = preprocess_data(IMG_COL_HEAD, 'xception', grayscale=True, 251 | image_path=IMAGE_PATH, 252 | csv_path=DIRECTORY_CSV_PATH_PREPROCESS) 253 | 254 | # Ensure a new csv wasn't created when they weren't needed 255 | assert not os.path.isfile(ERROR_NEW_CSV_PATH_PREPROCESS) 256 | 257 | 
compare_preprocessing(preprocessed_case, DIRECTORY_CSV_PATH_PREPROCESS, 258 | GRAYSCALE_ARRAYS, COMBINED_LIST_PREPROCESS) 259 | 260 | 261 | PREPROCESS_DATA_CASES = [ 262 | # Tests an image directory-only preprocessing step 263 | (IMAGE_PATH, '', 264 | DIRECTORY_ARRAYS, IMAGE_LIST_SINGLE), 265 | 266 | # Tests a CSV-only URL-based preprocessing step 267 | ('', URL_PATH, 268 | CSV_ARRAYS, URL_LIST), 269 | 270 | # Tests a combined directory+csv preprocessing step 271 | (IMAGE_PATH, DIRECTORY_CSV_PATH_PREPROCESS, 272 | COMBINED_ARRAYS, COMBINED_LIST_PREPROCESS), 273 | ] 274 | 275 | 276 | @pytest.mark.parametrize('image_path, csv_path, check_arrays, image_list', 277 | PREPROCESS_DATA_CASES, ids=['dir_only', 'csv_only', 'combined']) 278 | def test_preprocess_data(image_path, csv_path, check_arrays, image_list): 279 | """ 280 | Full integration test: check for Type and Value errors for badly passed variables, 281 | and make sure that the network preprocesses data correctly for all three cases. 282 | """ 283 | 284 | # Create the full (data, csv_path, image_list) for each of the three cases 285 | preprocessed_case = preprocess_data(IMG_COL_HEAD, 'xception', list_of_images=image_list, 286 | grayscale=False, 287 | image_path=image_path, csv_path=csv_path) 288 | 289 | compare_preprocessing(preprocessed_case, csv_path, check_arrays, image_list) 290 | -------------------------------------------------------------------------------- /tests/test_image_featurizer.py: -------------------------------------------------------------------------------- 1 | """Test the full featurizer class""" 2 | import os 3 | import pytest 4 | import shutil 5 | from mock import patch 6 | 7 | import numpy as np 8 | import pandas as pd 9 | 10 | from pic2vec.image_featurizer import ImageFeaturizer, _create_csv_path, _named_path_finder 11 | from pic2vec.enums import MODELS, ATOL 12 | 13 | # Constant paths 14 | CSV_NAME = 'tests/image_featurizer_testing/csv_checking/testing_data.csv' 15 | 16 | # List of images 
for datasets with single and multiple image columns 17 | IMAGE_LIST_SINGLE = ['arendt.bmp', 'borges.jpg', 'sappho.png'] 18 | IMAGE_LIST_MULT = [['arendt.bmp', 'sappho.png', ''], ['borges.jpg', '', '']] 19 | 20 | # CSVs and arrays for testing predictions on datasets with a single image column 21 | CHECK_CSV_SINGLE = 'tests/image_featurizer_testing/csv_checking/{}_check_csv.csv' 22 | CHECK_ARRAY_SINGLE = 'tests/image_featurizer_testing/array_tests/check_prediction_array_{}.npy' 23 | 24 | # CSVs and arrays for testing predictions on datasets with multiple image columns 25 | CHECK_CSV_MULT = 'tests/image_featurizer_testing/csv_checking/{}_check_csv_mult.csv' 26 | CHECK_ARRAY_MULT = 'tests/image_featurizer_testing/array_tests/check_prediction_array_{}_mult.npy' 27 | 28 | # Arguments to load the data into the featurizers 29 | LOAD_DATA_ARGS_SINGLE = { 30 | 'image_columns': 'images', 31 | 'image_path': 'tests/feature_preprocessing_testing/test_images' 32 | } 33 | 34 | # Static expected attributes to compare with the featurizer attributes 35 | COMPARE_ARGS = { 36 | 'downsample_size': 0, 37 | 'image_columns': ['images'], 38 | 'automatic_downsample': False, 39 | 'csv_path': '', 40 | 'image_dict': {'images': IMAGE_LIST_SINGLE}, 41 | 'depth': 1 42 | } 43 | 44 | LOAD_DATA_ARGS_MULT_ERROR = { 45 | 'image_columns': ['images_1', 'images_2'], 46 | 'image_path': 'tests/feature_preprocessing_testing/test_images', 47 | } 48 | 49 | LOAD_DATA_ARGS_MULT = { 50 | 'image_columns': ['images_1', 'images_2'], 51 | 'image_path': 'tests/feature_preprocessing_testing/test_images', 52 | 'csv_path': CSV_NAME 53 | } 54 | 55 | COMPARE_ARGS_MULT = { 56 | 'downsample_size': 0, 57 | 'image_columns': ['images_1', 'images_2'], 58 | 'automatic_downsample': True, 59 | 'csv_path': CSV_NAME, 60 | 'image_dict': {'images_1': IMAGE_LIST_MULT[0], 'images_2': IMAGE_LIST_MULT[1]}, 61 | 'depth': 1, 62 | } 63 | # Variable attributes to load the featurizer with 64 | LOAD_PARAMS = [ 65 | ('squeezenet', (227, 227), 
     CHECK_ARRAY_SINGLE.format('squeezenet')),
    ('vgg16', (224, 224), CHECK_ARRAY_SINGLE.format('vgg16')),
    ('vgg19', (224, 224), CHECK_ARRAY_SINGLE.format('vgg19')),
    ('resnet50', (224, 224), CHECK_ARRAY_SINGLE.format('resnet50')),
    ('inceptionv3', (299, 299), CHECK_ARRAY_SINGLE.format('inceptionv3')),
    ('xception', (299, 299), CHECK_ARRAY_SINGLE.format('xception'))
]

# Same (model, scaled size, expected array path) triples for the multi-column datasets
LOAD_PARAMS_MULT = [
    ('squeezenet', (227, 227), CHECK_ARRAY_MULT.format('squeezenet')),
    ('vgg16', (224, 224), CHECK_ARRAY_MULT.format('vgg16')),
    ('vgg19', (224, 224), CHECK_ARRAY_MULT.format('vgg19')),
    ('resnet50', (224, 224), CHECK_ARRAY_MULT.format('resnet50')),
    ('inceptionv3', (299, 299), CHECK_ARRAY_MULT.format('inceptionv3')),
    ('xception', (299, 299), CHECK_ARRAY_MULT.format('xception'))
]


# Remove path to the generated csv if it currently exists
if os.path.isdir('tests/image_featurizer_testing/csv_tests'):
    shutil.rmtree('tests/image_featurizer_testing/csv_tests')


def compare_featurizer_class(featurizer,
                             scaled_size,
                             featurized_data,
                             downsample_size,
                             image_columns,
                             automatic_downsample,
                             csv_path,
                             image_dict,
                             depth,
                             featurized=False,
                             check_csv='',
                             saved_data=True):
    """Check the necessary assertions for a featurizer image.

    Compares every public attribute of `featurizer` against the expected values;
    when `featurized` is True, additionally compares the features DataFrame
    against the reference CSV at `check_csv` (column names and values, to ATOL).
    """
    print(featurizer.features)
    assert featurizer.scaled_size == scaled_size
    assert np.allclose(featurizer.features.astype(float).values, featurized_data, atol=ATOL)
    assert featurizer.downsample_size == downsample_size
    assert featurizer.image_columns == image_columns
    assert featurizer.autosample == automatic_downsample
    assert featurizer.csv_path == csv_path
    assert featurizer.image_dict == image_dict
    assert featurizer.depth == depth
    if featurized:
        assert np.array_equal(pd.read_csv(check_csv).columns, featurizer.features.columns)
        assert np.allclose(featurizer.features.astype(float), pd.read_csv(check_csv).astype(float),
                           atol=ATOL)


def compare_empty_input(featurizer):
    """Check that a cleared featurizer has all of its input attributes reset."""
    assert np.array_equal(featurizer.data, np.zeros((1)))
    assert featurizer.features.equals(pd.DataFrame())
    assert featurizer.full_dataframe.equals(pd.DataFrame())
    assert featurizer.csv_path == ''
    assert featurizer.image_list == ''
    assert featurizer.image_columns == ''
    assert featurizer.image_path == ''


@pytest.fixture()
def featurizer():
    # Bare featurizer with default settings, no data loaded
    featurizer = ImageFeaturizer()
    return featurizer


@pytest.fixture()
def featurizer_with_data():
    # Featurizer that has already featurized the single-column test dataset
    featurizer_with_data = ImageFeaturizer()
    featurizer_with_data.featurize(save_features=True, **LOAD_DATA_ARGS_SINGLE)
    return featurizer_with_data


@pytest.fixture()
def featurizer_autosample():
    # Featurizer with automatic downsampling enabled (needed for multi-column data)
    featurizer = ImageFeaturizer(autosample=True)
    return featurizer


def test_featurize_first(featurizer):
    """Test that the featurizer raises an error if featurize is called before loading data"""
    # Raise error if attempting to featurize before loading data
    with pytest.raises(IOError):
        featurizer.featurize_preloaded_data()


def test_featurize_without_image_or_csv(featurizer):
    """Featurize with neither image_path nor csv_path must raise a ValueError."""
    with pytest.raises(ValueError, match='Must specify either image_path or csv_path as input'):
        return featurizer.featurize(image_columns=['images_1', 'images_2'])


def test_featurizer_build(featurizer):
    """Test that the featurizer saves empty attributes correctly after initializing"""
    compare_featurizer_class(featurizer, (0, 0), np.zeros((1)), 0, '', False, '', {}, 1)


def test_load_data_single_column(featurizer):
    """Test that the featurizer saves attributes correctly after loading data"""
    featurizer.load_data(**LOAD_DATA_ARGS_SINGLE)
compare_featurizer_class(featurizer, (227, 227), np.zeros((1)), **COMPARE_ARGS) 166 | 167 | 168 | def test_load_data_multiple_columns_no_csv(featurizer): 169 | """Test featurizer raises error if multiple columns passed with only a directory""" 170 | with pytest.raises(ValueError): 171 | featurizer.load_data(**LOAD_DATA_ARGS_MULT_ERROR) 172 | 173 | 174 | def test_create_csv_path(): 175 | test_csv = 'test.csv' 176 | test_dir = 'tests/image_featurizer_testing/create_csv_test/' 177 | try: 178 | assert not os.path.isdir(test_dir) 179 | _create_csv_path('{}{}'.format(test_dir, test_csv)) 180 | assert os.path.isdir(test_dir) 181 | finally: 182 | shutil.rmtree(test_dir) 183 | assert not os.path.isdir(test_dir) 184 | 185 | 186 | def test_named_path_finder(): 187 | """Check that named_path_finder produces the correct output""" 188 | check_named_path = 'csv_name_modelstring_depth-n_output-x_(now)' 189 | with patch('time.strftime', return_value='now'): 190 | test_named_path = _named_path_finder('csv_name', 'modelstring', 'n', 'x', 191 | omit_model=False, omit_depth=False, omit_output=False, 192 | omit_time=False) 193 | assert check_named_path == test_named_path 194 | 195 | 196 | def test_named_path_finder_time_only_omitted(): 197 | """Check that named_path_finder produces the correct output (without time)""" 198 | check_named_path = 'csv_name_modelstring_depth-n_output-x' 199 | test_named_path = _named_path_finder('csv_name', 'modelstring', 'n', 'x', 200 | omit_model=False, omit_depth=False, omit_output=False, 201 | omit_time=True) 202 | assert check_named_path == test_named_path 203 | 204 | 205 | def test_named_path_finder_all_omitted(): 206 | """Check that named_path_finder produces the correct output (without time)""" 207 | check_named_path = 'csv_name' 208 | test_named_path = _named_path_finder('csv_name', 'modelstring', 'n', 'x', 209 | omit_model=True, omit_depth=True, omit_output=True, 210 | omit_time=True) 211 | assert check_named_path == test_named_path 212 | 213 | 214 
def test_save_csv(featurizer_with_data):
    """save_csv with no arguments writes to the robust auto-generated name."""
    with patch('pandas.DataFrame.to_csv') as mock:
        with patch('pic2vec.image_featurizer._create_csv_path'):
            featurizer_with_data.save_csv()
            assert 'images_featurized_squeezenet_depth-1_output-512_(' in mock.call_args[0][0]


def test_save_csv_with_named_path(featurizer_with_data):
    """save_csv honors an explicitly supplied new_csv_path."""
    with patch('pandas.DataFrame.to_csv') as mock:
        with patch('pic2vec.image_featurizer._create_csv_path'):
            featurizer_with_data.save_csv(new_csv_path='foo/bar.csv')
            assert mock.call_args[0][0] == 'foo/bar.csv'


def test_save_csv_no_df(featurizer):
    """save_csv before any featurization raises AttributeError."""
    with pytest.raises(AttributeError, match='No dataframe has been featurized.'):
        featurizer.save_csv()


def test_load_data_multiple_columns(featurizer_autosample):
    """Test featurizer loads data correctly with multiple image columns"""
    featurizer_autosample.load_data(**LOAD_DATA_ARGS_MULT)
    compare_featurizer_class(featurizer_autosample, (227, 227), np.zeros((1)), **COMPARE_ARGS_MULT)


def test_featurize_save_csv_and_features(featurizer):
    """Make sure the featurizer writes the name correctly to csv with robust naming config"""

    name, ext = os.path.splitext(CSV_NAME)
    check_array_path = '{}_featurized_squeezenet_depth-1_output-512'.format(name)
    featurizer.featurize(save_csv=True, save_features=True, omit_time=True,
                         **LOAD_DATA_ARGS_MULT)

    full_check = '{}{}'.format(check_array_path, ext)
    feature_check = '{}{}{}'.format(check_array_path, '_features_only', ext)

    try:
        assert os.path.isfile(full_check)
        assert os.path.isfile(feature_check)

    finally:
        # Always clean up the generated CSVs, even if the asserts fail
        if os.path.isfile(feature_check):
            os.remove(feature_check)
        if os.path.isfile(full_check):
            os.remove(full_check)


def test_load_then_featurize_save_csv(featurizer):
    """Make sure the featurizer writes the name correctly to csv with robust naming config"""

    name, ext = os.path.splitext(CSV_NAME)
    check_array_path = '{}_featurized_squeezenet_depth-1_output-512'.format(name)
    featurizer.load_data(**LOAD_DATA_ARGS_MULT)
    featurizer.featurize_preloaded_data(save_csv=True, save_features=True, omit_time=True,
                                        batch_processing=False)

    full_check = '{}{}'.format(check_array_path, ext)
    feature_check = '{}{}{}'.format(check_array_path, '_features_only', ext)

    try:
        assert os.path.isfile(full_check)
        assert os.path.isfile(feature_check)

    finally:
        # Always clean up the generated CSVs, even if the asserts fail
        if os.path.isfile(feature_check):
            os.remove(feature_check)
        if os.path.isfile(full_check):
            os.remove(full_check)


def test_clear_input(featurizer):
    """Featurize, then clear the input and check all input attributes are reset."""
    featurizer.featurize(save_features=True, omit_time=True, omit_model=True,
                         omit_depth=True, omit_output=True, **LOAD_DATA_ARGS_SINGLE)

    featurizer.clear_input(confirm=True)
    compare_empty_input(featurizer)


def test_clear_input_no_confirm(featurizer):
    """clear_input without confirm=True raises ValueError instead of wiping data."""
    with pytest.raises(ValueError):
        featurizer.clear_input()


def test_load_then_featurize_data_single_column(featurizer):
    """Test featurizations and attributes for each model are correct with a single image column"""
    featurizer.load_data(**LOAD_DATA_ARGS_SINGLE)
    featurizer.featurize_preloaded_data(save_features=True)
    check_array = np.load(CHECK_ARRAY_SINGLE.format('squeezenet'))
    compare_featurizer_class(featurizer, (227, 227), check_array, featurized=True,
                             check_csv=CHECK_CSV_SINGLE.format('squeezenet'), **COMPARE_ARGS)


def test_load_then_featurize_data_multiple_columns(featurizer_autosample):
    """
    Test that the base featurizer works properly if you first load the data,
    and then featurize it later
    """
    featurizer_autosample.load_data(**LOAD_DATA_ARGS_MULT)
    featurizer_autosample.featurize_preloaded_data(save_features=True)
    check_array = np.load(CHECK_ARRAY_MULT.format('squeezenet'))

    compare_featurizer_class(featurizer_autosample, (227, 227), check_array, featurized=True,
                             check_csv=CHECK_CSV_MULT.format('squeezenet'), **COMPARE_ARGS_MULT)


def test_featurize_data_no_save_features(featurizer):
    """
    Test that the base featurizer works properly if you first load the data,
    and then featurize it later, without saving the features in the intermediate step.
    """
    featurizer.load_data(**LOAD_DATA_ARGS_SINGLE)
    featurizer.featurize_preloaded_data()
    # features were not saved, so the expected features array is empty
    check_array = np.array([])

    compare_featurizer_class(featurizer, (227, 227), check_array,
                             check_csv=CHECK_CSV_SINGLE.format('squeezenet'), **COMPARE_ARGS)


def test_featurize_data_single_column_batch_overflow(featurizer):
    """
    Test that the base featurizer works properly if the batch size overflows the remaining rows
    with a single image column
    """
    featurizer.featurize(save_features=True, **LOAD_DATA_ARGS_SINGLE)
    check_array = np.load(CHECK_ARRAY_SINGLE.format('squeezenet'))

    compare_featurizer_class(featurizer, (227, 227), check_array, featurized=True,
                             check_csv=CHECK_CSV_SINGLE.format('squeezenet'), **COMPARE_ARGS)


def test_featurize_data_multiple_columns_batch_overflow(featurizer_autosample):
    """
    Test that the base featurizer works properly if the batch size overflows the remaining rows
    with multiple image columns
    """
    featurizer_autosample.featurize(save_features=True, **LOAD_DATA_ARGS_MULT)
    check_array = np.load(CHECK_ARRAY_MULT.format('squeezenet'))

    compare_featurizer_class(featurizer_autosample, (227, 227), check_array, featurized=True,
                             check_csv=CHECK_CSV_MULT.format('squeezenet'), **COMPARE_ARGS_MULT)
@pytest.mark.parametrize('model,size,array_path', LOAD_PARAMS, ids=MODELS)
def test_featurize_single_column_no_batch_processing(model, size, array_path):
    """Test that all of the featurizations and attributes for each model are correct"""
    feat = ImageFeaturizer(model=model)
    # batch_size=0: exercise the no-batch-processing path
    feat.featurize(batch_size=0, save_features=True, **LOAD_DATA_ARGS_SINGLE)

    check_array = np.load(array_path)

    compare_featurizer_class(feat, size, check_array, featurized=True,
                             check_csv=CHECK_CSV_SINGLE.format(model), **COMPARE_ARGS)


@pytest.mark.parametrize('model,size,array_path', LOAD_PARAMS_MULT, ids=MODELS)
def test_featurize_data_multiple_columns_no_batch_processing(model, size, array_path):
    """Test featurizations and attributes for each model are correct with multiple image columns"""
    feat = ImageFeaturizer(model=model, autosample=True)
    feat.featurize(batch_processing=False, save_features=True, **LOAD_DATA_ARGS_MULT)
    check_array = np.load(array_path)

    compare_featurizer_class(feat, size, check_array, featurized=True,
                             check_csv=CHECK_CSV_MULT.format(model), **COMPARE_ARGS_MULT)


@pytest.mark.parametrize('model,size,array_path', LOAD_PARAMS, ids=MODELS)
def test_featurize_single_column_with_batch_processing(model, size, array_path):
    """Test that all of the featurizations and attributes for each model are correct"""
    feat = ImageFeaturizer(model=model)
    # batch_size=2 forces multiple batches over the 3-image dataset
    feat.featurize(batch_size=2, save_features=True, **LOAD_DATA_ARGS_SINGLE)

    check_array = np.load(array_path)

    compare_featurizer_class(feat, size, check_array, featurized=True,
                             check_csv=CHECK_CSV_SINGLE.format(model), **COMPARE_ARGS)


@pytest.mark.parametrize('model,size,array_path', LOAD_PARAMS_MULT, ids=MODELS)
def test_featurize_data_multiple_columns_with_batch_processing(model, size, array_path):
    """Test featurizations and
attributes for each model are correct with multiple image columns""" 394 | feat = ImageFeaturizer(model=model, autosample=True) 395 | feat.featurize(batch_size=2, save_features=True, **LOAD_DATA_ARGS_MULT) 396 | check_array = np.load(array_path) 397 | 398 | compare_featurizer_class(feat, size, check_array, featurized=True, 399 | check_csv=CHECK_CSV_MULT.format(model), **COMPARE_ARGS_MULT) 400 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27, py34, py35, py36, py37, flake8 3 | 4 | [travis] 5 | python = 6 | 3.7: py37 7 | 3.6: py36 8 | 3.5: py35 9 | 3.4: py34 10 | 2.7: py27 11 | 12 | [testenv:flake8] 13 | basepython=python 14 | deps=flake8 15 | commands=flake8 pic2vec 16 | 17 | [testenv] 18 | setenv = 19 | PYTHONPATH = {toxinidir} 20 | deps = 21 | -r{toxinidir}/requirements_dev.txt 22 | commands = 23 | pip install -U pip 24 | py.test --basetemp={envtmpdir} 25 | 26 | 27 | ; If you want to make tox run the tests with the same versions, create a 28 | ; requirements.txt with the pinned versions and uncomment the following lines: 29 | ; deps = 30 | ; -r{toxinidir}/requirements.txt 31 | -------------------------------------------------------------------------------- /travis_pypi_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Update encrypted deploy password in Travis config file.""" 4 | 5 | 6 | from __future__ import print_function 7 | import base64 8 | import json 9 | import os 10 | from getpass import getpass 11 | import yaml 12 | from cryptography.hazmat.primitives.serialization import load_pem_public_key 13 | from cryptography.hazmat.backends import default_backend 14 | from cryptography.hazmat.primitives.asymmetric.padding import PKCS1v15 15 | 16 | 17 | try: 18 | from urllib import urlopen 19 | except ImportError: 
20 | from urllib.request import urlopen 21 | 22 | 23 | GITHUB_REPO = 'datarobot/imagefeaturizer' 24 | TRAVIS_CONFIG_FILE = os.path.join( 25 | os.path.dirname(os.path.abspath(__file__)), '.travis.yml') 26 | 27 | 28 | def load_key(pubkey): 29 | """Load public RSA key. 30 | 31 | Work around keys with incorrect header/footer format. 32 | 33 | Read more about RSA encryption with cryptography: 34 | https://cryptography.io/latest/hazmat/primitives/asymmetric/rsa/ 35 | """ 36 | try: 37 | return load_pem_public_key(pubkey.encode(), default_backend()) 38 | except ValueError: 39 | # workaround for https://github.com/travis-ci/travis-api/issues/196 40 | pubkey = pubkey.replace('BEGIN RSA', 'BEGIN').replace('END RSA', 'END') 41 | return load_pem_public_key(pubkey.encode(), default_backend()) 42 | 43 | 44 | def encrypt(pubkey, password): 45 | """Encrypt password using given RSA public key and encode it with base64. 46 | 47 | The encrypted password can only be decrypted by someone with the 48 | private key (in this case, only Travis). 49 | """ 50 | key = load_key(pubkey) 51 | encrypted_password = key.encrypt(password, PKCS1v15()) 52 | return base64.b64encode(encrypted_password) 53 | 54 | 55 | def fetch_public_key(repo): 56 | """Download RSA public key Travis will use for this repo. 57 | 58 | Travis API docs: http://docs.travis-ci.com/api/#repository-keys 59 | """ 60 | keyurl = 'https://api.travis-ci.org/repos/{0}/key'.format(repo) 61 | data = json.loads(urlopen(keyurl).read().decode()) 62 | if 'key' not in data: 63 | errmsg = "Could not find public key for repo: {}.\n".format(repo) 64 | errmsg += "Have you already added your GitHub repo to Travis?" 
65 | raise ValueError(errmsg) 66 | return data['key'] 67 | 68 | 69 | def prepend_line(filepath, line): 70 | """Rewrite a file adding a line to its beginning.""" 71 | with open(filepath) as f: 72 | lines = f.readlines() 73 | 74 | lines.insert(0, line) 75 | 76 | with open(filepath, 'w') as f: 77 | f.writelines(lines) 78 | 79 | 80 | def load_yaml_config(filepath): 81 | """Load yaml config file at the given path.""" 82 | with open(filepath) as f: 83 | return yaml.load(f) 84 | 85 | 86 | def save_yaml_config(filepath, config): 87 | """Save yaml config file at the given path.""" 88 | with open(filepath, 'w') as f: 89 | yaml.dump(config, f, default_flow_style=False) 90 | 91 | 92 | def update_travis_deploy_password(encrypted_password): 93 | """Put `encrypted_password` into the deploy section of .travis.yml.""" 94 | config = load_yaml_config(TRAVIS_CONFIG_FILE) 95 | 96 | config['deploy']['password'] = dict(secure=encrypted_password) 97 | 98 | save_yaml_config(TRAVIS_CONFIG_FILE, config) 99 | 100 | line = ('# This file was autogenerated and will overwrite' 101 | ' each time you run travis_pypi_setup.py\n') 102 | prepend_line(TRAVIS_CONFIG_FILE, line) 103 | 104 | 105 | def main(args): 106 | """Add a PyPI password to .travis.yml so that Travis can deploy to PyPI. 107 | 108 | Fetch the Travis public key for the repo, and encrypt the PyPI password 109 | with it before adding, so that only Travis can decrypt and use the PyPI 110 | password. 
111 | """ 112 | public_key = fetch_public_key(args.repo) 113 | password = args.password or getpass('PyPI password: ') 114 | update_travis_deploy_password(encrypt(public_key, password.encode())) 115 | print("Wrote encrypted password to .travis.yml -- you're ready to deploy") 116 | 117 | 118 | if '__main__' == __name__: 119 | import argparse 120 | parser = argparse.ArgumentParser(description=__doc__) 121 | parser.add_argument('--repo', default=GITHUB_REPO, 122 | help='GitHub repo (default: %s)' % GITHUB_REPO) 123 | parser.add_argument('--password', 124 | help='PyPI password (will prompt if not provided)') 125 | 126 | args = parser.parse_args() 127 | main(args) 128 | -------------------------------------------------------------------------------- /utils/create_test_files.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a script that is used to update test files with current versions of the scientific 3 | libraries. Whenever scientific libraries are upgraded, this can be run to check whether predictions 4 | have changed for any of the models, and update them if need be. 
5 | """ 6 | import numpy as np 7 | import pandas as pd 8 | import logging 9 | 10 | from tests.test_build_featurizer import INITIALIZE_MODEL_CASES, INITIALIZED_MODEL_TEST_ARRAY 11 | from pic2vec.build_featurizer import _initialize_model 12 | from pic2vec.enums import MODELS 13 | from pic2vec import ImageFeaturizer 14 | 15 | TEST_DATA_NAME = 'tests/image_featurizer_testing/csv_checking/testing_data.csv' 16 | 17 | LOAD_DATA_ARGS_SINGLE = { 18 | 'image_columns': 'images', 19 | 'image_path': 'tests/feature_preprocessing_testing/test_images', 20 | 'save_features': True 21 | } 22 | 23 | LOAD_DATA_ARGS_MULT = { 24 | 'image_columns': ['images_1', 'images_2'], 25 | 'image_path': 'tests/feature_preprocessing_testing/test_images', 26 | 'csv_path': TEST_DATA_NAME, 27 | 'save_features': True 28 | } 29 | 30 | # Arrays used to test model predictions on single and multiple image columns 31 | CHECK_ARRAY_SINGLE = 'tests/image_featurizer_testing/array_tests/check_prediction_array_{}.npy' 32 | CHECK_ARRAY_MULT = 'tests/image_featurizer_testing/array_tests/check_prediction_array_{}_mult.npy' 33 | 34 | # CSVs used to test model predictions on single and multiple image columns 35 | CHECK_CSV_SINGLE = 'tests/image_featurizer_testing/csv_checking/{}_check_csv.csv' 36 | CHECK_CSV_MULT = 'tests/image_featurizer_testing/csv_checking/{}_check_csv_mult.csv' 37 | 38 | # This creates a dictionary mapping from each model to the required image size for the test file 39 | MODEL_TO_IMAGE_SIZE_DICT = {model_map[0]: model_map[2] for model_map in INITIALIZE_MODEL_CASES} 40 | 41 | 42 | def update_test_files(model, multiple_image_columns=False): 43 | """ 44 | This function takes a model string as the main argument, initializes the appropriate 45 | ImageFeaturizer model, and uses it to predict on the test array and CSV. It logs 46 | whether the predictions have changed, and then updates the arrays and CSVs accordingly. 
47 | 48 | Parameters 49 | ---------- 50 | model : str 51 | The name of one of pic2vec's supported models 52 | 53 | multiple_image_columns : bool 54 | A boolean that determines whether to update the csvs and arrays for single or multiple 55 | image columns 56 | 57 | Returns 58 | ------- 59 | None 60 | """ 61 | # Only autosample if updating the csvs and arrays for multiple image columns 62 | f = ImageFeaturizer(model=model, autosample=multiple_image_columns) 63 | 64 | # Load and featurize the data corresponding to either the single or multiple image columns 65 | load_data = LOAD_DATA_ARGS_MULT if multiple_image_columns else LOAD_DATA_ARGS_SINGLE 66 | f.featurize(**load_data) 67 | 68 | # Updating test CSVs 69 | features = f.features 70 | test_csv = CHECK_CSV_MULT if multiple_image_columns else CHECK_CSV_SINGLE 71 | 72 | # Have to convert to float32 73 | current_csv = pd.read_csv(test_csv.format(model)) 74 | cols = current_csv.select_dtypes(include='float64').columns 75 | current_csv = current_csv.astype({col: 'float32' for col in cols}) 76 | 77 | # Check prediction consistency and update files for test CSVs if necessary 78 | test_csv_identical = features.equals(current_csv) 79 | logging.INFO("Test csv identical for {}?".format(model)) 80 | logging.INFO(test_csv_identical) 81 | 82 | if not test_csv_identical: 83 | features.to_csv(test_csv.format(model), index=False) 84 | 85 | # Updating test arrays 86 | features = f.features.astype(float).values 87 | test_array = CHECK_ARRAY_MULT if multiple_image_columns else CHECK_ARRAY_SINGLE 88 | 89 | # Check prediction consistency and update files for test arrays if necessary 90 | test_array_identical = np.array_equal(features, np.load(test_array.format(model))) 91 | 92 | logging.INFO("Test array identical for {}?".format(model)) 93 | logging.INFO(test_array_identical) 94 | 95 | if not test_array_identical: 96 | np.save(test_array.format(model), features) 97 | 98 | 99 | def update_zeros_testing(model): 100 | """ 101 | This 
function is used to update arrays in a lower-level part of testing (build_featurizer) than 102 | the final ImageFeaturizer. This test does not use decapitated models, but rather downloads the 103 | full Keras pretrained model and checks its baseline predictions on a single blank 104 | (i.e. all-zeros) image. 105 | 106 | This function initializes the model, and uses it to predict on a single blank image. It logs 107 | whether the predictions have changed, and then updates the test arrays if necessary. 108 | 109 | Parameters 110 | ---------- 111 | model : str 112 | The name of one of pic2vec's supported models 113 | 114 | Returns 115 | ------- 116 | None 117 | """ 118 | 119 | # Create the test image to be predicted on 120 | m = _initialize_model(model) 121 | 122 | # Initialize a blank image of the appropriate size for the model 123 | blank_image = np.zeros(MODEL_TO_IMAGE_SIZE_DICT[model]) 124 | 125 | # Compare the generated predictions against the existing test array, and update if necessary 126 | existing_test_array = np.load(INITIALIZED_MODEL_TEST_ARRAY.format(model)) 127 | generated_array = m.predict_on_batch(blank_image) 128 | 129 | blank_prediction_identical = np.array_equal(generated_array, existing_test_array) 130 | 131 | logging.INFO("Is a blank image prediction unchanged for {}?".format(model)) 132 | logging.INFO(blank_prediction_identical) 133 | 134 | if not blank_prediction_identical: 135 | np.save(INITIALIZED_MODEL_TEST_ARRAY.format(model), generated_array) 136 | 137 | 138 | if __name__ == "__main__": 139 | for model in MODELS: 140 | update_test_files(model) 141 | update_test_files(model, multiple_image_columns=True) 142 | update_zeros_testing(model) 143 | --------------------------------------------------------------------------------