├── .gitignore ├── .travis.yml ├── AUTHORS.rst ├── CONTRIBUTING.rst ├── DRCODEOWNERS ├── HISTORY.rst ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── docs ├── .gitignore ├── Makefile ├── authors.rst ├── conf.py ├── contributing.rst ├── guides │ └── installation.md ├── history.rst ├── index.rst ├── make.bat ├── markdowns │ └── parameters.md ├── readme.rst └── usage.rst ├── examples └── cats_vs_dogs │ ├── Cats_v_Dogs_Test_Example.ipynb │ └── cats_vs_dogs.csv ├── pic2vec ├── README.md ├── __init__.py ├── build_featurizer.py ├── data_featurizing.py ├── enums.py ├── feature_preprocessing.py ├── image_featurizer.py ├── saved_models │ └── squeezenet_weights_tf_dim_ordering_tf_kernels.h5 └── squeezenet.py ├── requirements.txt ├── requirements_dev.txt ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── build_featurizer_testing │ ├── inceptionv3_test_prediction.npy │ ├── resnet50_test_prediction.npy │ ├── squeezenet_test_prediction.npy │ ├── vgg16_test_prediction.npy │ ├── vgg19_test_prediction.npy │ └── xception_test_prediction.npy ├── data_featurizing_testing │ ├── array_testing │ │ └── check_featurize.npy │ └── csv_testing │ │ ├── featurize_data_check_csv_full │ │ └── featurize_data_check_csv_images ├── feature_preprocessing_testing │ ├── csv_testing │ │ ├── create_csv_check │ │ ├── csv_image_path_check │ │ ├── directory_combined_image_path_test │ │ ├── directory_preprocess_system_test │ │ ├── error_directory_combined_test │ │ ├── error_row │ │ └── url_test │ ├── test_image_arrays │ │ ├── image_test.npy │ │ ├── image_test_grayscale.npy │ │ ├── image_test_isotropic.npy │ │ └── image_test_isotropic_grayscale.npy │ ├── test_images │ │ ├── arendt.bmp │ │ ├── borges.jpg │ │ ├── heidegger.gif │ │ └── sappho.png │ └── test_preprocessing_arrays │ │ ├── arendt.npy │ │ ├── arendt_grayscale.npy │ │ ├── borges.npy │ │ ├── sappho.npy │ │ └── sappho_grayscale.npy ├── image_featurizer_testing │ ├── array_tests │ │ ├── check_prediction_array_inceptionv3.npy │ │ ├── check_prediction_array_inceptionv3_mult.npy │ │ ├── check_prediction_array_resnet50.npy │ │ ├── check_prediction_array_resnet50_mult.npy │ │ ├── check_prediction_array_squeezenet.npy │ │ ├── check_prediction_array_squeezenet_mult.npy │ │ ├── check_prediction_array_vgg16.npy │ │ ├── check_prediction_array_vgg16_mult.npy │ │ ├── check_prediction_array_vgg19.npy │ │ ├── check_prediction_array_vgg19_mult.npy │ │ ├── check_prediction_array_xception.npy │ │ └── check_prediction_array_xception_mult.npy │ └── csv_checking │ │ ├── inceptionv3_check_csv.csv │ │ ├── inceptionv3_check_csv_mult.csv │ │ ├── resnet50_check_csv.csv │ │ ├── resnet50_check_csv_mult.csv │ │ ├── squeezenet_check_csv.csv │ │ ├── squeezenet_check_csv_mult.csv │ │ ├── testing_data.csv │ │ ├── vgg16_check_csv.csv │ │ ├── vgg16_check_csv_mult.csv │ │ ├── vgg19_check_csv.csv │ │ ├── vgg19_check_csv_mult.csv │ │ ├── xception_check_csv.csv │ │ └── xception_check_csv_mult.csv ├── test_build_featurizer.py ├── test_data_featurizing.py ├── test_feature_preprocessing.py └── test_image_featurizer.py ├── tox.ini ├── travis_pypi_setup.py └── utils └── create_test_files.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | .pytest_cache 12 | env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | # DS_Store Mac Nightmare 105 | .DS_Store 106 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | # Enable 3.7 without globally enabling sudo and xenial dist for other build jobs 4 | matrix: 5 | include: 6 | - python: 2.7 7 | - python: 3.4 8 | - python: 3.5 9 | - python: 3.6 10 | - python: 3.7 11 | dist: xenial 12 | sudo: true 13 | 14 | # command to install dependencies 15 | install: 16 | - pip install -r requirements.txt 17 | - pip install coveralls pytest-xdist pytest-cov 18 | # Turn off email notifications 19 | notifications: 20 | email: false 21 | # command to run tests 22 | script: 23 | py.test --cov-report= --cov=pic2vec --boxed tests/ -vvvvs 24 | # Coveralls 25 | after_success: 26 | coveralls 27 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Credits 3 | ======= 4 | 5 | Development Lead 6 | ---------------- 7 | 8 | * Jett Oristaglio 9 | 10 | Contributors 11 | ------------ 12 | 13 | None yet. Why not be the first? 14 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: shell 2 | 3 | ============ 4 | Contributing 5 | ============ 6 | 7 | Contributions are welcome, and they are greatly appreciated! Every 8 | little bit helps, and credit will always be given. 9 | 10 | You can contribute in many ways: 11 | 12 | Types of Contributions 13 | ---------------------- 14 | 15 | Report Bugs 16 | ~~~~~~~~~~~ 17 | 18 | Report bugs at https://github.com/datarobot/pic2vec/issues. 19 | 20 | If you are reporting a bug, please include: 21 | 22 | * Your operating system name and version. 23 | * Any details about your local setup that might be helpful in troubleshooting. 24 | * Detailed steps to reproduce the bug. 25 | 26 | Fix Bugs 27 | ~~~~~~~~ 28 | 29 | Look through the GitHub issues for bugs. Anything tagged with "bug" 30 | and "help wanted" is open to whoever wants to implement it. 31 | 32 | Implement Features 33 | ~~~~~~~~~~~~~~~~~~ 34 | 35 | Look through the GitHub issues for features. Anything tagged with "enhancement" 36 | and "help wanted" is open to whoever wants to implement it. 37 | 38 | Write Documentation 39 | ~~~~~~~~~~~~~~~~~~~ 40 | 41 | pic2vec could always use more documentation, whether as part of the 42 | official pic2vec docs, in docstrings, or even on the web in blog posts, 43 | articles, and such. 44 | 45 | Submit Feedback 46 | ~~~~~~~~~~~~~~~ 47 | 48 | The best way to send feedback is to file an issue at https://github.com/datarobot/pic2vec/issues. 49 | 50 | If you are proposing a feature: 51 | 52 | * Explain in detail how it would work. 53 | * Keep the scope as narrow as possible, to make it easier to implement. 54 | * Remember that this is a volunteer-driven project, and that contributions 55 | are welcome :) 56 | 57 | Get Started! 58 | ------------ 59 | 60 | Ready to contribute? Here's how to set up `pic2vec` for local development. 61 | 62 | 1. Fork the `pic2vec` repo on GitHub. 63 | 2. Clone your fork locally:: 64 | 65 | $ git clone git@github.com:your_name_here/pic2vec.git 66 | 67 | 3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: 68 | 69 | $ mkvirtualenv pic2vec 70 | $ cd pic2vec/ 71 | $ python setup.py develop 72 | 73 | 4. Create a branch for local development:: 74 | 75 | $ git checkout -b name-of-your-bugfix-or-feature 76 | 77 | Now you can make your changes locally. 78 | 79 | 5. When you're done making changes, check that your changes pass flake8 and the tests, including testing other Python versions with tox:: 80 | 81 | $ flake8 pic2vec tests 82 | $ python setup.py test or py.test 83 | $ tox 84 | 85 | To get flake8 and tox, just pip install them into your virtualenv. 86 | 87 | 6. Commit your changes and push your branch to GitHub:: 88 | 89 | $ git add . 90 | $ git commit -m "Your detailed description of your changes." 91 | $ git push origin name-of-your-bugfix-or-feature 92 | 93 | 7. Submit a pull request through the GitHub website. 94 | 95 | Pull Request Guidelines 96 | ----------------------- 97 | 98 | Before you submit a pull request, check that it meets these guidelines: 99 | 100 | 1. The pull request should include tests. 101 | 2. If the pull request adds functionality, the docs should be updated. Put 102 | your new functionality into a function with a docstring, and add the 103 | feature to the list in README.rst. 104 | 3. The pull request should work for Python 2.7, 3.3, 3.4, 3.5, 3.6, and for PyPy. Check 105 | https://travis-ci.org/datarobot/pic2vec/pull_requests 106 | and make sure that the tests pass for all supported Python versions. 107 | 108 | Tips 109 | ---- 110 | 111 | To run tests:: 112 | 113 | $ py.test tests 114 | -------------------------------------------------------------------------------- /DRCODEOWNERS: -------------------------------------------------------------------------------- 1 | # This file defines which domain owns what parts of this repository. 2 | # This repository is a central place for defining Jenkins Jobs and Jarvis Suites to orchestrate 3 | # build and test workflows for pull requests and pipelines. As such, it is expected 4 | # that this repository will have many owners and some shared areas (reusable macros, etc.) 5 | # 6 | # The syntax is the same as defined in 7 | # https://help.github.com/articles/about-codeowners/ 8 | # 9 | # Important Rules: 10 | # 1. The last matching pattern in this file takes precedence 11 | # 12 | # 2. Only domains (github team) own code, not individuals 13 | # see list at https://github.com/orgs/datarobot/teams 14 | # 15 | # Default owners for everything in the repo. 16 | # Unless a later match takes precedence, these groups will be requested for 17 | # review when someone opens a pull request. 18 | / @datarobot/core-modeling 19 | -------------------------------------------------------------------------------- /HISTORY.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | History 3 | ======= 4 | 0.101.1 (2019-9-25) 5 | ------------------ 6 | * Limited Keras version to pre-2.3.0 to fix issues from Keras' breaking changes 7 | 8 | 0.101.0 (2019-3-25) 9 | ------------------ 10 | * Updated version of Trafaret to a non-beta version 11 | * Updated keras to 2.2.3 or greater 12 | * This library upgrade changes prediction consistency across past versions of pic2vec. 13 | ResNet50 is the model that has changed the most, due to changes in implementation. 14 | Other models have small floating point changes, but still pass np.testing.assert_allclose tests. 15 | 16 | 0.100.1 (2019-3-24) 17 | ------------------ 18 | * Updated version of Pillow to 5.4.1, in order to support Python 3.7 19 | * Updated the README 20 | 21 | 0.100.0 (2018-12-10) 22 | ------------------ 23 | * Added test coverage and increased error checking 24 | * Changed default csv name 25 | * Changed `image_column_headers` to `image_columns` everywhere 26 | * Updated examples 27 | * Updated version of scipy to 1.1 and numpy to 1.15 28 | 29 | 30 | 0.99.2 (2018-08-01) 31 | ------------------ 32 | * Updated the notebook example 33 | * Some code cleanup 34 | 35 | 0.99.1 (2018-06-20) 36 | ------------------ 37 | * Lots of code cleanup 38 | * Changed new_csv_name argument to new_csv_path everywhere for consistency 39 | * Removed '_full' from the saved csv_name for the full dataframe. Features-only csv still has 40 | '_features_only' in csv name. 41 | * Added '_featurized_' to saved csv names 42 | * Removed new_csv_path as argument to functions that do not actually require it 43 | 44 | 0.99.0 (2018-04-02) 45 | ------------------ 46 | * Added batch processing 47 | * Made pic2vec more programmatic (removed automatic csv-writing, etc.) 48 | * Bound keras to <2.1.5 to remove resnet problem 49 | 50 | 0.9.0 (2017-09-24) 51 | ------------------ 52 | * Fixed Keras backwards compatibility issues (include_top deprecated, require_flatten added) 53 | * Fixed ResNet50 update issues (removed a zero-padding layer, updated weights) 54 | 55 | 0.8.2 (2017-08-14) 56 | ------------------ 57 | * Updated trafaret requirement for PyPi package 58 | * Updated cats vs. dogs example 59 | 60 | 0.8.1 (2017-08-07) 61 | ------------------ 62 | * Fixed bugs with robust naming 63 | * Added error message for failed image conversion 64 | 65 | 0.8.0 (2017-08-02) 66 | ------------------ 67 | * Added robust naming options to the generated csv files 68 | 69 | 0.7.1 (2017-08-02) 70 | ------------------ 71 | * Fixed PIL truncated image bug 72 | 73 | 0.7.0 (2017-08-02) 74 | ------------------ 75 | * Fixed bug with CSV badly formed URLs 76 | * Fixed mistake with InceptionV3 preprocessing happening for every model 77 | 78 | 0.6.3 (2017-07-25) 79 | ------------------ 80 | * Added Travis and Coveralls for testing and coverage automation 81 | * Repo went public 82 | * Python 3.x compatibility 83 | 84 | 0.6.2 (2017-07-14) 85 | ------------------ 86 | * Fixed image format recognition. 87 | 88 | 0.6.1 (2017-07-12) 89 | ------------------ 90 | * Directory-only now natural sorted. 91 | 92 | 0.6.0 (2017-07-11) 93 | ------------------ 94 | * Added multi-column support 95 | * Added missing image column to csv 96 | 97 | 0.5.0 (2017-07-06) 98 | ------------------ 99 | * Renamed to pic2vec 100 | * Tests parametrized 101 | 102 | 0.4.3 (2017-07-03) 103 | ------------------ 104 | * Second round of code review- optimized code, better type checking with trafaret 105 | 106 | 0.4.2 (2017-06-30) 107 | ------------------ 108 | * Improved README test examples 109 | 110 | 0.4.1 (2017-06-30) 111 | ------------------ 112 | * Fixed documentation 113 | 114 | 0.4.0 (2017-06-29) 115 | ------------------ 116 | * Added ability to call multiple models, and packaged in SqueezeNet with weights. 117 | 118 | 0.3.0 (2017-06-26) 119 | ------------------ 120 | * Created installation instructions and readme files, ready for prototype distribution 121 | 122 | 0.2.9(2017-06-25) 123 | ------------------ 124 | * Fixed import problem that prevented generated csvs from saving 125 | 126 | 0.2.8(2017-06-25) 127 | ------------------ 128 | * Fixed variable name bugs 129 | 130 | 0.2.7(2017-06-25) 131 | ------------------ 132 | * Changed image_directory_path to the more manageable image_path 133 | * Made testing module and preprocessing module slightly more robust. 134 | 135 | 0.2.6(2017-06-23) 136 | ------------------ 137 | * Added features-only csv test, and got rid of the column headers in the file 138 | * Added Documentation to data featurization modeules 139 | 140 | 0.2.5(2017-06-23) 141 | ------------------ 142 | * 100% test coverage 143 | * Fixed a problem where a combined directory + csv was appending to the wrong 144 | rows when there was a mismatch between the directory and the csv. 145 | 146 | 0.2.4(2017-06-22) 147 | ------------------ 148 | * Fixed more bugs in build_featurizer 149 | 150 | 0.2.3(2017-06-22) 151 | ------------------ 152 | * Fixed build_featurizer troubles with building new csv paths in current directory 153 | 154 | 0.2.2(2017-06-22) 155 | ------------------ 156 | * Full requirements for keras imported 157 | 158 | 0.2.1 (2017-06-22) 159 | ------------------ 160 | * Bug fixes 161 | 162 | 0.2.0 (2017-06-22) 163 | ------------------ 164 | * Second release on PyPI. 165 | * Install keras with tensorflow backend specifically 166 | 167 | 0.1.0 (2017-06-14) 168 | ------------------ 169 | * First release on PyPI. 170 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | BSD License 3 | 4 | Copyright (c) 2017, Jett Oristaglio 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without modification, 8 | are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, this 14 | list of conditions and the following disclaimer in the documentation and/or 15 | other materials provided with the distribution. 16 | 17 | * Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived from this 19 | software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 25 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 28 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 29 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 30 | OF THE POSSIBILITY OF SUCH DAMAGE. 31 | 32 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include AUTHORS.rst 2 | include CONTRIBUTING.rst 3 | include HISTORY.rst 4 | include LICENSE 5 | include README.md 6 | 7 | recursive-include tests * 8 | recursive-exclude * __pycache__ 9 | recursive-exclude * *.py[co] 10 | 11 | recursive-include pic2vec/saved_models * 12 | 13 | recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif *.md 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help 2 | .DEFAULT_GOAL := help 3 | define BROWSER_PYSCRIPT 4 | import os, webbrowser, sys 5 | try: 6 | from urllib import pathname2url 7 | except: 8 | from urllib.request import pathname2url 9 | 10 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 11 | endef 12 | export BROWSER_PYSCRIPT 13 | 14 | define PRINT_HELP_PYSCRIPT 15 | import re, sys 16 | 17 | for line in sys.stdin: 18 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 19 | if match: 20 | target, help = match.groups() 21 | print("%-20s %s" % (target, help)) 22 | endef 23 | export PRINT_HELP_PYSCRIPT 24 | BROWSER := python -c "$$BROWSER_PYSCRIPT" 25 | 26 | help: 27 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 28 | 29 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 30 | 31 | 32 | clean-build: ## remove build artifacts 33 | rm -fr build/ 34 | rm -fr dist/ 35 | rm -fr .eggs/ 36 | find . -name '*.egg-info' -exec rm -fr {} + 37 | find . -name '*.egg' -exec rm -f {} + 38 | 39 | clean-pyc: ## remove Python file artifacts 40 | find . -name '*.pyc' -exec rm -f {} + 41 | find . -name '*.pyo' -exec rm -f {} + 42 | find . -name '*~' -exec rm -f {} + 43 | find . -name '__pycache__' -exec rm -fr {} + 44 | 45 | clean-test: ## remove test and coverage artifacts 46 | rm -fr .tox/ 47 | rm -f .coverage 48 | rm -fr htmlcov/ 49 | 50 | lint: ## check style with flake8 51 | flake8 pic2vec tests 52 | 53 | test: ## run tests quickly with the default Python 54 | py.test 55 | 56 | 57 | test-all: ## run tests on every Python version with tox 58 | tox 59 | 60 | coverage: ## check code coverage quickly with the default Python 61 | coverage run --source pic2vec -m pytest 62 | coverage report -m 63 | coverage html 64 | $(BROWSER) htmlcov/index.html 65 | 66 | docs: ## generate Sphinx HTML documentation, including API docs 67 | rm -f docs/pic2vec.rst 68 | rm -f docs/modules.rst 69 | sphinx-apidoc -o docs/ pic2vec 70 | $(MAKE) -C docs clean 71 | $(MAKE) -C docs html 72 | $(BROWSER) docs/_build/html/index.html 73 | 74 | servedocs: docs ## compile the docs watching for changes 75 | watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . 76 | 77 | release: clean ## package and upload a release 78 | python setup.py sdist upload 79 | python setup.py bdist_wheel upload 80 | 81 | dist: clean ## builds source and wheel package 82 | python setup.py sdist 83 | python setup.py bdist_wheel 84 | ls -l dist 85 | 86 | install: clean ## install the package to the active Python's site-packages 87 | python setup.py install 88 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/datarobot/pic2vec.svg?branch=master)](https://travis-ci.org/datarobot/pic2vec) [![Coverage Status](https://coveralls.io/repos/github/datarobot/pic2vec/badge.svg?branch=master)](https://coveralls.io/github/datarobot/pic2vec?branch=master) 2 | 3 | Pic2Vec 4 | ================ 5 | 6 | Featurize images using a small, contained pre-trained deep learning network 7 | 8 | 9 | * Free software: BSD license 10 | 11 | 12 | Features 13 | -------- 14 | 15 | This is the prototype for image features engineering. Supports Python 2.7, 3.4, 3.5, 3.6, and 3.7 16 | 17 | ``pic2vec`` is a python package that performs automated feature extraction 18 | for image data. It supports feature engineering on new image data, and allows 19 | traditional machine learning algorithms (such as tree-based algorithms) to 20 | train on image data. 21 | 22 | 23 | ## Input Specification 24 | 25 | ### Data Format 26 | 27 | ``pic2vec`` works on image data represented as either: 28 | 1. A directory of image files. 29 | 2. As URL pointers contained in a CSV. 30 | 3. Or as a directory of images with a CSV containing pointers to the image files. 31 | 32 | If no CSV is provided with the directory, it automatically generates a CSV to store the features with the appropriate images. 33 | 34 | Each row of the CSV represents a different image, and image rows can also have columns containing other data about the images as well. Each image's featurized representation will be appended as a series of new columns at the end of the appropriate image row. 35 | 36 | 37 | ### Constraints Specification 38 | The goal of this project was to make the featurizer as easy to use and hard to break as possible. If working properly, it should be resistant to badly-formatted data, such as missing rows or columns in the csv, image mismatches between a CSV and an image directory, and invalid image formats. 39 | 40 | However, for the featurizer to function optimally, it prefers certain constraints: 41 | * The CSV should have no missing columns or rows, and there should be full overlap between images in the CSV and the image directory 42 | 43 | * If checking predictions on a separate test set (such as on Kaggle), the filesystem needs to sort filepaths consistently with the sorting of the test set labels. The order in the CSV (whether generated automatically or passed in) will be considered the canonical order for the feature vectors. 44 | 45 | The featurizer can only process .png, .jpeg, or .bmp image files. Any other images will be left out of the featurization by being represented by zero vectors in the image batch. 46 | 47 | ## Quick Start 48 | 49 | The following Python code shows a typical usage of `pic2vec`: 50 | 51 | ```python 52 | from pic2vec import ImageFeaturizer 53 | 54 | image_column_name = 'images' 55 | my_csv = 'path/to/data.csv' 56 | my_image_directory = 'path/to/image/directory/' 57 | 58 | my_featurizer = ImageFeaturizer(model='xception', depth=2, autosample=True) 59 | 60 | featurized_df = my_featurizer.featurize(image_column_name, csv_path=my_csv, 61 | image_path=my_image_directory) 62 | 63 | ``` 64 | 65 | ## Examples 66 | 67 | To get started, see the following example: 68 | 69 | 1. [Cats vs. Dogs](examples/Cats_v_Dogs_Test_Example.ipynb): Dataset from combined directory + CSV 70 | 71 | Examples coming soon: 72 | 2. Hot Dog, Not Hot Dog: Dataset from a CSV with URLs and no image directory 73 | 74 | 75 | ## Installation 76 | 77 | See the [Installation Guide](docs/guides/installation.md) for details. 78 | 79 | ### Installing Keras/Tensorflow 80 | If you run into trouble installing Keras or Tensorflow as a dependency, read the [Keras installation guide](https://keras.io/#installation) and [Tensorflow installation guide](https://www.tensorflow.org/install/) for details about installing Keras/Tensorflow on your machine. 81 | 82 | 83 | ## Using Featurizer Output With DataRobot 84 | ``pic2vec`` generates a flat CSV which is ready for supervised modeling, if the data has been labelled with a variable that 85 | can be used as a target. The images are transformed into a set of regular columns containing numeric data. 86 | Additionally, if unlabelled, it can be used for unsupervised learning (such as anomaly detection). 87 | 88 | 89 | ### Running tests 90 | 91 | To run the unit tests with ``pytest``, run 92 | 93 | ``` 94 | py.test tests 95 | ``` 96 | 97 | 98 | 99 | Credits 100 | --------- 101 | 102 | This package was created with [Cookiecutter](https://github.com/audreyr/cookiecutter) and the [audreyr/cookiecutter-pypackage](https://github.com/audreyr/cookiecutter-pypackage) project template. 103 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | /pic2vec.rst 2 | /pic2vec.*.rst 3 | /modules.rst 4 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pic2vec.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pic2vec.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/pic2vec" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pic2vec" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs/authors.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../AUTHORS.rst 2 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # image_featurizer documentation build configuration file, created by 5 | # sphinx-quickstart on Tue Jul 9 22:26:36 2013. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | 19 | # If extensions (or modules to document with autodoc) are in another 20 | # directory, add these directories to sys.path here. If the directory is 21 | # relative to the documentation root, use os.path.abspath to make it 22 | # absolute, like shown here. 23 | #sys.path.insert(0, os.path.abspath('.')) 24 | 25 | # Get the project root dir, which is the parent dir of this 26 | cwd = os.getcwd() 27 | project_root = os.path.dirname(cwd) 28 | 29 | # Insert the project root dir as the first element in the PYTHONPATH. 30 | # This lets us ensure that the source package is imported, and that its 31 | # version is used. 32 | sys.path.insert(0, project_root) 33 | 34 | import image_featurizer 35 | 36 | # -- General configuration --------------------------------------------- 37 | 38 | # If your documentation needs a minimal Sphinx version, state it here. 39 | #needs_sphinx = '1.0' 40 | 41 | # Add any Sphinx extension module names here, as strings. They can be 42 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 43 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] 44 | 45 | # Add any paths that contain templates here, relative to this directory. 46 | templates_path = ['_templates'] 47 | 48 | # The suffix of source filenames. 49 | source_suffix = '.rst' 50 | 51 | # The encoding of source files. 52 | #source_encoding = 'utf-8-sig' 53 | 54 | # The master toctree document. 55 | master_doc = 'index' 56 | 57 | # General information about the project. 58 | project = u'Image Featurizer' 59 | copyright = u"2017, Jett Oristaglio" 60 | 61 | # The version info for the project you're documenting, acts as replacement 62 | # for |version| and |release|, also used in various other places throughout 63 | # the built documents. 64 | # 65 | # The short X.Y version. 66 | version = image_featurizer.__version__ 67 | # The full version, including alpha/beta/rc tags. 68 | release = image_featurizer.__version__ 69 | 70 | # The language for content autogenerated by Sphinx. Refer to documentation 71 | # for a list of supported languages. 72 | #language = None 73 | 74 | # There are two options for replacing |today|: either, you set today to 75 | # some non-false value, then it is used: 76 | #today = '' 77 | # Else, today_fmt is used as the format for a strftime call. 78 | #today_fmt = '%B %d, %Y' 79 | 80 | # List of patterns, relative to source directory, that match files and 81 | # directories to ignore when looking for source files. 82 | exclude_patterns = ['_build'] 83 | 84 | # The reST default role (used for this markup: `text`) to use for all 85 | # documents. 86 | #default_role = None 87 | 88 | # If true, '()' will be appended to :func: etc. cross-reference text. 89 | #add_function_parentheses = True 90 | 91 | # If true, the current module name will be prepended to all description 92 | # unit titles (such as .. function::). 93 | #add_module_names = True 94 | 95 | # If true, sectionauthor and moduleauthor directives will be shown in the 96 | # output. They are ignored by default. 97 | #show_authors = False 98 | 99 | # The name of the Pygments (syntax highlighting) style to use. 100 | pygments_style = 'sphinx' 101 | 102 | # A list of ignored prefixes for module index sorting. 103 | #modindex_common_prefix = [] 104 | 105 | # If true, keep warnings as "system message" paragraphs in the built 106 | # documents. 107 | #keep_warnings = False 108 | 109 | 110 | # -- Options for HTML output ------------------------------------------- 111 | 112 | # The theme to use for HTML and HTML Help pages. See the documentation for 113 | # a list of builtin themes. 114 | html_theme = 'default' 115 | 116 | # Theme options are theme-specific and customize the look and feel of a 117 | # theme further. For a list of options available for each theme, see the 118 | # documentation. 119 | #html_theme_options = {} 120 | 121 | # Add any paths that contain custom themes here, relative to this directory. 122 | #html_theme_path = [] 123 | 124 | # The name for this set of Sphinx documents. If None, it defaults to 125 | # " v documentation". 126 | #html_title = None 127 | 128 | # A shorter title for the navigation bar. Default is the same as 129 | # html_title. 130 | #html_short_title = None 131 | 132 | # The name of an image file (relative to this directory) to place at the 133 | # top of the sidebar. 134 | #html_logo = None 135 | 136 | # The name of an image file (within the static path) to use as favicon 137 | # of the docs. This file should be a Windows icon file (.ico) being 138 | # 16x16 or 32x32 pixels large. 139 | #html_favicon = None 140 | 141 | # Add any paths that contain custom static files (such as style sheets) 142 | # here, relative to this directory. They are copied after the builtin 143 | # static files, so a file named "default.css" will overwrite the builtin 144 | # "default.css". 145 | html_static_path = ['_static'] 146 | 147 | # If not '', a 'Last updated on:' timestamp is inserted at every page 148 | # bottom, using the given strftime format. 149 | #html_last_updated_fmt = '%b %d, %Y' 150 | 151 | # If true, SmartyPants will be used to convert quotes and dashes to 152 | # typographically correct entities. 153 | #html_use_smartypants = True 154 | 155 | # Custom sidebar templates, maps document names to template names. 156 | #html_sidebars = {} 157 | 158 | # Additional templates that should be rendered to pages, maps page names 159 | # to template names. 160 | #html_additional_pages = {} 161 | 162 | # If false, no module index is generated. 163 | #html_domain_indices = True 164 | 165 | # If false, no index is generated. 166 | #html_use_index = True 167 | 168 | # If true, the index is split into individual pages for each letter. 169 | #html_split_index = False 170 | 171 | # If true, links to the reST sources are added to the pages. 172 | #html_show_sourcelink = True 173 | 174 | # If true, "Created using Sphinx" is shown in the HTML footer. 175 | # Default is True. 176 | #html_show_sphinx = True 177 | 178 | # If true, "(C) Copyright ..." is shown in the HTML footer. 179 | # Default is True. 180 | #html_show_copyright = True 181 | 182 | # If true, an OpenSearch description file will be output, and all pages 183 | # will contain a tag referring to it. The value of this option 184 | # must be the base URL from which the finished HTML is served. 185 | #html_use_opensearch = '' 186 | 187 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 188 | #html_file_suffix = None 189 | 190 | # Output file base name for HTML help builder. 191 | htmlhelp_basename = 'image_featurizerdoc' 192 | 193 | 194 | # -- Options for LaTeX output ------------------------------------------ 195 | 196 | latex_elements = { 197 | # The paper size ('letterpaper' or 'a4paper'). 198 | #'papersize': 'letterpaper', 199 | 200 | # The font size ('10pt', '11pt' or '12pt'). 201 | #'pointsize': '10pt', 202 | 203 | # Additional stuff for the LaTeX preamble. 204 | #'preamble': '', 205 | } 206 | 207 | # Grouping the document tree into LaTeX files. List of tuples 208 | # (source start file, target name, title, author, documentclass 209 | # [howto/manual]). 210 | latex_documents = [ 211 | ('index', 'image_featurizer.tex', 212 | u'Image Featurizer Documentation', 213 | u'Jett Oristaglio', 'manual'), 214 | ] 215 | 216 | # The name of an image file (relative to this directory) to place at 217 | # the top of the title page. 218 | #latex_logo = None 219 | 220 | # For "manual" documents, if this is true, then toplevel headings 221 | # are parts, not chapters. 222 | #latex_use_parts = False 223 | 224 | # If true, show page references after internal links. 225 | #latex_show_pagerefs = False 226 | 227 | # If true, show URL addresses after external links. 228 | #latex_show_urls = False 229 | 230 | # Documents to append as an appendix to all manuals. 231 | #latex_appendices = [] 232 | 233 | # If false, no module index is generated. 234 | #latex_domain_indices = True 235 | 236 | 237 | # -- Options for manual page output ------------------------------------ 238 | 239 | # One entry per manual page. List of tuples 240 | # (source start file, name, description, authors, manual section). 241 | man_pages = [ 242 | ('index', 'image_featurizer', 243 | u'Image Featurizer Documentation', 244 | [u'Jett Oristaglio'], 1) 245 | ] 246 | 247 | # If true, show URL addresses after external links. 248 | #man_show_urls = False 249 | 250 | 251 | # -- Options for Texinfo output ---------------------------------------- 252 | 253 | # Grouping the document tree into Texinfo files. List of tuples 254 | # (source start file, target name, title, author, 255 | # dir menu entry, description, category) 256 | texinfo_documents = [ 257 | ('index', 'image_featurizer', 258 | u'Image Featurizer Documentation', 259 | u'Jett Oristaglio', 260 | 'image_featurizer', 261 | 'One line description of project.', 262 | 'Miscellaneous'), 263 | ] 264 | 265 | # Documents to append as an appendix to all manuals. 266 | #texinfo_appendices = [] 267 | 268 | # If false, no module index is generated. 269 | #texinfo_domain_indices = True 270 | 271 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 272 | #texinfo_show_urls = 'footnote' 273 | 274 | # If true, do not generate a @detailmenu in the "Top" node's menu. 275 | #texinfo_no_detailmenu = False 276 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/guides/installation.md: -------------------------------------------------------------------------------- 1 | Installation: 2 | ============ 3 | 4 | 5 | 1: Setting Up The Virtual Environment 6 | --------------------------------- 7 | 8 | ### VirtualEnv 9 | To install virtualenv, follow this guide: [virtualenv installation guide](http://sourabhbajaj.com/mac-setup/Python/virtualenv.html) 10 | 11 | Once virtualenv is installed, create a new environment to run pic2vec: 12 | 13 | ```bash 14 | $ virtualenv pic2vec 15 | ``` 16 | Then activate the environment: 17 | 18 | ```bash 19 | $ source pic2vec/bin/activate 20 | ``` 21 | 22 | ### Conda 23 | To install Anaconda, follow this guide: [Anaconda installation guide](https://docs.continuum.io/anaconda/install) 24 | 25 | Once Anaconda is installed, create a new environment to run pic2vec: 26 | 27 | ```bash 28 | $ conda create --name pic2vec 29 | ``` 30 | 31 | When Conda asks for confirmation, type 'y' for 'yes'. 32 | 33 | To activate the environment on OS X or Linux: 34 | 35 | ```bash 36 | $ source activate pic2vec 37 | ``` 38 | 39 | To activate the environment on Windows: 40 | ```bash 41 | $ activate pic2vec 42 | ``` 43 | 44 | 45 | Once in a virtual environment, there are several ways to install the 46 | pic2vec package. 47 | 48 | 49 | 50 | 2: Installing The Pic2Vec Package 51 | ------------------- 52 | 53 | ### Pip Installation 54 | To install pic2vec through pip on OS X or Linux, run this command in your terminal: 55 | 56 | ```bash 57 | $ pip install pic2vec 58 | ``` 59 | To install through pip on Windows, run this command in terminal: 60 | 61 | ```bash 62 | $ python -m pip install pic2vec 63 | ``` 64 | 65 | This is the preferred method to install pic2vec, as it will always install the most recent stable release. 66 | 67 | If you don't have [pip](https://pip.pypa.io) installed, this [Python installation guide](http://docs.python-guide.org/en/latest/starting/installation/) can guide you through the process. 68 | 69 | 70 | ### Installing From setup.py 71 | The sources for pic2vec can be downloaded from the [Github repo](https://github.com/datarobot/pic2vec). 72 | 73 | You can either clone the public repository: 74 | 75 | ```bash 76 | $ git clone git@github.com:datarobot/pic2vec.git 77 | ``` 78 | Or download the [tarball](https://github.com/datarobot/pic2vec/tarball/master): 79 | 80 | ```bash 81 | $ curl -OL https://github.com/datarobot/pic2vec/tarball/master 82 | ``` 83 | 84 | Once you have a copy of the source, you can build a binary distribution and install it from inside the directory with: 85 | 86 | ```bash 87 | $ python setup.py bdist_wheel 88 | $ cd dist/ 89 | $ pip install pic2vec-{VERSION}-py2.py3-none-any.whl 90 | ``` 91 | Check the dist folder to see what you need to fill in for the {VERSION} section. 92 | 93 | 3: Troubleshooting 94 | --------------- 95 | 96 | 1. If you see an error similar to `TypeError: find_packages() got an unexpected 97 | keyword argument 'include'` then you need to upgrade your setuptools. 98 | 99 | ```bash 100 | pip install -U setuptools 101 | ``` 102 | 103 | 2. If you see an error similar to `No local packages or working download links 104 | found for tensorflow` then you need to upgrade your pip. 105 | 106 | ```bash 107 | pip install -U pip 108 | ``` 109 | 110 | 3. If you have problems with tests or strange runtime exceptions, make sure 111 | your Keras installation isn't configured for Theano use. Open `~/.keras/keras.json` 112 | and check that the `backend` parameter value is set to `tensorflow`. If it is `theano` - 113 | simply remove that file, and on next execution Keras will find your Tensorflow install and create the correct configuration file. 114 | -------------------------------------------------------------------------------- /docs/history.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../HISTORY.rst 2 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to Image Featurizer's documentation! 2 | ====================================== 3 | 4 | Contents: 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | readme 10 | installation 11 | usage 12 | modules 13 | contributing 14 | authors 15 | history 16 | 17 | Indices and tables 18 | ================== 19 | 20 | * :ref:`genindex` 21 | * :ref:`modindex` 22 | * :ref:`search` 23 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\pic2vec.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\pic2vec.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /docs/markdowns/parameters.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/docs/markdowns/parameters.md -------------------------------------------------------------------------------- /docs/readme.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.md 2 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | ===== 2 | Usage 3 | ===== 4 | 5 | To use pic2vec in a project:: 6 | 7 | import pic2vec 8 | -------------------------------------------------------------------------------- /pic2vec/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Top-level package for Pic2Vec.""" 4 | 5 | __author__ = """Jett Oristaglio""" 6 | __email__ = 'jettori88@gmail.com' 7 | __version__ = '0.1.0' 8 | 9 | from pic2vec.build_featurizer import (_decapitate_model, _find_pooling_constant, # NOQA 10 | _splice_layer, _downsample_model_features, 11 | _initialize_model, _check_downsampling_mismatch, 12 | build_featurizer) 13 | 14 | from pic2vec.feature_preprocessing import (_create_df_with_image_paths, # NOQA 15 | _find_directory_image_paths, 16 | _find_csv_image_paths, 17 | _find_combined_image_paths, 18 | _image_paths_finder, _convert_single_image, 19 | preprocess_data) 20 | 21 | from pic2vec.data_featurizing import featurize_data, create_features # NOQA 22 | 23 | from pic2vec.squeezenet import SqueezeNet # NOQA 24 | 25 | from pic2vec.image_featurizer import ImageFeaturizer # NOQA 26 | -------------------------------------------------------------------------------- /pic2vec/build_featurizer.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file deals with building the actual featurizer: 3 | 1. Initializing the InceptionV3 model 4 | 2. Decapitating it to the appropriate depth 5 | 3. Downsampling, if desired 6 | 7 | The integrated function is the build_featurizer function, which takes the depth, 8 | a flag signalling downsampling, and the number of features to downsample to. 9 | """ 10 | 11 | import logging 12 | import os 13 | import warnings 14 | 15 | import trafaret as t 16 | from keras.applications import InceptionV3, ResNet50, VGG16, VGG19, Xception 17 | from keras.engine.topology import InputLayer 18 | from keras.layers import GlobalAvgPool2D, Lambda, average 19 | from keras.models import Model 20 | import keras.backend as K 21 | 22 | from .squeezenet import SqueezeNet 23 | 24 | if K.backend() != 'tensorflow': 25 | logging.warn('Without a tensorflow backend, SqueezeNet and Xception will not be ' 26 | ' available. Please initialize ImageFeaturizer with either vgg16, vgg19, ' 27 | 'resnet50, or inceptionv3.') 28 | 29 | supported_model_types = { 30 | 'squeezenet': { 31 | 'label': 'SqueezeNet', 32 | 'class': SqueezeNet, 33 | 'kwargs': {'weights': None}, 34 | 'depth': {1: 5, 2: 12, 3: 19, 4: 26} 35 | }, 36 | 'inceptionv3': { 37 | 'label': 'InceptionV3', 38 | 'class': InceptionV3, 39 | 'kwargs': {}, 40 | 'depth': {1: 2, 2: 19, 3: 33, 4: 50} 41 | }, 42 | 'vgg16': { 43 | 'label': 'VGG16', 44 | 'class': VGG16, 45 | 'kwargs': {}, 46 | 'depth': {1: 1, 2: 2, 3: 4, 4: 8} 47 | }, 48 | 'vgg19': { 49 | 'label': 'VGG19', 50 | 'class': VGG19, 51 | 'kwargs': {}, 52 | 'depth': {1: 1, 2: 2, 3: 4, 4: 9} 53 | }, 54 | 'resnet50': { 55 | 'label': 'ResNet50', 56 | 'class': ResNet50, 57 | 'kwargs': {}, 58 | 'depth': {1: 2, 2: 5, 3: 13, 4: 23} 59 | }, 60 | 'xception': { 61 | 'label': 'Xception', 62 | 'class': Xception, 63 | 'kwargs': {}, 64 | 'depth': {1: 1, 2: 8, 3: 18, 4: 28} 65 | } 66 | } 67 | 68 | 69 | @t.guard(model_str=t.Enum(*supported_model_types.keys()), 70 | loaded_weights=t.String(allow_blank=True)) 71 | def _initialize_model(model_str, loaded_weights=''): 72 | """ 73 | Initialize the InceptionV3 model with the saved weights, or 74 | if the weight file can't be found, load them automatically through Keras. 75 | 76 | Parameters: 77 | ---------- 78 | model_str : str 79 | String deciding which model to use for the featurizer 80 | 81 | Returns: 82 | ------- 83 | model : keras.models.Model 84 | The initialized model loaded with pre-trained weights 85 | """ 86 | logging.info('Loading/downloading {model_label} model weights. ' 87 | 'This may take a minute first time.' 88 | .format(model_label=supported_model_types[model_str]['label'])) 89 | 90 | if loaded_weights != '': 91 | model = supported_model_types[model_str]['class'](weights=None) 92 | try: 93 | model.load_weights(loaded_weights) 94 | except IOError as err: 95 | logging.error('Problem loading the custom weights. If not an advanced user, please ' 96 | 'leave loaded_weights unconfigured.') 97 | raise err 98 | else: 99 | model = supported_model_types[model_str]['class'](**supported_model_types 100 | [model_str]['kwargs']) 101 | 102 | if model_str == 'squeezenet': 103 | # Special case for squeezenet - we already have weights for it 104 | this_dir, this_filename = os.path.split(__file__) 105 | model_path = os.path.join(this_dir, 106 | 'saved_models', 107 | 'squeezenet_weights_tf_dim_ordering_tf_kernels.h5') 108 | if not os.path.isfile(model_path): 109 | raise ValueError('Could not find the weights. Download another model' 110 | ' or replace the SqueezeNet weights in the model folder.') 111 | model.load_weights(model_path) 112 | 113 | logging.info('Model successfully initialized.') 114 | return model 115 | 116 | 117 | @t.guard(model=t.Type(Model), depth=t.Int(gte=1)) 118 | def _decapitate_model(model, depth): 119 | """ 120 | Cut off end layers of a model equal to the depth of the desired outputs, 121 | and then remove the links connecting the new outer layer to the old ones. 122 | 123 | Parameters: 124 | ---------- 125 | model: keras.models.Model 126 | The model being decapitated. Note: original model is not changed, method returns new model. 127 | depth: int 128 | The number of layers to pop off the top of the network 129 | 130 | Returns: 131 | ------- 132 | model: keras.models.Model 133 | Decapitated model. 134 | """ 135 | # -------------- # 136 | # ERROR CHECKING # 137 | 138 | # Make sure the depth isn't greater than the number of layers (minus input) 139 | if depth >= len(model.layers) - 1: 140 | raise ValueError('Can\'t go deeper than the number of layers in the model. Tried to pop ' 141 | '{} layers, but model only has {}'.format(depth, len(model.layers) - 1)) 142 | 143 | if not isinstance(model.layers[0], InputLayer): 144 | warnings.warn('First layer of the model is not an input layer. Beware of depth issues.') 145 | # -------------------------------------------------------- # 146 | 147 | # Get the intermediate output 148 | new_model_output = model.layers[(depth + 1) * -1].output 149 | new_model = Model(inputs=model.input, outputs=new_model_output) 150 | new_model.layers[-1].outbound_nodes = [] 151 | return new_model 152 | 153 | 154 | @t.guard(features=t.Any(), num_pooled_features=t.Int(gte=1)) 155 | def _find_pooling_constant(features, num_pooled_features): 156 | """ 157 | Given a tensor and an integer divisor for the desired downsampled features, 158 | this will downsample the tensor to the desired number of features 159 | 160 | Parameters: 161 | ---------- 162 | features : Tensor 163 | the layer output being downsampled 164 | num_pooled_features : int 165 | the desired number of features to downsample to 166 | 167 | Returns: 168 | ------- 169 | int 170 | the integer pooling constant required to correctly splice the layer output for downsampling 171 | """ 172 | # Initializing the outputs 173 | num_features = features.shape[-1].__int__() 174 | 175 | # Find the pooling constant 176 | pooling_constant = num_features / float(num_pooled_features) 177 | 178 | # -------------- # 179 | # ERROR CHECKING # 180 | 181 | if pooling_constant < 1: 182 | raise ValueError( 183 | 'You can\'t downsample to a number bigger than the original feature space.') 184 | 185 | # Check that the number of downsampled features is an integer divisor of the original output 186 | if not pooling_constant.is_integer(): 187 | # Store recommended downsample 188 | recommended_downsample = num_features / int(pooling_constant) 189 | raise ValueError('Trying to downsample features to non-integer divisor: ' 190 | 'from {} to {}.\n\n Did you mean to downsample to' 191 | ' {}? Regardless, please choose an integer divisor.' 192 | .format(num_features, num_pooled_features, recommended_downsample)) 193 | # -------------------------------------------------------- # 194 | # Cast the pooling constant back to an int from a float if it passes the tests 195 | return int(pooling_constant) 196 | 197 | 198 | @t.guard(tensor=t.Any(), number_splices=t.Int(gte=1)) 199 | def _splice_layer(tensor, number_splices): 200 | """ 201 | Splice a layer into a number of even slices through skipping. This downsamples the layer, 202 | and allows for operations to be performed over neighbors. 203 | 204 | Parameters: 205 | ---------- 206 | layer: Tensor 207 | the layer output being spliced 208 | number_splices: int 209 | the number of new layers the original layer is being spliced into. 210 | NOTE: must be integer divisor of layer 211 | 212 | Returns: 213 | ------- 214 | list_of_spliced_layers : list of Tensor 215 | a list of the spliced tensor sections of the original layer, with neighboring nodes 216 | occupying the same indices across splices 217 | """ 218 | # -------------- # 219 | # ERROR CHECKING # 220 | # Need to check that the number of splices is an integer divisor of the feature 221 | # size of the layer 222 | num_features = tensor.shape[-1].__int__() 223 | if num_features % number_splices: 224 | raise ValueError('Number of splices needs to be an integer divisor of' 225 | ' the number of features. Tried to split {} features into' 226 | ' {} equal parts.'.format(num_features, number_splices)) 227 | # ------------------------------------------ # 228 | # Split the tensor into equal parts by skipping nodes equal to the number 229 | # of splices. This allows for merge operations over neighbor features 230 | 231 | return [Lambda(lambda features: features[:, i::number_splices])(tensor) for i in 232 | range(number_splices)] 233 | 234 | 235 | @t.guard(features=t.Any(), num_pooled_features=t.Int(gte=1)) 236 | def _downsample_model_features(features, num_pooled_features): 237 | """ 238 | Take in a layer of a model, and downsample the layer to a specified size. 239 | 240 | Parameters: 241 | ---------- 242 | features : Tensor 243 | the final layer output being downsampled 244 | num_pooled_features : int 245 | the desired number of features to downsample to 246 | 247 | Returns: 248 | ------- 249 | downsampled_features : Tensor 250 | a tensor containing the downsampled features with size = (?, num_pooled_features) 251 | """ 252 | # Find the pooling constant needed 253 | pooling_constant = _find_pooling_constant(features, num_pooled_features) 254 | # Splice the top layer into n layers, where n = pooling constant. 255 | list_of_spliced_layers = _splice_layer(features, pooling_constant) 256 | # Average the spliced layers to downsample 257 | downsampled_features = average(list_of_spliced_layers) 258 | return downsampled_features 259 | 260 | 261 | def _check_downsampling_mismatch(downsample, num_pooled_features, output_layer_size): 262 | """ 263 | If downsample is flagged True, but no downsampling size is given, then automatically 264 | downsample model. If downsample flagged false, but there is a size given, set downsample 265 | to true. 266 | 267 | Parameters: 268 | ---------- 269 | downsample : bool 270 | Boolean flagging whether model is being downsampled 271 | num_pooled_features : int 272 | the desired number of features to downsample to 273 | output_layer_size : int 274 | number of nodes in the output layer being downsampled 275 | Returns: 276 | ------- 277 | downsample : boolean 278 | Updated boolean flagging whether model is being downsampled 279 | num_pooled_features : int 280 | Updated number of features model output is being downsample to 281 | """ 282 | # If num_pooled_features left uninitialized, and they want to downsample, 283 | # perform automatic downsampling 284 | if num_pooled_features == 0 and downsample: 285 | if output_layer_size % 2 == 0: 286 | num_pooled_features = output_layer_size // 2 287 | logging.warning('Automatic downsampling to {}. If you would like to set custom ' 288 | 'downsampling, pass in an integer divisor of {} to ' 289 | 'num_pooled_features.'.format(num_pooled_features, output_layer_size)) 290 | else: 291 | raise ValueError('Sorry, no automatic downsampling available for this model.') 292 | 293 | # If they have initialized num_pooled_features, but not turned on 294 | # downsampling, downsample to what they entered 295 | elif num_pooled_features != 0 and not downsample: 296 | logging.info('Downsampling to {}.'.format(num_pooled_features)) 297 | downsample = True 298 | 299 | return downsample, num_pooled_features 300 | 301 | 302 | @t.guard(depth_of_featurizer=t.Int(gte=1, lte=4), 303 | downsample=t.Bool, 304 | num_pooled_features=t.Int(gte=0), 305 | model_str=t.Enum(*supported_model_types.keys()), 306 | loaded_model=t.Type(Model) | t.Null) 307 | def build_featurizer(depth_of_featurizer, downsample, num_pooled_features=0, 308 | model_str='squeezenet', loaded_model=None): 309 | """ 310 | Create the full featurizer. 311 | 312 | Initialize the model, decapitate it to the appropriate depth, and check if downsampling 313 | top-layer featurization. If so, downsample to the desired feature space 314 | 315 | Parameters: 316 | ---------- 317 | depth_of_featurizer : int 318 | How deep to cut the network. Can be 1, 2, 3, or 4. 319 | downsample : bool 320 | Boolean flagging whether to perform downsampling 321 | num_pooled_features : int 322 | If we downsample, integer determining how small to downsample. 323 | NOTE: Must be integer divisor of original number of features 324 | or 0 if we don't want to specify exact number 325 | model_str : str 326 | String deciding which model to use for the featurizer 327 | loaded_model : keras.models.Model, optional 328 | If specified - use the model for featurizing, istead of creating new one. 329 | 330 | Returns: 331 | ------- 332 | model: keras.models.Model 333 | The decapitated, potentially downsampled, pre-trained image featurizer. 334 | With no downsampling, the output features are equal to the top densely- 335 | connected layer of the network, which depends on the depth of the model. 336 | With downsampling, the output is equal to a downsampled average of 337 | multiple splices of the last densely connected layer. 338 | """ 339 | # BUILDING INITIAL MODEL # 340 | if loaded_model is not None: 341 | model = loaded_model 342 | else: 343 | model = _initialize_model(model_str=model_str) 344 | 345 | # DECAPITATING MODEL # 346 | # Find the right depth from the dictionary and decapitate the model 347 | model = _decapitate_model(model, supported_model_types[model_str]['depth'][depth_of_featurizer]) 348 | model_output = model.layers[-1].output 349 | # Add pooling layer to the top of the now-decapitated model as the featurizer, 350 | # if it needs to be downsampled 351 | if len(model.layers[-1].output_shape) > 2: 352 | model_output = GlobalAvgPool2D(name='featurizer')(model_output) 353 | 354 | # Save the model output 355 | num_output_features = model_output.shape[-1].__int__() 356 | logging.info("Model decapitated.") 357 | 358 | # DOWNSAMPLING FEATURES # 359 | # Checking that the user's downsampling flag matches the initialization of the downsampling 360 | (downsample, num_pooled_features) = _check_downsampling_mismatch(downsample, 361 | num_pooled_features, 362 | num_output_features) 363 | 364 | # If we are downsampling the features, we add a pooling layer to the outputs 365 | # to bring it to the correct size. 366 | if downsample: 367 | model_output = _downsample_model_features(model_output, num_pooled_features) 368 | logging.info("Model downsampled.") 369 | 370 | # Finally save the model 371 | model = Model(inputs=model.input, outputs=model_output) 372 | logging.info("Full featurizer is built.") 373 | if downsample: 374 | logging.info("Final layer feature space downsampled to {}".format(num_pooled_features)) 375 | else: 376 | logging.info("No downsampling. Final layer feature space has size {}" 377 | .format(num_output_features)) 378 | 379 | return model 380 | -------------------------------------------------------------------------------- /pic2vec/data_featurizing.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file deals with featurizing the data once the featurizer has been built and the data has been 3 | loaded and vectorized. 4 | 5 | It allows users to featurize the data with model.predict. It also lets the featurizer write the 6 | featurized data to the csv containing the images, appending the features to additional columns 7 | in-line with each image row. Also adds "image_missing" columns automatically for each image_column 8 | which contains binary values of whether the image in that row is missing. 9 | """ 10 | 11 | import logging 12 | 13 | import trafaret as t 14 | import numpy as np 15 | import pandas as pd 16 | 17 | from keras.models import Model 18 | 19 | 20 | @t.guard(model=t.Type(Model), array=t.Type(np.ndarray)) 21 | def featurize_data(model, array): 22 | """ 23 | Given a model and an array, perform error checking and return the prediction 24 | of the full feature array. 25 | 26 | Parameters: 27 | ---------- 28 | model : keras.models.Model 29 | The featurizer model performing predictions 30 | 31 | array : np.ndarray 32 | The vectorized array of images being converted into features 33 | 34 | Returns: 35 | -------- 36 | full_feature_array : np.ndarray 37 | A numpy array containing the featurized images 38 | 39 | """ 40 | # Raise error if the array has the wrong shape 41 | if len(array.shape) != 4: 42 | raise ValueError('Image array must be a 4D tensor, with dimensions: ' 43 | '[batch, height, width, channel]') 44 | 45 | # Perform predictions 46 | logging.info('Creating feature array.') 47 | 48 | # NOTE: No clue why this is here, it's to make the models note break due to 49 | # Keras update: https://github.com/keras-team/keras/issues/9394 50 | model.compile('sgd', 'mse') 51 | full_feature_array = model.predict(array, verbose=1) 52 | 53 | # Return features 54 | logging.info('Feature array created successfully.') 55 | return full_feature_array 56 | 57 | 58 | def _create_features_df_helper(data_array, full_feature_array, image_column_header): 59 | # Log how many photos are missing or blank: 60 | zeros_index = [np.count_nonzero(array_slice) == 0 for array_slice in data_array[:]] 61 | logging.info('Number of missing photos: {}'.format(len(zeros_index))) 62 | 63 | # Create column headers for features, and the features dataframe 64 | array_column_headers = ['{}_feat_{}'.format(image_column_header, feature) for feature in 65 | range(full_feature_array.shape[1])] 66 | 67 | df_features = pd.DataFrame(data=full_feature_array, columns=array_column_headers) 68 | 69 | # Create the missing column 70 | missing_column_header = ['{}_missing'.format(image_column_header)] 71 | df_missing = pd.DataFrame(data=zeros_index, columns=missing_column_header) 72 | 73 | # Create the full combined csv+features dataframe 74 | df_features_full = pd.concat([df_missing, df_features], axis=1) 75 | 76 | return df_features_full 77 | 78 | 79 | def create_features(data_array, new_feature_array, image_column_header): 80 | """ 81 | Create features dataframe, and append the features to the appropriate 82 | rows of the original dataframe. 83 | 84 | Parameters: 85 | ----------- 86 | data_array : np.ndarray 87 | The images contained in a single 2D array. Used to track missing images. 88 | 89 | new_feature_array : np.ndarray 90 | The array of generated features 91 | 92 | image_column_header : str 93 | String containing the name of the image column 94 | 95 | Returns: 96 | -------- 97 | df_features : pandas.DataFrame 98 | The full dataframe containing the features appended to the dataframe of the images 99 | """ 100 | 101 | # -------------- # 102 | # ERROR CHECKING # 103 | # Raise error if the data array has the wrong shape 104 | if len(data_array.shape) != 4: 105 | raise ValueError('Data array must be 4D array, with shape: [batch, height, width, channel].' 106 | ' Gave feature array of shape: {}'.format(data_array.shape)) 107 | 108 | # Raise error if the feature array has the wrong shape 109 | if len(new_feature_array.shape) != 2: 110 | raise ValueError('Feature array must be 2D array, with shape: [batch, num_features]. ' 111 | 'Gave feature array of shape: {}'.format(new_feature_array.shape)) 112 | # --------------------------------------- # 113 | 114 | logging.info('Combining image features with original dataframe.') 115 | 116 | df_features = _create_features_df_helper(data_array, new_feature_array, 117 | image_column_header) 118 | 119 | # Return the full combined dataframe 120 | return df_features 121 | -------------------------------------------------------------------------------- /pic2vec/enums.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains a list of enums that are used across the entire pic2vec 3 | package. 4 | """ 5 | # List of models supported in pic2vec 6 | MODELS = ['squeezenet', 'vgg16', 'vgg19', 'resnet50', 'inceptionv3', 'xception'] 7 | 8 | # Tolerance for prediction error 9 | ATOL = 0.00001 10 | -------------------------------------------------------------------------------- /pic2vec/feature_preprocessing.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file deals with preprocessing the images for the featurizer. 3 | 4 | It gives the user 3 options: 5 | 1. Upload a CSV with URL pointers. 6 | 2. Upload an image directory with no CSV. The featurizer will generate a CSV automatically. 7 | 3. Upload a CSV with an image directory. The CSV will contain pointers to image in the directory. 8 | 9 | The integrated function is the preprocess_data function, which takes in the input and 10 | generates a 4D tensor containing the vectorized representations of the image to be featurized. 11 | """ 12 | 13 | from PIL import Image, ImageFile 14 | import logging 15 | import os 16 | try: 17 | from urllib import urlretrieve 18 | except ImportError: 19 | from urllib.request import urlretrieve 20 | import re 21 | 22 | ImageFile.LOAD_TRUNCATED_IMAGES = True 23 | Image.DEBUG = 0 24 | 25 | import numpy as np # noqa: E402 26 | import pandas as pd # noqa: E402 27 | import trafaret as t # noqa: E402 28 | import keras.applications as ka # noqa: E402 29 | from keras.preprocessing.image import load_img, img_to_array # noqa: E402 30 | 31 | ############################################## 32 | # FUNCTIONS FOR BUILDING LIST OF IMAGE PATHS # 33 | ############################################## 34 | 35 | # Dictionary for preprocessing algorithms 36 | # Unnecessary 'size' entry, but leaving in case of future use... 37 | preprocessing_dict = { 38 | 'squeezenet': { 39 | 'preprocess': ka.imagenet_utils.preprocess_input, 40 | 'size': (227, 227) 41 | }, 42 | 'vgg16': { 43 | 'preprocess': ka.vgg16.preprocess_input, 44 | 'size': (224, 224) 45 | }, 46 | 'vgg19': { 47 | 'preprocess': ka.vgg19.preprocess_input, 48 | 'size': (224, 224) 49 | }, 50 | 'resnet50': { 51 | 'preprocess': ka.resnet50.preprocess_input, 52 | 'size': (224, 224) 53 | }, 54 | 'inceptionv3': { 55 | 'preprocess': ka.inception_v3.preprocess_input, 56 | 'size': (299, 299) 57 | }, 58 | 59 | 'xception': { 60 | 'preprocess': ka.xception.preprocess_input, 61 | 'size': (299, 299) 62 | }, 63 | } 64 | 65 | 66 | def _create_df_with_image_paths(list_of_images, image_column_header): 67 | """ 68 | Take in a list of image names, and return a DataFrame where each 69 | image name is a new row. 70 | 71 | Parameters: 72 | ---------- 73 | list_of_images: list of str 74 | Full paths to images in a directory 75 | 76 | image_column_header : str 77 | The name of the header for the column of image paths 78 | 79 | Returns: 80 | ------- 81 | df : pandas.DataFrame 82 | The dataframe containing the full list of image names. 83 | 84 | """ 85 | df = pd.DataFrame(list_of_images, columns=[image_column_header]) 86 | return df 87 | 88 | 89 | def natural_key(string_): 90 | """See http://www.codinghorror.com/blog/archives/001018.html""" 91 | return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_)] 92 | 93 | 94 | def _find_directory_image_paths(image_directory): 95 | """ 96 | Take in a directory and parse which files in it are valid images for 97 | loading into the featurizer. 98 | 99 | List ordering explanation for directory-only featurization: 100 | The list will be sorted in order to create a deterministic file order for 101 | the generated csv, regardless of filesystem ordering. The CSV will then be 102 | used as the canonical order for all data preprocessing, featurizing, and 103 | eventually writing the features back into the csv. 104 | 105 | Parameters: 106 | ---------- 107 | image_directory : str 108 | The filepath to the directory containing the images 109 | 110 | Returns: 111 | ------- 112 | list_of_images : list of str 113 | A sorted list of full paths to each valid image contained in the directory 114 | 115 | """ 116 | image_list = os.listdir(image_directory) 117 | 118 | valid = ['JPEG', 'BMP', 'PNG'] 119 | list_of_images = [] 120 | 121 | for fichier in image_list: 122 | try: 123 | if Image.open(image_directory + fichier).format in valid: 124 | list_of_images.append(fichier) 125 | Image.close() 126 | except Exception: 127 | pass 128 | 129 | return sorted(list_of_images, key=natural_key) 130 | 131 | 132 | def _find_csv_image_paths(csv_path, image_column_header): 133 | """ 134 | Find the image paths in a csv without an image directory. 135 | 136 | List ordering explanation for csv-included featurization: 137 | The list does not need to be sorted, as it is already in a set order in the csv. 138 | The csv will be used as the canonical order for all data preprocessing, 139 | featurizing, and eventually writing the features back into the csv. 140 | 141 | 142 | Parameters: 143 | ---------- 144 | csv_path : str 145 | Full path to the csv 146 | 147 | image_column_header : str 148 | Name of the column containing the image paths 149 | 150 | Returns: 151 | ------- 152 | list_of_images: list of str 153 | Full paths to each valid image contained in the csv 154 | 155 | """ 156 | # Create the dataframe from the csv 157 | df = pd.read_csv(csv_path, error_bad_lines=False) 158 | 159 | # -------------- # 160 | # ERROR CHECKING # 161 | # Raise an error if the image column header isn't in the dataframe 162 | if image_column_header not in df.columns: 163 | raise ValueError('image_column_header error: {} does not exist as a ' 164 | 'column in the csv file.'.format(image_column_header)) 165 | # -------------- # 166 | 167 | # Create the list of image paths from the column in the dataframe 168 | list_of_images = df[image_column_header].tolist() 169 | 170 | return list_of_images, df 171 | 172 | 173 | def _find_combined_image_paths(image_path, csv_path, image_column_header): 174 | """ 175 | Find the image paths of a csv combined with a directory: take only the overlap 176 | to avoid errors. 177 | 178 | List ordering explanation for csv-included featurization: 179 | See docstring for _find_csv_image_paths() method. 180 | 181 | Parameters: 182 | ---------- 183 | image_path : str 184 | Full path to the provided image directory 185 | 186 | csv_path : str 187 | Full path to the provided csv 188 | 189 | image_column_header : str 190 | Name of the column in the csv containing image paths 191 | 192 | Returns: 193 | ------- 194 | list_of_images: list of str 195 | Full paths to each valid image contained in both the csv and directory 196 | 197 | """ 198 | # Find the list of image paths in the csv 199 | csv_list, df = _find_csv_image_paths(csv_path, image_column_header) 200 | 201 | # Find the list of image paths in the directory 202 | directory_list = _find_directory_image_paths(image_path) 203 | 204 | list_of_images = [] 205 | 206 | # Create the list of image paths by finding the overlap between the two, 207 | # keeping the order in the csv 208 | for path in csv_list: 209 | if path in directory_list: 210 | list_of_images.append(path) 211 | 212 | # If the image is in the csv but not the directory, input an empty string 213 | # as a placeholder. This image will eventually get vectorized to zeros. 214 | else: 215 | list_of_images.append('') 216 | 217 | # -------------- # 218 | # ERROR CHECKING # 219 | 220 | # Raise error if there are no shared images between the csv and the directory 221 | if all(path == '' for path in list_of_images): 222 | raise ValueError('Something is wrong. There are no shared images in the' 223 | ' csv and the image directory. Check formatting or files.') 224 | # -------------- # 225 | 226 | return list_of_images, df 227 | 228 | 229 | def _image_paths_finder(image_path, csv_path, image_column_header): 230 | """ 231 | Given an image column header, and either a csv path or an image directory, 232 | find the list of image paths. If just a csv, it's pulled from the column. 233 | If it's just a directory, it's pulled from the directory. If it's both, 234 | the list is checked from the overlap between the directory and the csv. 235 | 236 | Parameters: 237 | ---------- 238 | image_path : str 239 | Path to the image directory, if it exists 240 | 241 | csv_path : str 242 | Path to the csv, if it exists 243 | 244 | image_column_header : str 245 | Name of column header holding image information 246 | 247 | Returns: 248 | ------- 249 | list_of_images : list of str 250 | a list of the paths to all the images being featurized 251 | 252 | """ 253 | # CASE 1: They only give an image directory with no CSV 254 | if csv_path == '': 255 | 256 | # Find list of images from the image directory 257 | list_of_images = _find_directory_image_paths(image_path) 258 | 259 | # Create the new csv in a folder called 'featurizer_csv/' 260 | df = _create_df_with_image_paths(list_of_images, 261 | image_column_header=image_column_header) 262 | 263 | # CASE 2: They only give a CSV with no directory 264 | elif image_path == '': 265 | # Create the list_of_images from the csv 266 | list_of_images, df = _find_csv_image_paths(csv_path, image_column_header) 267 | logging.info('Found image paths from csv.') 268 | 269 | # CASE 3: They give both a CSV and a directory 270 | else: 271 | list_of_images, df = _find_combined_image_paths(image_path, csv_path, image_column_header) 272 | logging.info('Found image paths that overlap between both the directory and the csv.\n') 273 | 274 | return list_of_images, df 275 | 276 | 277 | ##################################### 278 | # FUNCTION FOR IMAGE VECTORIZATION # 279 | #################################### 280 | 281 | def _convert_single_image(image_source, model_str, image_path, target_size=(299, 299), 282 | grayscale=False): 283 | """ 284 | Take in a path to an image (either by URL or in a native directory) 285 | and convert the image to a preprocessed 4D numpy array, ready to be plugged 286 | into the featurizer. 287 | 288 | Parameters: 289 | ---------- 290 | image_source : str 291 | Flag for either url or directory source for image 292 | 293 | model_str : str 294 | Name of the model converting the image 295 | 296 | image_path : str 297 | Either the URL or the full path to the image 298 | 299 | target size : tuple of ints 300 | The desired size of the image 301 | 302 | grayscale : bool 303 | Boolean indicating whether the image is grayscale or not 304 | 305 | Returns: 306 | ------- 307 | image_array : np.ndarray 308 | a numpy array that represents the loaded and preprocessed image 309 | 310 | """ 311 | # Retrieve the image, either from a given url or from a directory 312 | try: 313 | if image_source == 'url': 314 | image_file = urlretrieve(image_path)[0] 315 | elif image_source == 'directory': 316 | image_file = image_path 317 | 318 | # If the image can't be retrieved, return a zeros vector of the appropriate size 319 | except (IOError, ValueError): 320 | # The channel dimension for a missing image is 3 if not grayscale, or 1 if grayscale 321 | im_size = target_size + (3 - 2 * grayscale,) 322 | logging.error('ERROR: Could not load/convert image to numpy array: {}'.format(image_path)) 323 | return np.zeros(im_size) 324 | 325 | # Load the image, and convert it to a numpy array with the target size 326 | image = load_img(image_file, target_size=target_size, grayscale=grayscale) 327 | image_array = img_to_array(image) 328 | 329 | # Expand the dimension for keras preprocessing, and preprocess the data 330 | # according to the InceptionV3 training that they performed. 331 | image_array = np.expand_dims(image_array, axis=0) 332 | image_array = preprocessing_dict[model_str]['preprocess'](image_array) 333 | 334 | # Return the image array 335 | return image_array 336 | 337 | 338 | ################################################ 339 | # FUNCTION FOR END-TO-END DATA PREPROCESSING # 340 | ################################################ 341 | 342 | def _find_image_source(image_path): 343 | 344 | # IMAGE RETRIEVAL AND VECTORIZATION # 345 | # Find image source: whether from url or directory 346 | if image_path == '': 347 | image_source = 'url' 348 | 349 | else: 350 | image_source = 'directory' 351 | 352 | return image_source 353 | 354 | 355 | @t.guard(image_column_header=t.String(allow_blank=False), 356 | model_str=t.String(allow_blank=False), 357 | list_of_images=t.List(t.String(allow_blank=True)), 358 | image_path=t.String(allow_blank=True), 359 | csv_path=t.String(allow_blank=True), 360 | target_size=t.Tuple(t.Int, t.Int), 361 | grayscale=t.Bool) 362 | def preprocess_data(image_column_header, 363 | model_str, 364 | list_of_images, 365 | image_path='', 366 | csv_path='', 367 | target_size=(299, 299), 368 | grayscale=False): 369 | """ 370 | Receive the data (some combination of image directory + csv), find 371 | the list of valid images, and then convert each to an array and adds 372 | them to the full batch. 373 | 374 | Parameters: 375 | ---------- 376 | image_path : str 377 | The path to the image directory, if it is being passed 378 | 379 | csv_path : str 380 | The path to the csv, if it is being passed 381 | 382 | image_column_header : str 383 | The name of the column that contains the image paths in the csv 384 | 385 | target_size : tuple of ints 386 | The size that the images will be scaled to 387 | 388 | grayscale : bool 389 | Boolean indicating whether the images are grayscale or not 390 | 391 | Returns: 392 | ------- 393 | image_data : np.ndarray 394 | a 4D numpy tensor containing the (full or batched) vectorized images, 395 | ready to be pushed through the featurizer 396 | 397 | list_of_images : list of str 398 | the list of image paths in the same order as the batches 399 | of the numpy tensor. This will allow us to add the 400 | features to the correct row of the csv. 401 | 402 | """ 403 | # -------------- # 404 | # ERROR CHECKING # 405 | # -------------- # 406 | 407 | # If there is no image directory or csv, then something is wrong. 408 | if image_path == '' and csv_path == '': 409 | raise ValueError('Need to load either an image directory or a CSV with' 410 | ' URLs, if no image directory included.') 411 | # Raise an error if the image_path doesn't point to a directory 412 | if image_path and not os.path.isdir(image_path): 413 | raise TypeError('image_path must lead to a directory if ' 414 | 'it is initialized. It is where the images are stored.') 415 | 416 | if model_str not in preprocessing_dict.keys(): 417 | raise ValueError('model_str must be one the following: {}'.format(preprocessing_dict.keys)) 418 | # ------------------------------------------------------ # 419 | 420 | # BUILDING IMAGE PATH LIST # 421 | num_images = len(list_of_images) 422 | 423 | image_source = _find_image_source(image_path) 424 | 425 | # Set number of grayscale channels (3 if color, 1 if grayscale) 426 | channels = 3 - (2 * grayscale) 427 | 428 | # Initialize the full batch 429 | image_data = np.ones((num_images, target_size[0], target_size[1], channels)) 430 | 431 | # Create the full image tensor 432 | logging.info('Converting images.') 433 | 434 | image_dict = {} 435 | 436 | index = 0 437 | 438 | # Iterate through each image in the list of image names 439 | for image in list_of_images: 440 | # If the image is in the csv, but not in the directory, set it to all zeros 441 | # This allows the featurizer to correctly append features when there is 442 | # mismatch between the csv and the directory. Otherwise it would lose rows 443 | if image == '': 444 | image_data[index, :, :, :] = 0 445 | index += 1 446 | continue 447 | 448 | # If the image has already been vectorized before, just copy that slice 449 | if image in image_dict: 450 | image_data[index, :, :, :] = image_data[image_dict[image], :, :, :] 451 | 452 | # Otherwise, vectorize the image 453 | else: 454 | # Add the index to the dictionary to check in the future 455 | image_dict[image] = index 456 | 457 | # Append the image path to the image name. If there's none, nothing will change 458 | image = '{}{}'.format(image_path, image) 459 | 460 | # Place the vectorized image into the image data 461 | image_data[index, :, :, :] = _convert_single_image(image_source, model_str, image, 462 | target_size=target_size, 463 | grayscale=grayscale) 464 | 465 | # Progress report at set intervals 466 | if num_images < 1000: 467 | report_step = 100 468 | elif num_images < 5000: 469 | report_step = 500 470 | else: 471 | report_step = 1000 472 | if not index % report_step: 473 | logging.info('Converted {} images in batch. Only {} images left to go.'.format( 474 | index, num_images - index)) 475 | 476 | index += 1 477 | 478 | return image_data, list_of_images 479 | -------------------------------------------------------------------------------- /pic2vec/saved_models/squeezenet_weights_tf_dim_ordering_tf_kernels.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/pic2vec/saved_models/squeezenet_weights_tf_dim_ordering_tf_kernels.h5 -------------------------------------------------------------------------------- /pic2vec/squeezenet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for this squeezenet implementation pulled directly from Refik Can Malli's 3 | Keras Squeezenet project: 4 | https://github.com/rcmalli/keras-squeezenet/blob/master/README.md 5 | 6 | Original paper: 7 | https://arxiv.org/abs/1602.07360 8 | 9 | Squeezenet original repo: 10 | https://github.com/DeepScale/SqueezeNet 11 | 12 | Keras documentation: 13 | https://keras.io/ 14 | """ 15 | import warnings 16 | 17 | try: 18 | from keras.applications.imagenet_utils import _obtain_input_shape 19 | except ImportError: 20 | from keras_applications.imagenet_utils import _obtain_input_shape 21 | 22 | from keras import backend as K 23 | from keras.layers import Input, Convolution2D, MaxPooling2D, Activation, concatenate, Dropout, \ 24 | GlobalAveragePooling2D 25 | from keras.models import Model 26 | from keras.engine.topology import get_source_inputs 27 | from keras.utils import get_file 28 | from keras.utils import layer_utils 29 | 30 | sq1x1 = "squeeze1x1" 31 | exp1x1 = "expand1x1" 32 | exp3x3 = "expand3x3" 33 | relu = "relu_" 34 | 35 | WEIGHTS_PATH = "https://github.com/rcmalli/keras-squeezenet/releases/download/v1.0/" \ 36 | "squeezenet_weights_tf_dim_ordering_tf_kernels.h5" 37 | 38 | # Modular function for Fire Node 39 | 40 | 41 | def fire_module(x, fire_id, squeeze=16, expand=64): 42 | """Build special layer for SqueezeNet""" 43 | s_id = 'fire' + str(fire_id) + '/' 44 | 45 | if K.image_data_format() == 'channels_first': 46 | channel_axis = 1 47 | else: 48 | channel_axis = 3 49 | 50 | x = Convolution2D(squeeze, (1, 1), padding='valid', name=s_id + sq1x1)(x) 51 | x = Activation('relu', name=s_id + relu + sq1x1)(x) 52 | 53 | left = Convolution2D(expand, (1, 1), padding='valid', name=s_id + exp1x1)(x) 54 | left = Activation('relu', name=s_id + relu + exp1x1)(left) 55 | 56 | right = Convolution2D(expand, (3, 3), padding='same', name=s_id + exp3x3)(x) 57 | right = Activation('relu', name=s_id + relu + exp3x3)(right) 58 | 59 | x = concatenate([left, right], axis=channel_axis, name=s_id + 'concat') 60 | return x 61 | 62 | 63 | # Original SqueezeNet from paper. 64 | 65 | def SqueezeNet(input_tensor=None, input_shape=None, 66 | weights='imagenet', 67 | classes=1000): 68 | """Build SqueezeNet model""" 69 | if weights not in {'imagenet', None}: 70 | raise ValueError('The `weights` argument should be either ' 71 | '`None` (random initialization) or `imagenet` ' 72 | '(pre-training on ImageNet).') 73 | 74 | if weights == 'imagenet' and classes != 1000: 75 | raise ValueError('If using `weights` as imagenet with `include_top`' 76 | ' as true, `classes` should be 1000') 77 | 78 | input_shape = _obtain_input_shape(input_shape, 79 | default_size=227, 80 | min_size=48, 81 | data_format=K.image_data_format(), 82 | require_flatten=False) 83 | 84 | if input_tensor is None: 85 | img_input = Input(shape=input_shape) 86 | else: 87 | if not K.is_keras_tensor(input_tensor): 88 | img_input = Input(tensor=input_tensor, shape=input_shape) 89 | else: 90 | img_input = input_tensor 91 | 92 | x = Convolution2D(64, (3, 3), strides=(2, 2), padding='valid', name='conv1')(img_input) 93 | x = Activation('relu', name='relu_conv1')(x) 94 | x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool1')(x) 95 | 96 | x = fire_module(x, fire_id=2, squeeze=16, expand=64) 97 | x = fire_module(x, fire_id=3, squeeze=16, expand=64) 98 | x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool3')(x) 99 | 100 | x = fire_module(x, fire_id=4, squeeze=32, expand=128) 101 | x = fire_module(x, fire_id=5, squeeze=32, expand=128) 102 | x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool5')(x) 103 | 104 | x = fire_module(x, fire_id=6, squeeze=48, expand=192) 105 | x = fire_module(x, fire_id=7, squeeze=48, expand=192) 106 | x = fire_module(x, fire_id=8, squeeze=64, expand=256) 107 | x = fire_module(x, fire_id=9, squeeze=64, expand=256) 108 | x = Dropout(0.5, name='drop9')(x) 109 | 110 | x = Convolution2D(classes, (1, 1), padding='valid', name='conv10')(x) 111 | x = Activation('relu', name='relu_conv10')(x) 112 | x = GlobalAveragePooling2D()(x) 113 | out = Activation('softmax', name='loss')(x) 114 | 115 | # Ensure that the model takes into account 116 | # any potential predecessors of `input_tensor`. 117 | if input_tensor is not None: 118 | inputs = get_source_inputs(input_tensor) 119 | else: 120 | inputs = img_input 121 | 122 | model = Model(inputs, out, name='squeezenet') 123 | 124 | # load weights 125 | if weights == 'imagenet': 126 | 127 | weights_path = get_file('squeezenet_weights_tf_dim_ordering_tf_kernels.h5', 128 | WEIGHTS_PATH, 129 | cache_subdir='models') 130 | model.load_weights(weights_path) 131 | if K.backend() == 'theano': 132 | layer_utils.convert_all_kernels_in_model(model) 133 | 134 | if K.image_data_format() == 'channels_first': 135 | 136 | if K.backend() == 'tensorflow': 137 | warnings.warn('You are using the TensorFlow backend, yet you ' 138 | 'are using the Theano ' 139 | 'image data format convention ' 140 | '(`image_data_format="channels_first"`). ' 141 | 'For best performance, set ' 142 | '`image_data_format="channels_last"` in ' 143 | 'your Keras config ' 144 | 'at ~/.keras/keras.json.') 145 | return model 146 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.15.4,<2 2 | h5py>=2.7.0,<3 3 | scipy>=1.1,<2 4 | tensorflow>=1.2.0,<2 5 | keras>=2.2.3,<2.3.0 6 | pandas>=0.20.2,<1 7 | Pillow>=5.4.1,<6 8 | trafaret>=1,<2 9 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | pip==8.1.2 2 | bumpversion==0.5.3 3 | wheel==0.29.0 4 | watchdog==0.8.3 5 | flake8==2.6.0 6 | tox==2.3.1 7 | coverage==4.1 8 | Sphinx==1.4.8 9 | cryptography==1.7 10 | PyYAML==4.2b1 11 | pytest==2.9.2 12 | pytest-runner==2.11.1 13 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.101.1 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | search = version='{current_version}' 8 | replace = version='{new_version}' 9 | 10 | [bumpversion:file:pic2vec/__init__.py] 11 | search = __version__ = '{current_version}' 12 | replace = __version__ = '{new_version}' 13 | 14 | [bdist_wheel] 15 | universal = 1 16 | 17 | [flake8] 18 | exclude = docs 19 | max-line-length = 100 20 | 21 | [aliases] 22 | test = pytest 23 | # Define setup.py command aliases here 24 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from setuptools import setup, find_packages 5 | 6 | with open('README.md') as readme_file: 7 | readme = readme_file.read() 8 | 9 | with open('HISTORY.rst') as history_file: 10 | history = history_file.read() 11 | 12 | requirements = [ 13 | 'h5py>=2.7.0,<3', 14 | 'scipy>=1.1,<2', 15 | 'numpy>=1.15.4,<2', 16 | 'tensorflow>=1.2.0,<2', 17 | 'keras>=2.2.3,<2.3.0', 18 | 'pandas>=0.20.2,<1', 19 | 'Pillow>=5.4.1,<6', 20 | 'trafaret>=1,<2' 21 | ] 22 | 23 | setup_requirements = [ 24 | 'pytest-runner', 25 | # Put setup requirements (distutils extensions, etc.) here 26 | ] 27 | 28 | test_requirements = [ 29 | 'numpy', 30 | 'pytest', 31 | 'keras', 32 | ] 33 | 34 | setup( 35 | name='pic2vec', 36 | version='0.101.1', 37 | description='Featurize images using a decapitated, pre-trained deep learning network', 38 | long_description=readme + '\n\n' + history, 39 | long_description_content_type='text/markdown', 40 | author='Jett Oristaglio', 41 | author_email='jettori88@gmail.com', 42 | url='https://github.com/datarobot/pic2vec', 43 | packages=find_packages(include=['pic2vec']), 44 | include_package_data=True, 45 | package_data={ 46 | 'pic2vec': ['saved_models/squeezenet_weights_tf_dim_ordering_tf_kernels.h5'] 47 | }, 48 | install_requires=requirements, 49 | license='BSD license', 50 | zip_safe=False, 51 | keywords=['image_featurizer', 'featurize', 'pic2vec'], 52 | classifiers=[ 53 | 'Development Status :: 3 - Alpha', 54 | 'Intended Audience :: Developers', 55 | 'License :: OSI Approved :: BSD License', 56 | 'Natural Language :: English', 57 | 'Programming Language :: Python :: 2.7', 58 | 'Programming Language :: Python :: 3.4', 59 | 'Programming Language :: Python :: 3.5', 60 | 'Programming Language :: Python :: 3.6', 61 | 'Programming Language :: Python :: 3.7', 62 | ], 63 | test_suite='tests', 64 | tests_require=test_requirements, 65 | setup_requires=setup_requirements, 66 | ) 67 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Unit test package for image_featurizer.""" 4 | -------------------------------------------------------------------------------- /tests/build_featurizer_testing/inceptionv3_test_prediction.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/build_featurizer_testing/inceptionv3_test_prediction.npy -------------------------------------------------------------------------------- /tests/build_featurizer_testing/resnet50_test_prediction.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/build_featurizer_testing/resnet50_test_prediction.npy -------------------------------------------------------------------------------- /tests/build_featurizer_testing/squeezenet_test_prediction.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/build_featurizer_testing/squeezenet_test_prediction.npy -------------------------------------------------------------------------------- /tests/build_featurizer_testing/vgg16_test_prediction.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/build_featurizer_testing/vgg16_test_prediction.npy -------------------------------------------------------------------------------- /tests/build_featurizer_testing/vgg19_test_prediction.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/build_featurizer_testing/vgg19_test_prediction.npy -------------------------------------------------------------------------------- /tests/build_featurizer_testing/xception_test_prediction.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/build_featurizer_testing/xception_test_prediction.npy -------------------------------------------------------------------------------- /tests/data_featurizing_testing/array_testing/check_featurize.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/data_featurizing_testing/array_testing/check_featurize.npy -------------------------------------------------------------------------------- /tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_full: -------------------------------------------------------------------------------- 1 | image_missing,image_feat_0,image_feat_1,image_feat_2 2 | False,1.0,2.0,3.0 3 | False,4.0,5.0,6.0 4 | True,0.0,0.0,0.0 5 | False,7.0,8.0,9.0 6 | -------------------------------------------------------------------------------- /tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_images: -------------------------------------------------------------------------------- 1 | image 2 | borges.jpg 3 | arendt.bmp 4 | heidegger.jpg 5 | sappho.png 6 | -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/csv_testing/create_csv_check: -------------------------------------------------------------------------------- 1 | images 2 | arendt.bmp 3 | borges.jpg 4 | sappho.png 5 | -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/csv_testing/csv_image_path_check: -------------------------------------------------------------------------------- 1 | ,cats,images, 2 | ,foo,borges.jpg,dog 3 | ,bar,arendt.bmp,dog 4 | ,,sappho.png,dog 5 | -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/csv_testing/directory_combined_image_path_test: -------------------------------------------------------------------------------- 1 | ,cats,images, 2 | ,foo,heidegger.png,dog 3 | ,bar,arendt.bmp,dog 4 | ,,sappho.png,dog 5 | -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/csv_testing/directory_preprocess_system_test: -------------------------------------------------------------------------------- 1 | ,cats,images, 2 | ,foo,heidegger.png,dog 3 | ,bar,arendt.bmp,dog 4 | ,,sappho.png,dog 5 | ,bar,arendt.bmp,dog 6 | -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/csv_testing/error_directory_combined_test: -------------------------------------------------------------------------------- 1 | ,cats,images, 2 | ,foo,heidegger.png,dog 3 | ,bar,flynn.bmp,dog 4 | ,,pork.png,dog 5 | -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/csv_testing/error_row: -------------------------------------------------------------------------------- 1 | ,cats,images, 2 | ,foo,this_is_an_error,dog 3 | ,bar,https://s3.amazonaws.com/datarobot_public_datasets/images/pic2vec/borges.jpg,dog 4 | ,,https://s3.amazonaws.com/datarobot_public_datasets/images/pic2vec/sappho.png,dog 5 | -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/csv_testing/url_test: -------------------------------------------------------------------------------- 1 | ,cats,images, 2 | ,bar,https://s3.amazonaws.com/datarobot_public_datasets/images/pic2vec/borges.jpg,dog 3 | ,,https://s3.amazonaws.com/datarobot_public_datasets/images/pic2vec/arendt.bmp,dog 4 | ,,https://s3.amazonaws.com/datarobot_public_datasets/images/pic2vec/sappho.png, 5 | -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_image_arrays/image_test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_image_arrays/image_test.npy -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_image_arrays/image_test_grayscale.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_image_arrays/image_test_grayscale.npy -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_image_arrays/image_test_isotropic.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_image_arrays/image_test_isotropic.npy -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_image_arrays/image_test_isotropic_grayscale.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_image_arrays/image_test_isotropic_grayscale.npy -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_images/arendt.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_images/arendt.bmp -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_images/borges.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_images/borges.jpg -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_images/heidegger.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_images/heidegger.gif -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_images/sappho.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_images/sappho.png -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_preprocessing_arrays/arendt.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_preprocessing_arrays/arendt.npy -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_preprocessing_arrays/arendt_grayscale.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_preprocessing_arrays/arendt_grayscale.npy -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_preprocessing_arrays/borges.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_preprocessing_arrays/borges.npy -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_preprocessing_arrays/sappho.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_preprocessing_arrays/sappho.npy -------------------------------------------------------------------------------- /tests/feature_preprocessing_testing/test_preprocessing_arrays/sappho_grayscale.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/feature_preprocessing_testing/test_preprocessing_arrays/sappho_grayscale.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_inceptionv3.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_inceptionv3.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_inceptionv3_mult.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_inceptionv3_mult.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_resnet50.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_resnet50.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_resnet50_mult.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_resnet50_mult.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_squeezenet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_squeezenet.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_squeezenet_mult.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_squeezenet_mult.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_vgg16.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_vgg16.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_vgg16_mult.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_vgg16_mult.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_vgg19.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_vgg19.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_vgg19_mult.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_vgg19_mult.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_xception.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_xception.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/array_tests/check_prediction_array_xception_mult.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datarobot/pic2vec/2dae5093d10414456ca2210c78dc5c3392b70c8e/tests/image_featurizer_testing/array_tests/check_prediction_array_xception_mult.npy -------------------------------------------------------------------------------- /tests/image_featurizer_testing/csv_checking/squeezenet_check_csv.csv: -------------------------------------------------------------------------------- 1 | images_missing,images_feat_0,images_feat_1,images_feat_2,images_feat_3,images_feat_4,images_feat_5,images_feat_6,images_feat_7,images_feat_8,images_feat_9,images_feat_10,images_feat_11,images_feat_12,images_feat_13,images_feat_14,images_feat_15,images_feat_16,images_feat_17,images_feat_18,images_feat_19,images_feat_20,images_feat_21,images_feat_22,images_feat_23,images_feat_24,images_feat_25,images_feat_26,images_feat_27,images_feat_28,images_feat_29,images_feat_30,images_feat_31,images_feat_32,images_feat_33,images_feat_34,images_feat_35,images_feat_36,images_feat_37,images_feat_38,images_feat_39,images_feat_40,images_feat_41,images_feat_42,images_feat_43,images_feat_44,images_feat_45,images_feat_46,images_feat_47,images_feat_48,images_feat_49,images_feat_50,images_feat_51,images_feat_52,images_feat_53,images_feat_54,images_feat_55,images_feat_56,images_feat_57,images_feat_58,images_feat_59,images_feat_60,images_feat_61,images_feat_62,images_feat_63,images_feat_64,images_feat_65,images_feat_66,images_feat_67,images_feat_68,images_feat_69,images_feat_70,images_feat_71,images_feat_72,images_feat_73,images_feat_74,images_feat_75,images_feat_76,images_feat_77,images_feat_78,images_feat_79,images_feat_80,images_feat_81,images_feat_82,images_feat_83,images_feat_84,images_feat_85,images_feat_86,images_feat_87,images_feat_88,images_feat_89,images_feat_90,images_feat_91,images_feat_92,images_feat_93,images_feat_94,images_feat_95,images_feat_96,images_feat_97,images_feat_98,images_feat_99,images_feat_100,images_feat_101,images_feat_102,images_feat_103,images_feat_104,images_feat_105,images_feat_106,images_feat_107,images_feat_108,images_feat_109,images_feat_110,images_feat_111,images_feat_112,images_feat_113,images_feat_114,images_feat_115,images_feat_116,images_feat_117,images_feat_118,images_feat_119,images_feat_120,images_feat_121,images_feat_122,images_feat_123,images_feat_124,images_feat_125,images_feat_126,images_feat_127,images_feat_128,images_feat_129,images_feat_130,images_feat_131,images_feat_132,images_feat_133,images_feat_134,images_feat_135,images_feat_136,images_feat_137,images_feat_138,images_feat_139,images_feat_140,images_feat_141,images_feat_142,images_feat_143,images_feat_144,images_feat_145,images_feat_146,images_feat_147,images_feat_148,images_feat_149,images_feat_150,images_feat_151,images_feat_152,images_feat_153,images_feat_154,images_feat_155,images_feat_156,images_feat_157,images_feat_158,images_feat_159,images_feat_160,images_feat_161,images_feat_162,images_feat_163,images_feat_164,images_feat_165,images_feat_166,images_feat_167,images_feat_168,images_feat_169,images_feat_170,images_feat_171,images_feat_172,images_feat_173,images_feat_174,images_feat_175,images_feat_176,images_feat_177,images_feat_178,images_feat_179,images_feat_180,images_feat_181,images_feat_182,images_feat_183,images_feat_184,images_feat_185,images_feat_186,images_feat_187,images_feat_188,images_feat_189,images_feat_190,images_feat_191,images_feat_192,images_feat_193,images_feat_194,images_feat_195,images_feat_196,images_feat_197,images_feat_198,images_feat_199,images_feat_200,images_feat_201,images_feat_202,images_feat_203,images_feat_204,images_feat_205,images_feat_206,images_feat_207,images_feat_208,images_feat_209,images_feat_210,images_feat_211,images_feat_212,images_feat_213,images_feat_214,images_feat_215,images_feat_216,images_feat_217,images_feat_218,images_feat_219,images_feat_220,images_feat_221,images_feat_222,images_feat_223,images_feat_224,images_feat_225,images_feat_226,images_feat_227,images_feat_228,images_feat_229,images_feat_230,images_feat_231,images_feat_232,images_feat_233,images_feat_234,images_feat_235,images_feat_236,images_feat_237,images_feat_238,images_feat_239,images_feat_240,images_feat_241,images_feat_242,images_feat_243,images_feat_244,images_feat_245,images_feat_246,images_feat_247,images_feat_248,images_feat_249,images_feat_250,images_feat_251,images_feat_252,images_feat_253,images_feat_254,images_feat_255,images_feat_256,images_feat_257,images_feat_258,images_feat_259,images_feat_260,images_feat_261,images_feat_262,images_feat_263,images_feat_264,images_feat_265,images_feat_266,images_feat_267,images_feat_268,images_feat_269,images_feat_270,images_feat_271,images_feat_272,images_feat_273,images_feat_274,images_feat_275,images_feat_276,images_feat_277,images_feat_278,images_feat_279,images_feat_280,images_feat_281,images_feat_282,images_feat_283,images_feat_284,images_feat_285,images_feat_286,images_feat_287,images_feat_288,images_feat_289,images_feat_290,images_feat_291,images_feat_292,images_feat_293,images_feat_294,images_feat_295,images_feat_296,images_feat_297,images_feat_298,images_feat_299,images_feat_300,images_feat_301,images_feat_302,images_feat_303,images_feat_304,images_feat_305,images_feat_306,images_feat_307,images_feat_308,images_feat_309,images_feat_310,images_feat_311,images_feat_312,images_feat_313,images_feat_314,images_feat_315,images_feat_316,images_feat_317,images_feat_318,images_feat_319,images_feat_320,images_feat_321,images_feat_322,images_feat_323,images_feat_324,images_feat_325,images_feat_326,images_feat_327,images_feat_328,images_feat_329,images_feat_330,images_feat_331,images_feat_332,images_feat_333,images_feat_334,images_feat_335,images_feat_336,images_feat_337,images_feat_338,images_feat_339,images_feat_340,images_feat_341,images_feat_342,images_feat_343,images_feat_344,images_feat_345,images_feat_346,images_feat_347,images_feat_348,images_feat_349,images_feat_350,images_feat_351,images_feat_352,images_feat_353,images_feat_354,images_feat_355,images_feat_356,images_feat_357,images_feat_358,images_feat_359,images_feat_360,images_feat_361,images_feat_362,images_feat_363,images_feat_364,images_feat_365,images_feat_366,images_feat_367,images_feat_368,images_feat_369,images_feat_370,images_feat_371,images_feat_372,images_feat_373,images_feat_374,images_feat_375,images_feat_376,images_feat_377,images_feat_378,images_feat_379,images_feat_380,images_feat_381,images_feat_382,images_feat_383,images_feat_384,images_feat_385,images_feat_386,images_feat_387,images_feat_388,images_feat_389,images_feat_390,images_feat_391,images_feat_392,images_feat_393,images_feat_394,images_feat_395,images_feat_396,images_feat_397,images_feat_398,images_feat_399,images_feat_400,images_feat_401,images_feat_402,images_feat_403,images_feat_404,images_feat_405,images_feat_406,images_feat_407,images_feat_408,images_feat_409,images_feat_410,images_feat_411,images_feat_412,images_feat_413,images_feat_414,images_feat_415,images_feat_416,images_feat_417,images_feat_418,images_feat_419,images_feat_420,images_feat_421,images_feat_422,images_feat_423,images_feat_424,images_feat_425,images_feat_426,images_feat_427,images_feat_428,images_feat_429,images_feat_430,images_feat_431,images_feat_432,images_feat_433,images_feat_434,images_feat_435,images_feat_436,images_feat_437,images_feat_438,images_feat_439,images_feat_440,images_feat_441,images_feat_442,images_feat_443,images_feat_444,images_feat_445,images_feat_446,images_feat_447,images_feat_448,images_feat_449,images_feat_450,images_feat_451,images_feat_452,images_feat_453,images_feat_454,images_feat_455,images_feat_456,images_feat_457,images_feat_458,images_feat_459,images_feat_460,images_feat_461,images_feat_462,images_feat_463,images_feat_464,images_feat_465,images_feat_466,images_feat_467,images_feat_468,images_feat_469,images_feat_470,images_feat_471,images_feat_472,images_feat_473,images_feat_474,images_feat_475,images_feat_476,images_feat_477,images_feat_478,images_feat_479,images_feat_480,images_feat_481,images_feat_482,images_feat_483,images_feat_484,images_feat_485,images_feat_486,images_feat_487,images_feat_488,images_feat_489,images_feat_490,images_feat_491,images_feat_492,images_feat_493,images_feat_494,images_feat_495,images_feat_496,images_feat_497,images_feat_498,images_feat_499,images_feat_500,images_feat_501,images_feat_502,images_feat_503,images_feat_504,images_feat_505,images_feat_506,images_feat_507,images_feat_508,images_feat_509,images_feat_510,images_feat_511 2 | False,0.0,0.06430369,5.925252,1.2477938,1.9022676,0.7292365,0.35128284,1.2580512,0.0,1.9784806,6.760506,0.3802457,1.4616345,0.48937023,0.12520242,0.0,0.0,1.1434659,0.030002972,4.420496,0.0,0.08563948,7.4073334,0.011735982,0.2933519,3.811751,0.16320845,0.63225144,0.88222635,0.0,0.19566797,0.005651583,0.0,6.2003555,4.4338775,0.0,1.1506345,0.934765,0.9933638,0.12695538,5.620974,0.014169323,0.19265716,5.561198,0.03258076,0.2505361,1.1689211,2.3166757,1.3553897,0.00996804,1.6288273,0.18410471,6.302098,1.0044285,4.648501,0.9760399,1.512236,2.6010919,0.0,1.1071947,0.2489015,0.7132885,0.50306994,0.3520863,5.2353253,0.26186904,0.5956484,3.5394926,1.4525433,1.7428367,0.12893242,4.7874084,1.0850035,0.6844506,2.8190055,1.8660756,3.3059452,0.86724275,1.912618,0.50218135,7.156048,1.4795531,0.0,0.0,0.5205103,1.2208014,0.013696391,2.7571406,0.0,0.0,0.41602758,5.15148,4.7260184,1.9724343,0.9735211,5.360706,0.0725911,1.1349676,1.6301644,0.77679425,0.357845,0.4446485,1.4450411,2.4311843,0.3848904,0.14403474,1.9345828,2.3822713,5.25369,0.0,0.12637495,0.0,0.0,1.852182,0.0,0.24121365,0.0,0.6686679,0.0,0.082115926,1.0559088,1.1733891,0.0,0.61270684,10.096444,0.115368225,4.761737,0.7631597,1.4007128,2.838534,0.0,2.541959,0.31714454,7.229467,0.30863938,0.004410572,0.12027911,0.0,1.5905406,6.1431565,0.3887413,0.7746064,5.4975033,1.3848671,0.6790592,0.089019366,5.7478013,0.63026905,1.6860456,0.22501561,0.09998074,0.22472723,0.048187234,4.548613,0.39404544,0.0,3.2012563,2.4875112,4.816038,0.47314155,1.9401855,5.418616,2.3376446,0.581827,2.3534772,6.9106417,2.6489549,2.8527553,4.611235,0.57903886,0.11823338,0.23346938,0.0,2.60903,0.86994904,0.8627086,0.0,1.50256,0.002849428,0.042190213,0.13597369,0.12739834,0.0015194238,1.0115864,0.0,1.1693184,6.5084243,0.3633387,0.0,0.4999057,0.8373399,10.139998,0.032343287,2.3642514,3.5328903,0.19237278,0.118270814,0.0,4.096768,0.114599116,0.0,4.190225,0.56754124,0.8311648,3.7349038,0.0,4.2200136,0.01088281,0.1085515,1.0522821,0.41156217,2.6143224,0.28228495,0.64316785,0.91498697,2.548603,2.2334642,1.2991979,1.6584415,2.0870817,0.19921392,0.34985822,0.11114285,10.224248,1.9509667,0.7822371,0.0,3.3270075,0.11672954,2.2384195,0.46649593,0.16295668,12.06565,3.2309685,0.2596637,0.2021587,1.1646566,0.2846576,3.9792683,0.20739509,6.3545895,2.8161318,0.0,5.4241223,0.0835355,2.7165213,0.5605489,0.0187813,0.22815442,4.255856,0.7698795,0.5028055,0.85259837,0.28411278,0.067203335,0.124913335,5.493942,2.5626507,1.5939229,1.1782578,11.209688,0.09071099,3.4199302,15.100232,6.7983665,0.099231154,0.0,0.0,0.86766124,0.27894226,0.0,0.0,0.069804616,4.351778,0.0,1.0805423,0.123643935,10.690204,0.47816956,0.06645855,0.007546769,0.4814074,0.0,0.19200687,0.0,0.5923009,0.33620328,0.62727875,1.735227,7.182868,3.0014234,2.9397693,5.0955205,0.07466374,0.24726161,2.1152027,0.20864648,0.2795848,3.888554,0.09189236,0.0,0.7846511,0.0,0.0,3.018526,4.898274,0.8444631,0.68039566,1.9294883,1.420852,1.8197484,0.0,0.24696165,1.7744846,0.8634615,0.0,3.5151057,2.5969448,0.3709314,0.4910034,4.815773,1.0811102,0.91170424,0.0,0.0,0.18964833,0.6169176,0.22920837,0.0,0.0,8.854687,4.7849646,0.0,0.69278896,0.0,8.794974,0.029180411,2.7849865,0.10333087,2.3760562,1.0513319,0.83889574,0.0,4.8695283,0.0,0.37816083,0.5963372,0.05220283,3.0231662,3.277741,10.861541,4.5583177,0.12875788,8.092003,5.8995795,0.11318067,0.2302447,5.80469,1.355816,0.035333663,2.4274058,0.0,1.4589256,0.0,0.008337674,0.056794185,0.67818326,0.23275617,0.30492723,0.0,0.03888062,0.0,0.98070127,3.5245311,0.064472236,0.0,0.7144263,0.0,2.7053547,0.048378047,0.86141956,1.2893332,0.51345825,7.620836,0.005830832,1.7384351,0.9546961,4.7781973,0.74109584,9.040736,0.0,1.2037519,3.9157329,0.40743956,1.8311182,1.0261803,5.236738,3.1455948,2.2358575,0.0,10.966049,0.02050886,2.3198588,0.0057365606,0.9877181,0.73059785,0.899798,0.32366183,4.0271564,2.915965,7.71047,0.0,0.47765186,0.0,0.0,0.9394298,1.5252643,1.6366545,2.6790133,0.0069701695,0.81947315,0.0,1.9211565,1.3513644,0.0,0.0,3.9908652,0.39159593,0.009902612,0.0,2.215494,0.0,0.30989146,0.13854943,0.012708846,1.362082,0.0,0.18787599,2.0075576,3.638248,3.0248704,2.8847232,0.0,0.55640036,0.0,0.05954851,0.0,0.74581194,0.0,3.1318593,0.02099697,0.39839393,0.6900185,1.6036885,0.61432284,6.5374627,0.029315297,1.8489829,6.2222,0.08512894,2.288661,0.7297279,4.5417633,0.0,0.5599782,0.0,0.0,0.2541982,0.0,2.8425996,0.0,0.18618032,0.37754142,0.38933957,0.6385549,3.9901986,9.413614,0.13278776,1.2800857,1.119599,0.0,3.3959885,1.4220761,0.0,0.82575494,0.42068207,0.0,2.6942632,4.9376574,0.52622277,1.6726253,0.93129253,0.10429599,1.0813674,0.20501657,2.1741478,1.2829067,3.2710037,1.595327,1.1216038,0.09035005,2.256902,6.348547,3.22828,0.28881666,1.7531539,0.35831764,11.005119,0.36145508,0.0,0.15231328,0.39823815 3 | False,0.0,1.5659888,0.9795387,4.2150273,2.7514215,0.46990424,0.942106,3.0819037,0.7333554,5.240792,3.3389344,1.3541185,1.3338892,0.6663938,0.5487696,0.0,1.3918678,1.1537352,1.1469694,1.137218,0.5315949,1.1478186,0.56462586,0.27681875,0.28172,0.62803036,0.004717092,1.6738908,1.0735782,0.012021471,0.50643593,0.0,0.034927443,0.54222625,1.45291,0.17807603,3.4017353,0.84574264,0.12286,2.1603284,4.9478965,2.4018497,0.55619097,1.0727917,0.09784732,0.7986277,1.6876371,0.3141875,1.413382,2.2161605,2.9333303,6.4016137,4.293491,1.4717547,4.914748,0.14770664,0.9234845,2.9636667,1.0482793,2.9261591,0.0,2.4522955,2.087722,1.2340851,1.597116,0.3951955,0.8834233,2.8590105,0.97714967,3.5808282,0.0,7.9844494,1.293921,5.227065,0.49527055,0.72197825,6.802121,0.52109516,1.6139073,0.28607228,0.4759791,0.06677796,0.12506889,0.1515753,1.1581132,0.21002053,1.2206664,3.208175,0.3185406,0.0,0.5200459,3.624943,3.6210797,2.3661478,0.30301082,9.536854,0.1775064,2.4361207,0.28820038,0.85999274,0.29853684,2.2857106,2.4229867,1.10337,0.5563773,0.505385,0.27907002,3.902735,0.36712894,0.18612202,1.299055,0.35058552,0.23118384,1.1703097,0.39608738,2.1284676,0.87798685,0.62168443,0.0,0.98764026,2.0116363,6.818782,0.09717489,1.5212868,7.931019,0.2340589,2.1220703,1.5370493,1.235108,3.6840394,1.8885325,0.27170816,0.28961405,3.5587764,1.2335792,0.0906846,0.1226984,0.0,0.3691413,1.9427574,1.3072993,2.6549468,1.7671379,0.1919094,1.1802415,2.9149354,1.9638197,0.9867413,4.286894,0.31699765,1.4402238,1.7048683,0.81744355,2.78236,1.9817716,0.45829806,1.6875836,0.7051146,12.437517,1.2465167,0.15638705,3.2915783,3.7153172,0.00037445623,2.6755934,1.5846614,1.3520594,2.0066643,3.5690994,3.3009062,0.21823698,2.5437348,0.29772404,5.157602,4.0000124,1.0213175,0.53319913,0.13398926,0.084509924,1.249874,0.25117996,0.6318734,0.23545043,3.5526493,0.0,0.73910636,0.52041,0.50401884,0.28475827,0.40722707,0.0,1.3661029,6.9649043,1.6108668,0.7728189,1.3216536,2.8952522,0.5173574,3.2593768,0.38281688,0.4525214,0.7768256,5.7829566,2.2125843,1.6979096,0.062168002,1.7114139,0.0,0.13494396,0.017418,0.8761093,2.5021544,1.8988813,0.83362275,6.610062,0.31433657,0.6561014,1.3047681,2.398137,1.0416564,1.2648318,0.48655927,2.6824732,5.979489,0.7534528,0.7870135,0.5019257,0.6378011,1.108926,0.12475406,0.0059041358,0.0,13.54958,1.3913872,1.0996466,0.12400829,3.754011,1.2099261,2.57349,9.399678,1.8044401,5.5763397,0.30568036,4.084769,1.0212308,0.22334807,2.3704414,1.3341213,4.1379967,2.544452,2.829277,1.1347294,5.1544514,2.3694015,1.7976842,1.2571592,0.045517392,1.8260677,0.37298885,0.13971496,3.489783,0.2172307,0.34820753,0.2074296,1.1966202,0.119053856,0.54089326,0.62653285,0.12743191,0.29860196,1.173416,0.018981354,0.10971679,0.92240083,0.04589878,0.16339752,0.023559462,3.935342,2.422161,0.43081057,0.07294032,0.648422,0.07987221,3.6668785,0.0,1.038127,1.8494443,1.1980474,0.8329206,0.6722754,3.5810244,4.5582757,0.0,0.123638526,0.0,4.8873186,0.15728831,0.0,0.9070376,4.103295,2.358815,0.0,0.0,0.0,4.4455,0.25732,1.3554612,1.1573128,0.0,0.2879894,0.7066783,1.2678046,0.083180964,0.0,4.7302194,0.009526005,1.6318493,0.22732513,0.087628685,0.406337,3.676663,2.0678732,0.43814883,1.3990653,0.8248821,0.0,0.1336438,0.06987791,0.0,1.1978002,2.7286007,0.35513905,0.0,0.18630114,0.0,3.9574082,0.0,0.051355492,0.0,0.9697243,0.5038651,0.0,0.0,0.041237272,0.021493886,0.82063997,0.056277256,0.9544746,3.667481,5.2713146,0.18067496,2.1729138,2.1666937,5.877317,3.7127397,0.22856998,0.8399865,2.3051767,2.4559593,2.6168334,3.1110601,0.0,0.23745692,0.0,0.81211483,0.0,0.43130538,1.1038702,0.18849887,0.0,0.0,0.22280723,0.4506722,0.278354,3.8670344,0.27257487,0.0,0.034663774,8.918831,1.1115226,1.5357469,0.9361244,0.5266539,0.03094228,0.0,2.7048798,0.4964706,8.367421,0.1095097,4.103294,1.1361473,4.6312456,1.6571227,0.0,0.9858088,0.98663265,1.2326255,18.273752,0.106127754,0.0,10.132444,7.3140254,13.0637665,0.0,0.54206985,0.838631,0.66567606,0.0,0.67223907,0.8846641,3.690928,0.0,1.4550297,0.0,0.0,0.3765201,0.124779105,0.57198584,0.06769558,0.05194254,2.3791409,0.0,1.0119655,0.9034338,0.0,1.0168633,2.3887832,1.5080258,0.80849445,12.042129,0.9865963,0.0,0.0,0.035975855,4.0963764,0.32174584,0.020602569,0.06682827,4.939119,2.6347206,0.0,3.7265346,0.23705861,0.016764328,0.3655856,4.803521,0.20734458,0.13272938,0.047159385,6.9744997,0.0,0.0,2.8244588,0.0,0.48884863,2.9610062,0.056871142,0.037993588,2.6344845,1.8345705,5.6257424,1.9977221,2.0507917,5.414329,5.937831,0.17503321,0.6394768,8.901436,1.3387647,0.082600646,0.0,0.50507915,0.067801915,0.28612956,0.28773987,0.0009720632,9.509638,2.8477495,0.25585645,8.762377,0.0,1.0540804,1.1605431,10.134096,0.0,0.062995434,1.7530464,3.8067064,2.5039506,0.27154094,1.2373917,0.23179904,0.5515947,1.9493744,2.6501715,0.41633463,1.2599964,0.23946893,0.7492251,0.0,0.1563604,1.931312,6.6867003,0.15866813,0.03338933,0.18354198,3.7537577,1.3273323,0.51614493,0.0,0.45749947,0.04283701 4 | False,0.46112114,2.1335888,1.8241048,3.7351613,0.36953086,1.5293108,2.1601055,2.860932,5.1433387,0.28087416,3.7314804,0.3581663,0.091056935,0.003450069,6.037664,0.30612454,0.571893,1.5549864,0.7488508,10.657478,0.7966928,1.4103812,0.42179903,2.5708578,3.5698192,0.34951842,0.020087833,2.7209818,1.200906,2.6880674,3.3300486,0.08889564,0.4264115,1.9196259,2.153557,0.08119932,1.6303731,0.52323073,0.07305842,4.9688535,2.0254424,0.68028307,3.0709374,6.11421,0.015177524,0.0,1.4988465,0.1980059,0.16609585,1.0889233,1.5761944,3.9454832,9.707764,0.44393125,1.1122248,0.061205845,0.79469895,5.656348,1.6394545,4.451493,3.022262,0.44380793,1.9415722,0.0,0.56014353,1.6221204,0.0,0.652658,0.5011004,1.7223866,0.0021700696,5.4243283,0.69782317,4.4391007,0.0,0.3059343,1.3109063,0.030020507,2.2842977,0.2192384,0.9240655,0.04335497,0.0,0.3255787,2.90101,0.49511334,0.8484899,2.643336,0.7712128,0.286951,6.749533,3.020384,2.1298018,0.22571988,2.5373983,0.81481665,2.4230592,2.378148,1.2698388,0.47444567,0.12968856,2.7633832,0.6543257,1.1585423,0.40002802,0.97725105,2.413279,1.6619308,0.040581264,0.0,2.0613706,0.540403,0.7375542,1.8673608,2.4644318,1.8475541,0.36726853,0.5496293,0.8709604,0.9798058,0.78019166,2.4870117,0.109242216,0.33667758,0.60807127,0.63456565,4.31613,7.425311,1.6955297,3.8750064,0.0,1.5589504,1.6156995,0.7165218,1.11664,0.29832062,0.61164075,0.06982304,4.015175,6.425628,0.84591556,3.3241649,2.3341613,2.5000346,1.9833434,0.0,1.1131458,2.7022448,2.1387928,0.3581163,0.9521044,0.12547801,0.39652684,5.151849,0.9570625,2.0943348,0.9473914,5.9737678,7.1045117,3.8365586,0.011891012,7.778302,5.3617887,0.16430879,0.09030357,1.3474579,0.9955001,1.5114158,0.96471924,3.8530567,0.19509202,0.8279502,0.0,3.236872,2.087074,1.0473642,2.5557902,0.12543155,0.27207533,2.0083,0.0,2.331559,2.0011897,1.253072,0.7369683,0.0,3.8982956,0.0,0.08348985,5.4233794,0.7739589,2.419749,2.9180753,7.94402,3.5744588,0.47413462,4.6088147,1.874471,5.8800855,0.6224164,4.213573,0.6233635,1.9506603,0.1731375,2.1621401,0.09919829,3.030944,0.70585084,1.572602,2.2551274,0.7554109,0.48613983,0.011739742,0.0,4.6881547,2.9190836,3.6938877,0.8468688,1.9456602,1.380586,0.1180727,0.08764773,0.6397943,12.080511,0.040237863,2.866631,0.5557986,3.6685414,1.9213517,1.4565915,0.026319146,0.44326526,9.5060625,2.3732884,0.0796136,0.48028627,0.96748525,1.3613396,5.516922,4.1135426,11.771184,4.3025475,0.028500572,0.8796897,4.3576713,2.7081985,0.52707255,0.0,0.20732209,4.192343,0.0011305375,0.95325685,1.8918582,2.6601713,0.16889462,0.23276515,0.0,2.1605675,0.39623815,2.9520898,12.896067,6.8731117,0.051315077,0.37148917,15.056107,0.63663244,0.0,3.582499,0.0,1.6672244,1.2374235,0.100534566,0.3597434,9.280933,2.664385,0.14248258,0.63993657,11.7247,0.71052706,1.1474961,0.0,0.08538999,0.19263059,6.153173,0.0,0.11669634,1.5751977,0.32482773,1.5950497,0.1672559,2.7469232,0.56592935,0.3329131,1.0864764,1.5494595,13.196359,0.024275294,0.35035914,2.1386523,0.0,0.0,0.85830605,0.23369512,0.0,1.1287621,0.14058255,0.46435067,0.5499137,5.0106955,3.0786238,1.4176838,0.75445217,0.6832051,0.70677876,2.7583437,3.7263772,13.683774,2.5339792,1.268514,1.1097827,1.3701215,4.8274655,0.86168194,2.4353619,0.22178395,0.0018151865,0.028063435,0.18894655,0.0,1.4722105,4.137401,2.1483555,0.0,0.4736592,0.0,14.141827,0.8513631,0.33892128,0.0,0.34540272,4.4188547,1.0770575,2.5925288,0.33556724,0.0039636865,5.5812016,1.2223202,0.013105068,1.0221738,4.018209,0.5878394,4.442058,0.57266945,8.175587,2.5914788,0.0,0.3444457,9.432242,3.55173,0.095683545,0.3256337,0.0029114995,0.0,0.27841532,0.045093197,1.3534012,0.65515906,0.5296211,1.1997733,0.0,0.0,0.0,1.2729379,0.829383,0.2642946,4.731426,2.6540146,0.01989702,9.20689,0.5719824,0.07834564,0.14849903,0.514003,7.3862925,1.1155077,3.555706,3.9550712,4.4527264,3.5506215,5.755327,3.5920541,0.044972666,2.1465847,0.024345243,2.2532241,0.45542118,3.2165465,5.4006534,0.46695796,0.0,18.149057,1.4007834,4.6173263,1.1946156,0.0,1.2031934,3.549035,7.8613353,2.3801801,2.3311281,5.995833,2.5379546,10.0461445,0.22278792,0.7071859,0.20086314,1.9559548,4.94324,8.251469,0.40846524,0.96566707,0.0,3.3403916,0.12761915,0.16489993,0.21057117,4.756474,1.182665,1.3473257,4.2571015,2.1792533,1.1000628,3.8973396,3.8577735,0.17022908,1.943259,0.011295123,0.05758635,6.0523705,2.6862469,5.957788,1.2351397,2.718024,0.07126258,2.2447028,0.8245993,5.0258527,0.15583189,4.517081,2.8265631,0.0,1.6555821,1.4330915,1.2179266,1.3872147,1.2570627,0.4305941,4.3483677,3.3759675,1.6939688,5.180839,2.5947,3.4261672,0.0,4.666795,0.0,0.0,10.839356,2.3502908,2.4827735,0.43250966,0.07325214,2.6113148,1.4340428,0.4222705,0.2289919,3.2848847,0.25043586,1.1298453,4.430055,3.0049922,5.276356,1.8888803,2.1993637,0.0,1.827844,9.044594,10.208017,8.133966,0.8523788,3.3054745,0.21871397,2.0441742,4.3706307,4.0342016,0.21408726,4.178865,1.5641925,0.0,4.0655494,1.3194618,3.9595556,14.100289,2.7498658,0.28389466,0.30229512,1.483878,4.3026614,0.0,2.7556431,0.9810146,0.32740286 5 | -------------------------------------------------------------------------------- /tests/image_featurizer_testing/csv_checking/squeezenet_check_csv_mult.csv: -------------------------------------------------------------------------------- 1 | images_1_missing,images_1_feat_0,images_1_feat_1,images_1_feat_2,images_1_feat_3,images_1_feat_4,images_1_feat_5,images_1_feat_6,images_1_feat_7,images_1_feat_8,images_1_feat_9,images_1_feat_10,images_1_feat_11,images_1_feat_12,images_1_feat_13,images_1_feat_14,images_1_feat_15,images_1_feat_16,images_1_feat_17,images_1_feat_18,images_1_feat_19,images_1_feat_20,images_1_feat_21,images_1_feat_22,images_1_feat_23,images_1_feat_24,images_1_feat_25,images_1_feat_26,images_1_feat_27,images_1_feat_28,images_1_feat_29,images_1_feat_30,images_1_feat_31,images_1_feat_32,images_1_feat_33,images_1_feat_34,images_1_feat_35,images_1_feat_36,images_1_feat_37,images_1_feat_38,images_1_feat_39,images_1_feat_40,images_1_feat_41,images_1_feat_42,images_1_feat_43,images_1_feat_44,images_1_feat_45,images_1_feat_46,images_1_feat_47,images_1_feat_48,images_1_feat_49,images_1_feat_50,images_1_feat_51,images_1_feat_52,images_1_feat_53,images_1_feat_54,images_1_feat_55,images_1_feat_56,images_1_feat_57,images_1_feat_58,images_1_feat_59,images_1_feat_60,images_1_feat_61,images_1_feat_62,images_1_feat_63,images_1_feat_64,images_1_feat_65,images_1_feat_66,images_1_feat_67,images_1_feat_68,images_1_feat_69,images_1_feat_70,images_1_feat_71,images_1_feat_72,images_1_feat_73,images_1_feat_74,images_1_feat_75,images_1_feat_76,images_1_feat_77,images_1_feat_78,images_1_feat_79,images_1_feat_80,images_1_feat_81,images_1_feat_82,images_1_feat_83,images_1_feat_84,images_1_feat_85,images_1_feat_86,images_1_feat_87,images_1_feat_88,images_1_feat_89,images_1_feat_90,images_1_feat_91,images_1_feat_92,images_1_feat_93,images_1_feat_94,images_1_feat_95,images_1_feat_96,images_1_feat_97,images_1_feat_98,images_1_feat_99,images_1_feat_100,images_1_feat_101,images_1_feat_102,images_1_feat_103,images_1_feat_104,images_1_feat_105,images_1_feat_106,images_1_feat_107,images_1_feat_108,images_1_feat_109,images_1_feat_110,images_1_feat_111,images_1_feat_112,images_1_feat_113,images_1_feat_114,images_1_feat_115,images_1_feat_116,images_1_feat_117,images_1_feat_118,images_1_feat_119,images_1_feat_120,images_1_feat_121,images_1_feat_122,images_1_feat_123,images_1_feat_124,images_1_feat_125,images_1_feat_126,images_1_feat_127,images_1_feat_128,images_1_feat_129,images_1_feat_130,images_1_feat_131,images_1_feat_132,images_1_feat_133,images_1_feat_134,images_1_feat_135,images_1_feat_136,images_1_feat_137,images_1_feat_138,images_1_feat_139,images_1_feat_140,images_1_feat_141,images_1_feat_142,images_1_feat_143,images_1_feat_144,images_1_feat_145,images_1_feat_146,images_1_feat_147,images_1_feat_148,images_1_feat_149,images_1_feat_150,images_1_feat_151,images_1_feat_152,images_1_feat_153,images_1_feat_154,images_1_feat_155,images_1_feat_156,images_1_feat_157,images_1_feat_158,images_1_feat_159,images_1_feat_160,images_1_feat_161,images_1_feat_162,images_1_feat_163,images_1_feat_164,images_1_feat_165,images_1_feat_166,images_1_feat_167,images_1_feat_168,images_1_feat_169,images_1_feat_170,images_1_feat_171,images_1_feat_172,images_1_feat_173,images_1_feat_174,images_1_feat_175,images_1_feat_176,images_1_feat_177,images_1_feat_178,images_1_feat_179,images_1_feat_180,images_1_feat_181,images_1_feat_182,images_1_feat_183,images_1_feat_184,images_1_feat_185,images_1_feat_186,images_1_feat_187,images_1_feat_188,images_1_feat_189,images_1_feat_190,images_1_feat_191,images_1_feat_192,images_1_feat_193,images_1_feat_194,images_1_feat_195,images_1_feat_196,images_1_feat_197,images_1_feat_198,images_1_feat_199,images_1_feat_200,images_1_feat_201,images_1_feat_202,images_1_feat_203,images_1_feat_204,images_1_feat_205,images_1_feat_206,images_1_feat_207,images_1_feat_208,images_1_feat_209,images_1_feat_210,images_1_feat_211,images_1_feat_212,images_1_feat_213,images_1_feat_214,images_1_feat_215,images_1_feat_216,images_1_feat_217,images_1_feat_218,images_1_feat_219,images_1_feat_220,images_1_feat_221,images_1_feat_222,images_1_feat_223,images_1_feat_224,images_1_feat_225,images_1_feat_226,images_1_feat_227,images_1_feat_228,images_1_feat_229,images_1_feat_230,images_1_feat_231,images_1_feat_232,images_1_feat_233,images_1_feat_234,images_1_feat_235,images_1_feat_236,images_1_feat_237,images_1_feat_238,images_1_feat_239,images_1_feat_240,images_1_feat_241,images_1_feat_242,images_1_feat_243,images_1_feat_244,images_1_feat_245,images_1_feat_246,images_1_feat_247,images_1_feat_248,images_1_feat_249,images_1_feat_250,images_1_feat_251,images_1_feat_252,images_1_feat_253,images_1_feat_254,images_1_feat_255,images_2_missing,images_2_feat_0,images_2_feat_1,images_2_feat_2,images_2_feat_3,images_2_feat_4,images_2_feat_5,images_2_feat_6,images_2_feat_7,images_2_feat_8,images_2_feat_9,images_2_feat_10,images_2_feat_11,images_2_feat_12,images_2_feat_13,images_2_feat_14,images_2_feat_15,images_2_feat_16,images_2_feat_17,images_2_feat_18,images_2_feat_19,images_2_feat_20,images_2_feat_21,images_2_feat_22,images_2_feat_23,images_2_feat_24,images_2_feat_25,images_2_feat_26,images_2_feat_27,images_2_feat_28,images_2_feat_29,images_2_feat_30,images_2_feat_31,images_2_feat_32,images_2_feat_33,images_2_feat_34,images_2_feat_35,images_2_feat_36,images_2_feat_37,images_2_feat_38,images_2_feat_39,images_2_feat_40,images_2_feat_41,images_2_feat_42,images_2_feat_43,images_2_feat_44,images_2_feat_45,images_2_feat_46,images_2_feat_47,images_2_feat_48,images_2_feat_49,images_2_feat_50,images_2_feat_51,images_2_feat_52,images_2_feat_53,images_2_feat_54,images_2_feat_55,images_2_feat_56,images_2_feat_57,images_2_feat_58,images_2_feat_59,images_2_feat_60,images_2_feat_61,images_2_feat_62,images_2_feat_63,images_2_feat_64,images_2_feat_65,images_2_feat_66,images_2_feat_67,images_2_feat_68,images_2_feat_69,images_2_feat_70,images_2_feat_71,images_2_feat_72,images_2_feat_73,images_2_feat_74,images_2_feat_75,images_2_feat_76,images_2_feat_77,images_2_feat_78,images_2_feat_79,images_2_feat_80,images_2_feat_81,images_2_feat_82,images_2_feat_83,images_2_feat_84,images_2_feat_85,images_2_feat_86,images_2_feat_87,images_2_feat_88,images_2_feat_89,images_2_feat_90,images_2_feat_91,images_2_feat_92,images_2_feat_93,images_2_feat_94,images_2_feat_95,images_2_feat_96,images_2_feat_97,images_2_feat_98,images_2_feat_99,images_2_feat_100,images_2_feat_101,images_2_feat_102,images_2_feat_103,images_2_feat_104,images_2_feat_105,images_2_feat_106,images_2_feat_107,images_2_feat_108,images_2_feat_109,images_2_feat_110,images_2_feat_111,images_2_feat_112,images_2_feat_113,images_2_feat_114,images_2_feat_115,images_2_feat_116,images_2_feat_117,images_2_feat_118,images_2_feat_119,images_2_feat_120,images_2_feat_121,images_2_feat_122,images_2_feat_123,images_2_feat_124,images_2_feat_125,images_2_feat_126,images_2_feat_127,images_2_feat_128,images_2_feat_129,images_2_feat_130,images_2_feat_131,images_2_feat_132,images_2_feat_133,images_2_feat_134,images_2_feat_135,images_2_feat_136,images_2_feat_137,images_2_feat_138,images_2_feat_139,images_2_feat_140,images_2_feat_141,images_2_feat_142,images_2_feat_143,images_2_feat_144,images_2_feat_145,images_2_feat_146,images_2_feat_147,images_2_feat_148,images_2_feat_149,images_2_feat_150,images_2_feat_151,images_2_feat_152,images_2_feat_153,images_2_feat_154,images_2_feat_155,images_2_feat_156,images_2_feat_157,images_2_feat_158,images_2_feat_159,images_2_feat_160,images_2_feat_161,images_2_feat_162,images_2_feat_163,images_2_feat_164,images_2_feat_165,images_2_feat_166,images_2_feat_167,images_2_feat_168,images_2_feat_169,images_2_feat_170,images_2_feat_171,images_2_feat_172,images_2_feat_173,images_2_feat_174,images_2_feat_175,images_2_feat_176,images_2_feat_177,images_2_feat_178,images_2_feat_179,images_2_feat_180,images_2_feat_181,images_2_feat_182,images_2_feat_183,images_2_feat_184,images_2_feat_185,images_2_feat_186,images_2_feat_187,images_2_feat_188,images_2_feat_189,images_2_feat_190,images_2_feat_191,images_2_feat_192,images_2_feat_193,images_2_feat_194,images_2_feat_195,images_2_feat_196,images_2_feat_197,images_2_feat_198,images_2_feat_199,images_2_feat_200,images_2_feat_201,images_2_feat_202,images_2_feat_203,images_2_feat_204,images_2_feat_205,images_2_feat_206,images_2_feat_207,images_2_feat_208,images_2_feat_209,images_2_feat_210,images_2_feat_211,images_2_feat_212,images_2_feat_213,images_2_feat_214,images_2_feat_215,images_2_feat_216,images_2_feat_217,images_2_feat_218,images_2_feat_219,images_2_feat_220,images_2_feat_221,images_2_feat_222,images_2_feat_223,images_2_feat_224,images_2_feat_225,images_2_feat_226,images_2_feat_227,images_2_feat_228,images_2_feat_229,images_2_feat_230,images_2_feat_231,images_2_feat_232,images_2_feat_233,images_2_feat_234,images_2_feat_235,images_2_feat_236,images_2_feat_237,images_2_feat_238,images_2_feat_239,images_2_feat_240,images_2_feat_241,images_2_feat_242,images_2_feat_243,images_2_feat_244,images_2_feat_245,images_2_feat_246,images_2_feat_247,images_2_feat_248,images_2_feat_249,images_2_feat_250,images_2_feat_251,images_2_feat_252,images_2_feat_253,images_2_feat_254,images_2_feat_255 2 | False,0.032151844,3.5865228,1.315752,0.804667,0.9892403,3.570376,0.9755024,0.06260121,0.57173294,2.2252495,0.04281974,3.7095346,2.0525513,0.39772993,0.44111317,0.10065977,3.1001778,2.2169387,1.0426998,0.56015956,2.8175716,2.8769276,0.14155844,1.7427983,0.6826789,0.906466,3.653263,2.8122704,2.056664,0.55359733,0.481095,0.42757812,2.7485971,2.0675704,1.59769,2.4581704,0.88472706,2.3425405,2.0865939,1.2073997,4.3178005,0.0,0.87065583,1.3854185,0.0,2.7837539,3.3492265,3.1671135,0.6037793,1.2034793,0.40124676,1.9381127,0.2644626,2.158427,2.626845,0.06318747,0.926091,0.120606825,0.33433396,0.041057963,1.1146489,0.30635342,5.105906,2.7624483,2.1196234,1.2709795,3.7733057,0.15652497,0.060139555,3.8668485,0.58167386,3.4411852,0.38403928,3.1890352,0.95553064,0.16235399,2.2984002,0.19702272,2.8443837,2.64459,3.6794007,1.4597358,4.6320596,2.750855,2.5951371,0.17585137,1.304515,0.86632884,0.75128,0.02251982,0.13168602,0.50655293,0.5846592,3.4358816,0.24995285,5.4886694,1.1982974,1.8626316,0.059135407,2.1056836,2.0951126,0.699353,1.8674519,2.1154482,0.5804168,1.5129423,0.4627264,1.7317951,1.7663311,1.8727616,0.27453607,5.1676955,1.366602,1.6635038,1.1775745,0.3147263,7.648309,0.2309112,0.7246571,2.0933318,4.5853605,2.7120612,1.4000283,0.2896651,2.2420053,0.6363425,0.56835556,0.09605834,4.0282965,1.3860903,5.6501994,9.260081,3.448799,0.0,0.57330173,0.0,2.2107913,0.54027116,5.406924,0.27231407,0.2444771,0.096003436,0.29615045,0.481741,4.4590473,2.9705963,2.585092,1.1812321,0.24411564,1.9902232,0.39232555,0.0,3.9584,0.76242936,1.6751702,0.9098742,1.0107231,0.43173075,3.0560253,0.4309674,2.9484415,0.45585212,0.094824165,0.42306298,0.0,6.8198256,0.34639448,4.397487,1.4070835,1.2396935,0.9451138,2.4347641,0.18908042,0.32427,3.1504536,7.7099295,4.11038,3.00638,3.0174673,0.6955748,1.2137029,0.7294628,0.03256593,0.45546973,0.15246361,0.01944031,2.2526162,0.032236118,0.35721314,1.3768663,1.0753764,4.0671473,0.872133,2.8664467,4.890916,0.60187596,2.1615863,1.4286492,4.1911664,1.1179287,5.493279,1.1627977,0.859158,0.6117299,3.4715607,3.855235,0.23882593,0.4697149,1.5809593,1.3429917,0.40973657,1.6362605,0.0,2.1912305,0.004951306,1.107747,0.22422045,0.68739545,0.09393799,2.8229027,2.9547968,0.27820018,0.029774254,0.37290597,1.5659297,0.20969544,1.1468534,3.5758927,0.93914914,3.1536644,1.5091945,2.2708817,0.2799891,0.1270991,1.4212998,0.09309016,0.3834405,2.3143768,4.773201,1.1998423,1.6979942,0.71103805,0.62321854,1.3471316,2.73194,1.3019589,0.5928317,1.1895822,2.2769551,1.3584654,1.1736261,4.7884135,1.0209852,5.6817183,0.18072754,0.2752757,False,0.7829944,2.597283,1.6106628,2.0120049,2.9870737,2.3465264,1.0001415,0.2743848,1.2728014,1.1420937,0.8397067,0.4207223,0.45487517,0.839304,0.54279983,0.25321797,0.28857684,0.815493,2.123739,1.1415942,3.674873,0.81449133,0.4482375,1.0009123,1.8147713,4.667472,2.8826227,2.5312274,1.9435756,1.9872192,1.2261478,1.6609036,0.99615574,1.8712169,2.2789888,3.9922247,3.260493,0.6086244,3.6616082,0.9499898,0.27137852,0.13832209,0.6840669,2.2144208,0.1592703,2.0724945,2.9936137,4.9199324,1.3068136,0.57409656,1.2921237,1.7631783,0.53088117,2.0909026,0.27662548,0.8248203,0.7007468,1.2622775,0.7498356,0.49382013,4.415209,0.80923086,4.082539,1.8295598,2.4595737,1.0801203,1.9241952,0.6621319,0.0613492,1.1559494,1.981123,0.97952366,2.0475883,1.4752805,2.3019457,1.572546,1.7999018,1.2200348,1.1963491,6.842017,1.7239827,1.8578459,2.1301274,1.6793618,3.4350028,1.3809859,2.727663,2.510665,0.3335942,0.667192,0.4415267,1.8940499,0.36955318,0.5122144,0.34599268,0.68305147,4.2878857,1.0472362,1.7063048,1.8210968,0.6146735,3.9977703,0.8800388,0.85570693,0.07618098,1.6891319,1.3662521,3.4621994,0.9804348,1.7198968,0.8756955,4.3309813,0.77023315,0.56986344,0.61684006,0.0029520679,7.4704833,0.61182743,2.4819684,5.986584,3.6903899,2.1952245,0.6222894,1.8522813,3.3412242,1.9820032,3.7619264,1.5274217,0.93579257,0.2563519,1.8535069,0.27781856,0.65783703,0.58371305,0.21301693,0.5961987,0.5160588,0.10464815,1.9794508,1.4264858,0.36068118,1.8733753,0.5190635,1.5237458,0.75259805,4.06965,0.061819263,2.4436593,0.07864416,2.5051663,1.1794075,0.0,2.35141,1.256387,0.1439947,0.98724145,0.041590482,2.3698726,0.9295872,0.24698284,2.8722682,0.91860706,0.41244105,0.10176086,0.5989001,1.5418699,0.09315057,1.9787041,0.025677746,0.48486215,0.25193256,0.020618636,0.42106694,0.5053759,4.4693975,1.1767944,4.022005,1.9706548,1.5725816,2.5363965,1.5555301,0.11872846,0.40605742,0.7675878,0.094249435,0.111403614,0.3645131,2.0698047,0.017331887,5.015177,1.2359357,0.27879807,1.3524399,4.431946,2.1064017,2.8836966,0.82856137,0.9862207,9.753189,0.053063877,8.723235,6.5318832,0.6903504,0.33283803,0.77845156,1.845464,0.72751486,0.18826005,0.34838247,0.059819058,1.1895704,0.95769966,0.5084317,1.9484046,6.4253116,0.49329814,0.017987927,2.2090611,0.04371542,3.7869196,1.8632673,0.12691146,2.5845535,0.17003697,3.5108294,0.0,1.4122294,1.7249274,0.047432363,2.2345276,3.8117323,3.7325604,3.056432,4.7704563,0.71068263,0.25253958,0.17696574,0.14435597,6.178694,4.5091166,0.5270402,5.64732,0.031497717,2.7798765,1.3877457,0.73459536,1.2504846,1.5332531,0.7497327,0.37461254,1.0438362,3.4226842,0.10846566,2.540545,0.25807247,0.25016823 3 | False,1.2973549,2.779633,0.9494208,2.5105188,2.7121065,2.0448234,0.0472535,3.1718943,1.0634396,5.7031646,1.103537,1.4963284,1.9596689,1.3705348,1.9444867,1.7094721,1.1730187,1.1173782,1.0768019,2.520956,1.3528627,4.5925736,0.007588762,0.8484262,0.6275096,2.7608387,5.0758476,0.58671534,3.2255235,3.0454736,1.733035,0.9707861,1.0911319,0.326329,1.1117435,2.7132492,2.568462,0.15296715,0.6704634,1.2517681,0.48371023,0.16278934,1.6980617,1.745913,0.52908194,4.8849587,1.1777608,1.6761075,2.4006038,0.87214226,1.4465358,0.906434,0.6886395,2.0376048,0.020290632,1.3008869,1.3024576,2.155993,0.4584489,0.9253831,1.6336017,0.2229599,0.62131846,5.870721,2.785268,0.7794752,1.1661106,0.7074803,0.3407319,5.220402,2.08504,2.417098,0.9916717,1.9076953,1.2484546,0.5387912,2.7741878,1.5256987,3.4605796,5.4705353,3.8950965,2.7630486,0.7188807,1.253458,2.4088879,0.5115211,1.618436,1.5672191,1.3406109,1.1401877,1.1657795,1.6271309,0.36848414,1.9491478,2.7534347,1.596854,5.4310474,2.0242968,3.241643,3.251251,2.4184682,1.061899,1.1306692,1.8683975,1.9138647,0.62077534,0.005869871,3.8036191,2.2703784,1.6631231,0.10286021,6.3601527,1.4534345,2.11217,1.6889715,0.2347922,5.9396753,0.27994993,1.1644124,4.8152323,8.036865,0.45409513,3.532935,0.26353627,2.1998327,0.47719368,2.2760148,0.2008299,1.0802838,1.6741639,9.884589,0.21140212,7.8463697,1.7912495,0.8336122,0.66897905,4.8203382,1.4034338,6.182318,0.9290116,0.042694993,3.1729019,0.05834817,0.9500127,0.8811528,1.6564263,0.70969474,7.372909,0.18731722,1.0693262,0.42915303,0.11684756,0.63467234,0.5071322,4.0446596,1.086068,0.69499195,3.2423606,8.108876,1.1891484,3.0987935,1.6485219,0.11179957,0.10850499,0.73610526,3.1428783,0.2368296,7.0709133,0.5951422,0.17270136,2.747956,1.464048,2.7925825,0.6177126,2.5201914,2.5149488,4.374128,1.2957394,4.888344,1.8237067,0.1642726,0.13920766,0.6992472,0.59239006,0.59988666,0.0,1.0511605,2.4978602,1.3369558,4.8894362,0.113422334,3.9501476,2.3356068,4.203899,4.652974,1.8185134,1.085465,1.3543227,4.3086,0.23347898,9.77492,2.905971,0.6015967,5.705185,2.3556542,4.266894,5.134466,0.45402452,3.4495974,4.329967,0.48283353,1.7340055,0.18773556,2.9695694,2.8022137,1.6396581,3.8775566,1.0567441,0.034440737,4.3693085,3.596464,1.3946433,1.534651,2.5908422,3.671822,0.82779104,1.3255091,1.3221387,2.3894808,2.5349681,3.8877697,1.7130836,2.3333974,5.419678,2.416532,0.2528809,2.0226789,0.3256312,1.7676603,2.7799501,4.140674,2.044122,0.913922,9.626306,4.4931726,1.7620943,3.2074025,2.1241446,2.8715286,2.0327747,2.6395087,8.425077,0.29309487,2.8932698,1.3778216,0.6542087,True,0.0,0.13000804,0.0,0.0,0.002037752,0.0,0.0,0.0,0.0,0.06111543,0.0,6.702346,1.4267697,3.938085,0.7040212,0.0,0.0,1.0116627,0.0,4.539293,0.0,1.1017358,2.827432,0.0,0.0,0.0,1.3913443,0.33522812,0.0,3.5802555,0.0,0.0,3.273904,0.0,0.0,0.108908735,0.44315967,0.0,0.048082244,1.808021,2.3631117,0.93263125,0.25839823,0.0,0.0,1.5541325,4.5023203,2.4522083,0.5370079,0.0,3.1309,0.0,0.0,0.00025149493,0.0,0.0,1.598844,0.0,0.0,0.0,3.6755223,1.5220081,0.17540026,4.2726383,0.0,0.0,1.0369809,1.8130282,1.1478732,0.070069,0.0,0.0,0.37161943,0.0,2.6920598,1.8049885,0.011324564,0.1839397,0.0,0.0,0.14800237,3.2332604,0.0,0.0,0.0,5.175657,0.3173036,1.5077751,3.795833,0.0,0.0,0.0,0.002702519,0.60783273,7.8407364,0.6542296,0.011128779,0.0,0.0,3.0155509,0.0,0.1969728,0.2911376,1.9353493,0.0,0.0,0.31412342,0.06156551,0.0,0.0,2.501566,0.0,3.511222,0.54997545,0.0,6.5137396,4.60137,3.5791976,3.2295272,0.030881694,0.0,0.0,0.0,0.044022877,1.2871318,1.7206844,1.423317,0.00012220179,0.0,0.018707046,0.0,0.063907124,7.7610717,1.8263488,0.0,2.0830753,4.024913,3.679568,0.26151466,1.841901,0.09919314,0.19268763,0.28817415,0.12974192,0.14141828,4.436533,0.0,0.46092474,0.10102605,2.2220867,0.0,6.958406,0.1396151,2.0149689,5.698128,0.35020724,4.1179953,0.033450875,0.0048411437,0.0,0.876317,0.9527694,0.6799472,0.0,1.1751864,0.009888711,0.41615263,4.371997,0.0,0.0029504623,6.440926,9.185331,6.884625,5.5525703,0.0,0.3216756,0.16928916,0.34024408,0.0,0.1840653,0.00047650983,9.906516e-05,0.0,0.17830971,0.40046695,3.0138555,0.0,0.3414072,2.1366544,2.0524268,0.059395626,0.0,0.7721342,0.0024322374,0.030976947,0.0,1.9868054,0.045436963,0.0,0.0,0.009095855,0.695086,0.00048830017,6.8302617,1.7030824,0.011468042,0.0,2.9505427,0.16068885,0.0001173876,0.0,0.18750118,0.036273684,0.0,0.018228091,4.979061,2.84645,3.6533518,0.023549175,0.7373421,2.2333076,3.1135373,0.0,7.839826,0.09752249,4.4298525,1.2061259,0.0,0.09014438,1.6528475,0.0586081,0.0,4.721425,0.1638548,0.0016590456,0.16397965,0.085280664,0.0,0.5875555,0.020818662,0.019216344,4.4206805,0.69836605,0.0,0.021653343,0.042921916,0.02777873,0.0011217439,0.4927751,0.0,0.0,0.28798205,0.0,9.798935,0.27450448,0.045702275 4 | True,0.0,0.13000804,0.0,0.0,0.002037752,0.0,0.0,0.0,0.0,0.06111543,0.0,6.702346,1.4267697,3.938085,0.7040212,0.0,0.0,1.0116627,0.0,4.539293,0.0,1.1017358,2.827432,0.0,0.0,0.0,1.3913443,0.33522812,0.0,3.5802555,0.0,0.0,3.273904,0.0,0.0,0.108908735,0.44315967,0.0,0.048082244,1.808021,2.3631117,0.93263125,0.25839823,0.0,0.0,1.5541325,4.5023203,2.4522083,0.5370079,0.0,3.1309,0.0,0.0,0.00025149493,0.0,0.0,1.598844,0.0,0.0,0.0,3.6755223,1.5220081,0.17540026,4.2726383,0.0,0.0,1.0369809,1.8130282,1.1478732,0.070069,0.0,0.0,0.37161943,0.0,2.6920598,1.8049885,0.011324564,0.1839397,0.0,0.0,0.14800237,3.2332604,0.0,0.0,0.0,5.175657,0.3173036,1.5077751,3.795833,0.0,0.0,0.0,0.002702519,0.60783273,7.8407364,0.6542296,0.011128779,0.0,0.0,3.0155509,0.0,0.1969728,0.2911376,1.9353493,0.0,0.0,0.31412342,0.06156551,0.0,0.0,2.501566,0.0,3.511222,0.54997545,0.0,6.5137396,4.60137,3.5791976,3.2295272,0.030881694,0.0,0.0,0.0,0.044022877,1.2871318,1.7206844,1.423317,0.00012220179,0.0,0.018707046,0.0,0.063907124,7.7610717,1.8263488,0.0,2.0830753,4.024913,3.679568,0.26151466,1.841901,0.09919314,0.19268763,0.28817415,0.12974192,0.14141828,4.436533,0.0,0.46092474,0.10102605,2.2220867,0.0,6.958406,0.1396151,2.0149689,5.698128,0.35020724,4.1179953,0.033450875,0.0048411437,0.0,0.876317,0.9527694,0.6799472,0.0,1.1751864,0.009888711,0.41615263,4.371997,0.0,0.0029504623,6.440926,9.185331,6.884625,5.5525703,0.0,0.3216756,0.16928916,0.34024408,0.0,0.1840653,0.00047650983,9.906516e-05,0.0,0.17830971,0.40046695,3.0138555,0.0,0.3414072,2.1366544,2.0524268,0.059395626,0.0,0.7721342,0.0024322374,0.030976947,0.0,1.9868054,0.045436963,0.0,0.0,0.009095855,0.695086,0.00048830017,6.8302617,1.7030824,0.011468042,0.0,2.9505427,0.16068885,0.0001173876,0.0,0.18750118,0.036273684,0.0,0.018228091,4.979061,2.84645,3.6533518,0.023549175,0.7373421,2.2333076,3.1135373,0.0,7.839826,0.09752249,4.4298525,1.2061259,0.0,0.09014438,1.6528475,0.0586081,0.0,4.721425,0.1638548,0.0016590456,0.16397965,0.085280664,0.0,0.5875555,0.020818662,0.019216344,4.4206805,0.69836605,0.0,0.021653343,0.042921916,0.02777873,0.0011217439,0.4927751,0.0,0.0,0.28798205,0.0,9.798935,0.27450448,0.045702275,True,0.0,0.13000804,0.0,0.0,0.002037752,0.0,0.0,0.0,0.0,0.06111543,0.0,6.702346,1.4267697,3.938085,0.7040212,0.0,0.0,1.0116627,0.0,4.539293,0.0,1.1017358,2.827432,0.0,0.0,0.0,1.3913443,0.33522812,0.0,3.5802555,0.0,0.0,3.273904,0.0,0.0,0.108908735,0.44315967,0.0,0.048082244,1.808021,2.3631117,0.93263125,0.25839823,0.0,0.0,1.5541325,4.5023203,2.4522083,0.5370079,0.0,3.1309,0.0,0.0,0.00025149493,0.0,0.0,1.598844,0.0,0.0,0.0,3.6755223,1.5220081,0.17540026,4.2726383,0.0,0.0,1.0369809,1.8130282,1.1478732,0.070069,0.0,0.0,0.37161943,0.0,2.6920598,1.8049885,0.011324564,0.1839397,0.0,0.0,0.14800237,3.2332604,0.0,0.0,0.0,5.175657,0.3173036,1.5077751,3.795833,0.0,0.0,0.0,0.002702519,0.60783273,7.8407364,0.6542296,0.011128779,0.0,0.0,3.0155509,0.0,0.1969728,0.2911376,1.9353493,0.0,0.0,0.31412342,0.06156551,0.0,0.0,2.501566,0.0,3.511222,0.54997545,0.0,6.5137396,4.60137,3.5791976,3.2295272,0.030881694,0.0,0.0,0.0,0.044022877,1.2871318,1.7206844,1.423317,0.00012220179,0.0,0.018707046,0.0,0.063907124,7.7610717,1.8263488,0.0,2.0830753,4.024913,3.679568,0.26151466,1.841901,0.09919314,0.19268763,0.28817415,0.12974192,0.14141828,4.436533,0.0,0.46092474,0.10102605,2.2220867,0.0,6.958406,0.1396151,2.0149689,5.698128,0.35020724,4.1179953,0.033450875,0.0048411437,0.0,0.876317,0.9527694,0.6799472,0.0,1.1751864,0.009888711,0.41615263,4.371997,0.0,0.0029504623,6.440926,9.185331,6.884625,5.5525703,0.0,0.3216756,0.16928916,0.34024408,0.0,0.1840653,0.00047650983,9.906516e-05,0.0,0.17830971,0.40046695,3.0138555,0.0,0.3414072,2.1366544,2.0524268,0.059395626,0.0,0.7721342,0.0024322374,0.030976947,0.0,1.9868054,0.045436963,0.0,0.0,0.009095855,0.695086,0.00048830017,6.8302617,1.7030824,0.011468042,0.0,2.9505427,0.16068885,0.0001173876,0.0,0.18750118,0.036273684,0.0,0.018228091,4.979061,2.84645,3.6533518,0.023549175,0.7373421,2.2333076,3.1135373,0.0,7.839826,0.09752249,4.4298525,1.2061259,0.0,0.09014438,1.6528475,0.0586081,0.0,4.721425,0.1638548,0.0016590456,0.16397965,0.085280664,0.0,0.5875555,0.020818662,0.019216344,4.4206805,0.69836605,0.0,0.021653343,0.042921916,0.02777873,0.0011217439,0.4927751,0.0,0.0,0.28798205,0.0,9.798935,0.27450448,0.045702275 5 | -------------------------------------------------------------------------------- /tests/image_featurizer_testing/csv_checking/testing_data.csv: -------------------------------------------------------------------------------- 1 | images_1,images_2,derp 2 | arendt.bmp,borges.jpg,4 3 | sappho.png,,3 4 | ,, 5 | -------------------------------------------------------------------------------- /tests/test_build_featurizer.py: -------------------------------------------------------------------------------- 1 | """Test the build_featurizer code.""" 2 | import os 3 | import random 4 | import warnings 5 | import logging 6 | 7 | import keras.backend as K 8 | import numpy as np 9 | import pytest 10 | from keras.layers import Dense, Activation, Input 11 | from keras.layers.merge import add 12 | from keras.models import Sequential, Model 13 | 14 | from pic2vec.build_featurizer import (_decapitate_model, _find_pooling_constant, 15 | _splice_layer, _downsample_model_features, 16 | _initialize_model, _check_downsampling_mismatch, 17 | build_featurizer) 18 | 19 | from pic2vec.squeezenet import SqueezeNet 20 | from pic2vec.enums import MODELS, ATOL 21 | 22 | random.seed(5102020) 23 | 24 | # Create tensor for splicing 25 | SPLICING_TENSOR = K.constant(3, shape=(3, 12)) 26 | 27 | # Create featurization for finding the pooling constant 28 | POOLING_FEATURES = K.constant(2, shape=(3, 60)) 29 | 30 | # Path to checking prediction arrays for each model in _initialize_model 31 | INITIALIZED_MODEL_TEST_ARRAY = 'tests/build_featurizer_testing/{}_test_prediction.npy' 32 | 33 | 34 | 35 | @pytest.fixture(scope='module') 36 | def check_model(): 37 | # Building the checking model 38 | input_layer = Input(shape=(100, )) 39 | layer = Dense(40)(input_layer) 40 | layer = Activation('relu')(layer) 41 | layer = Dense(20)(layer) 42 | layer = Activation('relu')(layer) 43 | layer = Dense(10)(layer) 44 | layer = Activation('relu')(layer) 45 | layer = Dense(5)(layer) 46 | output_layer = Activation('softmax')(layer) 47 | 48 | check_model = Model(inputs=input_layer, outputs=output_layer) 49 | 50 | return check_model 51 | 52 | 53 | def test_decapitate_model_lazy_input(): 54 | """Test an error is raised when the model has a lazy input layer initialization""" 55 | # Raise warning when model has lazy input layer initialization 56 | error_model = Sequential([ 57 | Dense(40, input_shape=(100,)), 58 | Dense(20), 59 | Activation('softmax')]) 60 | 61 | with warnings.catch_warnings(record=True) as warning_check: 62 | _decapitate_model(error_model, 1) 63 | assert len(warning_check) == 1 64 | assert "depth issues" in str(warning_check[-1].message) 65 | 66 | 67 | def test_decapitate_model_too_deep(check_model): 68 | """Test error raised when model is decapitated too deep""" 69 | # Check for Value Error when passed a depth >= (# of layers in network) - 1 70 | with pytest.raises(ValueError): 71 | _decapitate_model(check_model, 8) 72 | 73 | 74 | def test_decapitate_model(check_model): 75 | """ 76 | This test creates a toy network, and checks that it calls the right errors 77 | and checks that it decapitates the network correctly: 78 | """ 79 | # Create test model 80 | test_model = _decapitate_model(check_model, 5) 81 | 82 | # Make checks for all of the necessary features: the model outputs, the 83 | # last layer, the last layer's connections, and the last layer's shape 84 | assert test_model.layers[-1] == test_model.layers[3] 85 | assert test_model.layers[3].outbound_nodes == [] 86 | assert test_model.outputs == [test_model.layers[3].output] 87 | assert test_model.layers[-1].output_shape == (None, 20) 88 | 89 | 90 | def test_splice_layer_bad_split(): 91 | """Check error with bad split on the tensor""" 92 | with pytest.raises(ValueError): 93 | _splice_layer(SPLICING_TENSOR, 5) 94 | 95 | 96 | def test_splice_layer(): 97 | """Test method splices tensors correctly""" 98 | # Create spliced and added layers via splicing function 99 | list_of_spliced_layers = _splice_layer(SPLICING_TENSOR, 3) 100 | # Add each of the layers together 101 | x = add(list_of_spliced_layers) 102 | # Create the spliced and added layers by hand 103 | check_layer = K.constant(9, shape=(3, 4)) 104 | # Check the math 105 | assert np.allclose(K.eval(check_layer), K.eval(x), atol=ATOL) 106 | 107 | 108 | def test_find_pooling_constant_upsample(): 109 | """Test error when trying to upsample""" 110 | with pytest.raises(ValueError): 111 | _find_pooling_constant(POOLING_FEATURES, 120) 112 | 113 | 114 | def test_find_pooling_constant_bad_divisor(): 115 | """Test error when trying to downsample to a non-divisor of the features""" 116 | with pytest.raises(ValueError): 117 | _find_pooling_constant(POOLING_FEATURES, 40) 118 | 119 | with pytest.raises(ValueError): 120 | _find_pooling_constant(POOLING_FEATURES, 0) 121 | 122 | 123 | def test_find_pooling_constant(): 124 | """Test that pooling constant given correct answer with good inputs""" 125 | assert _find_pooling_constant(POOLING_FEATURES, 6) == 10 126 | 127 | 128 | def test_downsample_model_features(): 129 | """ 130 | Test creates a toy numpy array, and checks that the method 131 | correctly downsamples the array into a hand-checked tensor 132 | """ 133 | # Create the spliced and averaged tensor via downsampling function 134 | array = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 135 | [11, 12, 13, 14, 15, 16, 17, 18, 19, 20], 136 | [21, 22, 23, 24, 25, 26, 27, 28, 29, 30] 137 | ]) 138 | tensor = K.variable(array) 139 | 140 | x = _downsample_model_features(tensor, 5) 141 | 142 | # Create the spliced and averaged tensor by hand 143 | check_array = np.array([[1.5, 3.5, 5.5, 7.5, 9.5], 144 | [11.5, 13.5, 15.5, 17.5, 19.5], 145 | [21.5, 23.5, 25.5, 27.5, 29.5] 146 | ]) 147 | check_tensor = K.variable(check_array) 148 | # Check that they are equal: that it returns the correct tensor 149 | assert np.allclose(K.eval(check_tensor), K.eval(x), atol=ATOL) 150 | 151 | 152 | def test_check_downsampling_mismatch_bad_num_features(): 153 | """Raises error with autodownsampling an odd number of features""" 154 | with pytest.raises(ValueError): 155 | _check_downsampling_mismatch(True, 0, 2049) 156 | 157 | 158 | def test_check_downsampling_mismatch_autosample(): 159 | """Test method correctly autosamples""" 160 | # Testing automatic downsampling 161 | assert _check_downsampling_mismatch(True, 0, 2048) == (True, 1024) 162 | 163 | 164 | def test_check_downsampling_mismatch_no_sample(): 165 | """Test method correctly returns with no sampling""" 166 | # Testing no downsampling 167 | assert _check_downsampling_mismatch(False, 0, 2048) == (False, 0) 168 | 169 | 170 | def test_check_downsampling_mismatch_manual_sample(): 171 | """Test method correctly returns with manual sampling""" 172 | # Testing manual downsampling 173 | assert _check_downsampling_mismatch(False, 512, 2048) == (True, 512) 174 | 175 | 176 | def check_model_equal(model1, model2): 177 | """Check whether two models are equal""" 178 | # Testing models are the same from loaded weights and downloaded from keras 179 | assert len(model1.layers) == len(model2.layers) 180 | 181 | for layer in range(len(model1.layers)): 182 | for array in range(len(model1.layers[layer].get_weights())): 183 | assert np.allclose(model1.layers[layer].get_weights()[array], 184 | model2.layers[layer].get_weights()[array], atol=ATOL) 185 | 186 | 187 | def test_initialize_model_weights_not_found(): 188 | """Test error raised when the model can't find weights to load""" 189 | error_weight = 'htraenoytinutroppodnocesaevahtondideduti/losfosraeyderdnuhenootdenmednocsecar' 190 | try: 191 | assert not os.path.isfile(error_weight) 192 | except AssertionError: 193 | logging.error('Whoops, that mirage exists. ' 194 | 'Change error_weight to a file path that does not exist.') 195 | 196 | with pytest.raises(IOError): 197 | _initialize_model('squeezenet', error_weight) 198 | 199 | 200 | def test_initialize_model_bad_weights(): 201 | """ 202 | Test error raised when the model finds the weights file, 203 | but it's not the right format 204 | """ 205 | bad_weights_file = open('bad_weights_test', 'w') 206 | bad_weights_file.write('this should fail') 207 | bad_weights_file.close() 208 | error_weight = 'bad_weights_test' 209 | 210 | try: 211 | with pytest.raises(IOError): 212 | _initialize_model('squeezenet', error_weight) 213 | finally: 214 | os.remove(error_weight) 215 | 216 | 217 | def test_initialize_model_wrong_weights(): 218 | """Test error raised when weights exist but don't match model""" 219 | squeeze_weight_path = 'pic2vec/saved_models/squeezenet_weights_tf_dim_ordering_tf_kernels.h5' 220 | assert os.path.isfile(squeeze_weight_path) 221 | 222 | with pytest.raises(ValueError): 223 | _initialize_model('vgg16', squeeze_weight_path) 224 | 225 | 226 | INITIALIZE_MODEL_CASES = [ 227 | ('squeezenet', [67], (1, 227, 227, 3)), 228 | ('vgg16', [23], (1, 224, 224, 3)), 229 | ('vgg19', [26], (1, 224, 224, 3)), 230 | ('resnet50', [176, 177], (1, 224, 224, 3)), 231 | ('inceptionv3', [313], (1, 299, 299, 3)), 232 | ('xception', [134], (1, 299, 299, 3)), 233 | ] 234 | 235 | 236 | @pytest.mark.parametrize('model_str, expected_layers, test_size', 237 | INITIALIZE_MODEL_CASES, ids=MODELS) 238 | def test_initialize_model(model_str, expected_layers, test_size): 239 | """Test the initializations of each model""" 240 | model = _initialize_model(model_str) 241 | 242 | if model_str == 'squeezenet': 243 | try: 244 | model_downloaded_weights = SqueezeNet() 245 | except Exception: 246 | raise AssertionError('Problem loading SqueezeNet weights.') 247 | check_model_equal(model, model_downloaded_weights) 248 | 249 | # Versions of Keras 2.1.5 and later sometimes use different numbers of layers for these models, 250 | # without changing any behavior for predictions. 251 | # This checks that the model uses at least one of the expected numbers of layers. 252 | assert len(model.layers) in expected_layers 253 | 254 | # Create the test image to be predicted on 255 | blank_image = np.zeros(test_size) 256 | 257 | # Pre-checked prediction 258 | existing_test_array = np.load(INITIALIZED_MODEL_TEST_ARRAY.format(model_str)) 259 | 260 | generated_test_array = model.predict_on_batch(blank_image) 261 | 262 | # Check that each model predicts correctly to see if weights were correctly loaded 263 | assert np.allclose(generated_test_array, existing_test_array, atol=ATOL) 264 | del model 265 | 266 | 267 | FEATURIZER_MODEL_DICT = dict.fromkeys(MODELS) 268 | FEAT_CASES = [ # squeezenet 269 | (1, False, 128, 128, 'squeezenet'), (1, False, 0, 512, 'squeezenet'), 270 | (1, True, 0, 256, 'squeezenet'), (2, True, 0, 256, 'squeezenet'), 271 | (2, False, 128, 128, 'squeezenet'), (2, False, 0, 512, 'squeezenet'), 272 | (3, False, 96, 96, 'squeezenet'), (3, False, 0, 384, 'squeezenet'), 273 | (3, True, 0, 192, 'squeezenet'), (4, True, 0, 192, 'squeezenet'), 274 | (4, False, 96, 96, 'squeezenet'), (4, False, 0, 384, 'squeezenet'), 275 | 276 | # vgg16 277 | (1, False, 1024, 1024, 'vgg16'), (1, False, 0, 4096, 'vgg16'), 278 | (1, True, 0, 2048, 'vgg16'), (2, True, 0, 2048, 'vgg16'), 279 | (2, False, 1024, 1024, 'vgg16'), (2, False, 0, 4096, 'vgg16'), 280 | (3, False, 128, 128, 'vgg16'), (3, False, 0, 512, 'vgg16'), 281 | (3, True, 0, 256, 'vgg16'), (4, True, 0, 256, 'vgg16'), 282 | (4, False, 128, 128, 'vgg16'), (4, False, 0, 512, 'vgg16'), 283 | 284 | # vgg19 285 | (1, False, 1024, 1024, 'vgg19'), (1, False, 0, 4096, 'vgg19'), 286 | (1, True, 0, 2048, 'vgg19'), (2, True, 0, 2048, 'vgg19'), 287 | (2, False, 1024, 1024, 'vgg19'), (2, False, 0, 4096, 'vgg19'), 288 | (3, False, 128, 128, 'vgg19'), (3, False, 0, 512, 'vgg19'), 289 | (3, True, 0, 256, 'vgg19'), (4, True, 0, 256, 'vgg19'), 290 | (4, False, 128, 128, 'vgg19'), (4, False, 0, 512, 'vgg19'), 291 | 292 | # resnet50 293 | (1, False, 512, 512, 'resnet50'), (1, False, 0, 2048, 'resnet50'), 294 | (1, True, 0, 1024, 'resnet50'), (2, True, 0, 1024, 'resnet50'), 295 | (2, False, 512, 512, 'resnet50'), (2, False, 0, 2048, 'resnet50'), 296 | (3, False, 512, 512, 'resnet50'), (3, False, 0, 2048, 'resnet50'), 297 | (3, True, 0, 1024, 'resnet50'), (4, True, 0, 1024, 'resnet50'), 298 | (4, False, 512, 512, 'resnet50'), (4, False, 0, 2048, 'resnet50'), 299 | 300 | # inceptionv3 301 | (1, False, 512, 512, 'inceptionv3'), (1, False, 0, 2048, 'inceptionv3'), 302 | (1, True, 0, 1024, 'inceptionv3'), (2, True, 0, 1024, 'inceptionv3'), 303 | (2, False, 512, 512, 'inceptionv3'), (2, False, 0, 2048, 'inceptionv3'), 304 | (3, False, 512, 512, 'inceptionv3'), (3, False, 0, 2048, 'inceptionv3'), 305 | (3, True, 0, 1024, 'inceptionv3'), (4, True, 0, 640, 'inceptionv3'), 306 | (4, False, 320, 320, 'inceptionv3'), (4, False, 0, 1280, 'inceptionv3'), 307 | 308 | # xception 309 | (1, False, 512, 512, 'xception'), (1, False, 0, 2048, 'xception'), 310 | (1, True, 0, 1024, 'xception'), (2, True, 0, 512, 'xception'), 311 | (2, False, 256, 256, 'xception'), (2, False, 0, 1024, 'xception'), 312 | (3, False, 182, 182, 'xception'), (3, False, 0, 728, 'xception'), 313 | (3, True, 0, 364, 'xception'), (4, True, 0, 364, 'xception'), 314 | (4, False, 182, 182, 'xception'), (4, False, 0, 728, 'xception') 315 | ] 316 | 317 | 318 | @pytest.mark.parametrize('depth, autosample, sample_size, expected_size, model_str', FEAT_CASES) 319 | def test_build_featurizer(depth, autosample, sample_size, expected_size, model_str): 320 | """Test all of the model iterations""" 321 | if FEATURIZER_MODEL_DICT[model_str] is None: 322 | FEATURIZER_MODEL_DICT[model_str] = _initialize_model(model_str) 323 | 324 | model = build_featurizer(depth, autosample, sample_size, 325 | model_str=model_str, loaded_model=FEATURIZER_MODEL_DICT[model_str]) 326 | assert model.layers[-1].output_shape == (None, expected_size) 327 | del model 328 | 329 | 330 | if __name__ == '__main__': 331 | test_decapitate_model() 332 | test_splice_layer() 333 | test_find_pooling_constant() 334 | test_downsample_model_features() 335 | test_initialize_model() 336 | test_build_featurizer() 337 | -------------------------------------------------------------------------------- /tests/test_data_featurizing.py: -------------------------------------------------------------------------------- 1 | """Test data_featurizing module""" 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | from keras.layers import Conv2D, Dense, Flatten 6 | from keras.models import Sequential 7 | 8 | from pic2vec.enums import ATOL 9 | from pic2vec.data_featurizing import (featurize_data, 10 | create_features, 11 | _create_features_df_helper) 12 | 13 | np.random.seed(5102020) 14 | 15 | # The paths to the test csvs 16 | CHECK_CSV_IMAGES_PATH = 'tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_images' 17 | CHECK_CSV_FULL_PATH = 'tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_full' 18 | CHECK_CSV_FEATURES_ONLY_PATH = ('tests/data_featurizing_testing/csv_testing/' 19 | 'featurize_data_check_csv_features_only') 20 | 21 | 22 | # The mock array being treated as the vectorized data 23 | check_data_temp = np.ones((4, 2, 2, 2)) 24 | check_data_temp[2] = np.zeros((2, 2, 2)) 25 | CHECK_DATA = check_data_temp 26 | 27 | # The mock array being treated as the "full featurized data" 28 | CHECK_ARRAY = np.array([[1., 2., 3.], 29 | [4., 5., 6.], 30 | [0., 0., 0.], 31 | [7., 8., 9.] 32 | ]) 33 | 34 | # Create model 35 | MODEL = Sequential([ 36 | Conv2D(5, (3, 3), input_shape=(5, 5, 3), activation='relu'), 37 | Flatten(), 38 | Dense(5) 39 | ]) 40 | 41 | 42 | def test_featurize_data_bad_array(): 43 | """Test errors with a badly formatted array""" 44 | error_array = np.ones((5, 5, 10)) 45 | 46 | with pytest.raises(ValueError): 47 | featurize_data(MODEL, error_array) 48 | 49 | 50 | def test_featurize_data(): 51 | """ 52 | Test that the featurize_data model correctly outputs the features of a toy 53 | network on a toy tensor 54 | """ 55 | # Create the checked array 56 | init_array = np.ones((5, 5, 5, 3)) 57 | 58 | for i in range(5): 59 | init_array[i] = init_array[i] * i 60 | 61 | # Check the prediction vs. the saved array 62 | check_array = np.load('tests/data_featurizing_testing/array_testing/check_featurize.npy') 63 | assert np.allclose(featurize_data(MODEL, init_array), check_array, atol=ATOL) 64 | 65 | 66 | def test_create_features_bad_feature_array(): 67 | """ 68 | Test that the model raises an error when a bad array 69 | is passed in (i.e. wrong shape) 70 | """ 71 | # An error array with the wrong size 72 | error_feature_array = np.zeros((4, 3, 2)) 73 | with pytest.raises(ValueError): 74 | create_features(CHECK_DATA, error_feature_array, 75 | 'image') 76 | 77 | 78 | def test_features_to_csv_bad_data_array(): 79 | """Raise error when a bad data array is passed (i.e. wrong shape)""" 80 | # An error array with the wrong size 81 | error_array = np.zeros((4, 3, 2)) 82 | with pytest.raises(ValueError): 83 | create_features(error_array, CHECK_ARRAY, 'image') 84 | 85 | 86 | def test_create_features_df_helper(): 87 | """Test that the correct full array is created to be passed to the create_features function""" 88 | full_df_test = _create_features_df_helper(CHECK_DATA, CHECK_ARRAY, 'image') 89 | assert full_df_test.equals(pd.read_csv(CHECK_CSV_FULL_PATH)) 90 | 91 | 92 | def test_features_to_csv(): 93 | """Test that the model creates the correct csvs from a toy array, csv, and image list""" 94 | # Create the test 95 | full_test_dataframe = create_features(CHECK_DATA, CHECK_ARRAY, 96 | 'image') 97 | 98 | # Assert that the dataframes returned are correct 99 | assert full_test_dataframe.equals(pd.read_csv(CHECK_CSV_FULL_PATH)) 100 | -------------------------------------------------------------------------------- /tests/test_feature_preprocessing.py: -------------------------------------------------------------------------------- 1 | """Test feature_preprocessing module""" 2 | import logging 3 | import os 4 | import random 5 | import pandas as pd 6 | import numpy as np 7 | import pytest 8 | 9 | from pic2vec.enums import ATOL 10 | 11 | from pic2vec.feature_preprocessing import (_create_df_with_image_paths, 12 | _find_directory_image_paths, 13 | _find_csv_image_paths, 14 | _find_combined_image_paths, 15 | _image_paths_finder, _convert_single_image, 16 | preprocess_data, 17 | natural_key) 18 | 19 | # Initialize seed to cut out any randomness (such as in image interpolation, etc) 20 | random.seed(5102020) 21 | 22 | # List of images used in testing 23 | IMAGE_LIST_SINGLE = ['arendt.bmp', 'borges.jpg', 'sappho.png'] 24 | 25 | # Shared paths 26 | IMAGE_PATH = 'tests/feature_preprocessing_testing/test_images/' 27 | CSV_PATH = 'tests/feature_preprocessing_testing/csv_testing/' 28 | IMAGE_ARRAY_PATH = 'tests/feature_preprocessing_testing/test_image_arrays/' 29 | URL_PATH = '{}url_test'.format(CSV_PATH) 30 | TEST_ARRAY = 'tests/feature_preprocessing_testing/test_preprocessing_arrays/{}.npy' 31 | 32 | # Column headers 33 | IMG_COL_HEAD = 'images' 34 | NEW_IMG_COL_HEAD = 'new_images' 35 | 36 | # Image lists for directory and url 37 | URL_LIST = ['https://s3.amazonaws.com/datarobot_public_datasets/images/pic2vec/borges.jpg', 38 | 'https://s3.amazonaws.com/datarobot_public_datasets/images/pic2vec/arendt.bmp', 39 | 'https://s3.amazonaws.com/datarobot_public_datasets/images/pic2vec/sappho.png' 40 | ] 41 | 42 | # Preprocessing paths 43 | DIRECTORY_CSV_PATH_PREPROCESS = '{}directory_preprocess_system_test'.format(CSV_PATH) 44 | ERROR_NEW_CSV_PATH_PREPROCESS = '{}generated_error_preprocess_system_test'.format(CSV_PATH) 45 | NEW_CSV_PATH_PREPROCESS = '{}generated_preprocess_system_test'.format(CSV_PATH) 46 | COMBINED_LIST_PREPROCESS = ['', 'arendt.bmp', 'sappho.png', 'arendt.bmp'] 47 | ERROR_ROW_CSV = '{}error_row'.format(CSV_PATH) 48 | 49 | # Loading image arrays 50 | arendt_array = np.load(TEST_ARRAY.format('arendt')) 51 | borges_array = np.load(TEST_ARRAY.format('borges')) 52 | sappho_array = np.load(TEST_ARRAY.format('sappho')) 53 | arendt_grayscale_array = np.load(TEST_ARRAY.format('arendt_grayscale')) 54 | sappho_grayscale_array = np.load(TEST_ARRAY.format('sappho_grayscale')) 55 | 56 | # Test arrays for build_featurizer 57 | DIRECTORY_ARRAYS = [arendt_array, borges_array, sappho_array] 58 | CSV_ARRAYS = [borges_array, arendt_array, sappho_array] 59 | COMBINED_ARRAYS = [np.zeros((borges_array.shape)), arendt_array, sappho_array, arendt_array] 60 | GRAYSCALE_ARRAYS = [np.zeros((arendt_grayscale_array.shape)), arendt_grayscale_array, 61 | sappho_grayscale_array, arendt_grayscale_array] 62 | BATCH_ARRAYS_DIR = [arendt_array, borges_array] 63 | 64 | # ---- TESTING ---- # 65 | 66 | 67 | def test_create_df_with_image_paths(): 68 | """Test method creates csv correctly from list of images""" 69 | df = _create_df_with_image_paths(IMAGE_LIST_SINGLE, IMG_COL_HEAD) 70 | 71 | assert pd.read_csv('{}create_csv_check'.format(CSV_PATH)).equals(df) 72 | 73 | 74 | def test_natural_sort(): 75 | """Test the natural sort function""" 76 | unsorted_alphanumeric = ['1.jpg', '10.jpg', '2.jpg', '15.jpg', '20.jpg', '5.jpg'] 77 | natural_sort = ['1.jpg', '2.jpg', '5.jpg', '10.jpg', '15.jpg', '20.jpg'] 78 | assert natural_sort == sorted(unsorted_alphanumeric, key=natural_key) 79 | 80 | 81 | def test_find_directory_image_paths(): 82 | """ 83 | Test method returns a sorted list of valid image files 84 | to be fed into the featurizer from a directory. 85 | """ 86 | test_image_paths = _find_directory_image_paths(IMAGE_PATH) 87 | 88 | assert test_image_paths == IMAGE_LIST_SINGLE 89 | 90 | 91 | def test_find_csv_image_paths(): 92 | """Test method correctly finds image paths in the csv, and in the right order""" 93 | check_image_paths = ['borges.jpg', 'arendt.bmp', 'sappho.png'] 94 | test_image_paths, df = _find_csv_image_paths('{}csv_image_path_check'.format(CSV_PATH), 95 | IMG_COL_HEAD) 96 | 97 | with pytest.raises(ValueError): 98 | _find_csv_image_paths('{}csv_image_path_check'.format(CSV_PATH), 'Error Column') 99 | 100 | assert test_image_paths == check_image_paths 101 | assert pd.read_csv('{}csv_image_path_check'.format(CSV_PATH)).equals(df) 102 | 103 | 104 | def test_find_combined_image_paths(): 105 | """Test that method only returns images that overlap between directory and csv""" 106 | check_image_paths = ['', 'arendt.bmp', 'sappho.png'] 107 | 108 | invalid_csv_image_path = 'heidegger.png' 109 | invalid_directory_image_path = 'borges.jpg' 110 | 111 | test_path, df = _find_combined_image_paths(IMAGE_PATH, 112 | '{}directory_combined_image_path_test' 113 | .format(CSV_PATH), IMG_COL_HEAD) 114 | 115 | with pytest.raises(ValueError): 116 | _find_combined_image_paths(IMAGE_PATH, 117 | '{}error_directory_combined_test'.format(CSV_PATH), 118 | IMG_COL_HEAD) 119 | 120 | assert invalid_csv_image_path not in test_path 121 | assert invalid_directory_image_path not in test_path 122 | 123 | assert check_image_paths == test_path 124 | assert pd.read_csv('{}directory_combined_image_path_test'.format(CSV_PATH)).equals(df) 125 | 126 | 127 | CONVERT_IMAGE_CASES = [ 128 | ('url', URL_LIST[0]), 129 | ('directory', '{}borges.jpg'.format(IMAGE_PATH)) 130 | ] 131 | 132 | 133 | @pytest.mark.parametrize('grayscale', [None, True], ids=['RGB', 'grayscale']) 134 | @pytest.mark.parametrize('size', [(299, 299), (299, 467)], ids=['scaled', 'isotropic']) 135 | @pytest.mark.parametrize('image_source,image_path', CONVERT_IMAGE_CASES, ids=['url', 'directory']) 136 | def test_convert_single_image(image_source, image_path, size, grayscale): 137 | """Test converting images from url and directory with options for size and grayscale""" 138 | iso = '' 139 | gscale = '' 140 | 141 | if size != (299, 299): 142 | iso = '_isotropic' 143 | if grayscale is not None: 144 | gscale = '_grayscale' 145 | 146 | check_array = np.load('{path}image_test{isotropic}{grayscale}.npy' 147 | .format(path=IMAGE_ARRAY_PATH, 148 | isotropic=iso, 149 | grayscale=gscale)) 150 | 151 | converted_image = _convert_single_image(image_source, 'xception', image_path, size, grayscale) 152 | 153 | assert np.allclose(check_array, converted_image, atol=ATOL) 154 | 155 | 156 | PATHS_FINDER_CASES = [ 157 | (IMAGE_PATH, '', NEW_IMG_COL_HEAD, IMAGE_LIST_SINGLE), 158 | 159 | ('', URL_PATH, IMG_COL_HEAD, URL_LIST), 160 | 161 | (IMAGE_PATH, '{}directory_combined_image_path_test'.format(CSV_PATH), 162 | IMG_COL_HEAD, ['', 'arendt.bmp', 'sappho.png']) 163 | ] 164 | 165 | 166 | @pytest.mark.parametrize('image_path, csv_path, image_column_header, check_images', 167 | PATHS_FINDER_CASES, ids=['directory_only', 'csv_only', 'combined']) 168 | def test_image_paths_finder(image_path, csv_path, image_column_header, check_images): 169 | """ 170 | Test the correct image paths returns for all three cases: directory only, 171 | csv only, and combined csv + directory 172 | """ 173 | # check the new csv doesn't already exist 174 | # generated image lists 175 | case, df = _image_paths_finder(image_path, csv_path, image_column_header) 176 | 177 | # Check the image lists match 178 | assert case == check_images 179 | 180 | 181 | def test_preprocess_data_no_input(): 182 | """Raise error if no csv or directory is passed""" 183 | with pytest.raises(ValueError): 184 | preprocess_data(IMG_COL_HEAD, 'xception', ['']) 185 | 186 | 187 | def test_preprocess_data_fake_dir(): 188 | """Raise an error if the image_path doesn't point to a real directory""" 189 | error_dir = 'egaugnalymgnidnatsrednufoerusuoyera/emdaerohwuoy/' 190 | try: 191 | assert not os.path.isdir(error_dir) 192 | except AssertionError: 193 | logging.error('Whoops, that labyrinth exists. ' 194 | 'Change error_dir to a directory path that does not exist.') 195 | with pytest.raises(TypeError): 196 | preprocess_data(IMG_COL_HEAD, 'xception', list_of_images=IMAGE_LIST_SINGLE, 197 | image_path=error_dir) 198 | 199 | assert not os.path.isfile(ERROR_NEW_CSV_PATH_PREPROCESS) 200 | 201 | 202 | @pytest.mark.xfail 203 | def test_preprocess_data_fake_csv(): 204 | """Raise an error if the csv_path doesn't point to a file""" 205 | error_file = 'rehtonaybtmaerdecnaraeppaeremasawootehtahtdootsrednueh' 206 | try: 207 | assert not os.path.isfile(error_file) 208 | except AssertionError: 209 | logging.error( 210 | 'Whoops, that dreamer exists. change to error_file to a file path that does not exist.') 211 | with pytest.raises(TypeError): 212 | preprocess_data(IMG_COL_HEAD, 'xception', csv_path=error_file, 213 | list_of_images=IMAGE_LIST_SINGLE) 214 | 215 | assert not os.path.isfile(ERROR_NEW_CSV_PATH_PREPROCESS) 216 | 217 | 218 | def test_preprocess_data_invalid_url_or_dir(): 219 | """Raise an error if the image in the column is an invalid path""" 220 | preprocess_data(IMG_COL_HEAD, 'xception', list_of_images=IMAGE_LIST_SINGLE, 221 | csv_path=ERROR_ROW_CSV) 222 | 223 | 224 | def test_preprocess_data_invalid_model_str(): 225 | """Raise an error if the model_str is not a valid model""" 226 | with pytest.raises(ValueError): 227 | preprocess_data(IMG_COL_HEAD, 'derp', [''], csv_path=DIRECTORY_CSV_PATH_PREPROCESS) 228 | 229 | 230 | def compare_preprocessing(case, csv_path, check_arrays, image_list): 231 | """Compare a case from a full preprocessing step with the expected values of that case""" 232 | # Check correct number of images vectorized 233 | assert len(case[0]) == len(check_arrays) 234 | 235 | for image in range(len(check_arrays)): 236 | # Check all data vectors correctly generated 237 | assert np.allclose(case[0][image], check_arrays[image], atol=ATOL) 238 | 239 | # csv path correctly returned as non-existent, and correct image list returned 240 | assert case[1] == image_list 241 | 242 | 243 | @pytest.mark.xfail 244 | def test_preprocess_data_grayscale(): 245 | # Ensure the new csv doesn't already exist 246 | if os.path.isfile(ERROR_NEW_CSV_PATH_PREPROCESS): 247 | os.remove(ERROR_NEW_CSV_PATH_PREPROCESS) 248 | 249 | # Create the full (data, csv_path, image_list) for each of the three cases 250 | preprocessed_case = preprocess_data(IMG_COL_HEAD, 'xception', grayscale=True, 251 | image_path=IMAGE_PATH, 252 | csv_path=DIRECTORY_CSV_PATH_PREPROCESS) 253 | 254 | # Ensure a new csv wasn't created when they weren't needed 255 | assert not os.path.isfile(ERROR_NEW_CSV_PATH_PREPROCESS) 256 | 257 | compare_preprocessing(preprocessed_case, DIRECTORY_CSV_PATH_PREPROCESS, 258 | GRAYSCALE_ARRAYS, COMBINED_LIST_PREPROCESS) 259 | 260 | 261 | PREPROCESS_DATA_CASES = [ 262 | # Tests an image directory-only preprocessing step 263 | (IMAGE_PATH, '', 264 | DIRECTORY_ARRAYS, IMAGE_LIST_SINGLE), 265 | 266 | # Tests a CSV-only URL-based preprocessing step 267 | ('', URL_PATH, 268 | CSV_ARRAYS, URL_LIST), 269 | 270 | # Tests a combined directory+csv preprocessing step 271 | (IMAGE_PATH, DIRECTORY_CSV_PATH_PREPROCESS, 272 | COMBINED_ARRAYS, COMBINED_LIST_PREPROCESS), 273 | ] 274 | 275 | 276 | @pytest.mark.parametrize('image_path, csv_path, check_arrays, image_list', 277 | PREPROCESS_DATA_CASES, ids=['dir_only', 'csv_only', 'combined']) 278 | def test_preprocess_data(image_path, csv_path, check_arrays, image_list): 279 | """ 280 | Full integration test: check for Type and Value errors for badly passed variables, 281 | and make sure that the network preprocesses data correctly for all three cases. 282 | """ 283 | 284 | # Create the full (data, csv_path, image_list) for each of the three cases 285 | preprocessed_case = preprocess_data(IMG_COL_HEAD, 'xception', list_of_images=image_list, 286 | grayscale=False, 287 | image_path=image_path, csv_path=csv_path) 288 | 289 | compare_preprocessing(preprocessed_case, csv_path, check_arrays, image_list) 290 | -------------------------------------------------------------------------------- /tests/test_image_featurizer.py: -------------------------------------------------------------------------------- 1 | """Test the full featurizer class""" 2 | import os 3 | import pytest 4 | import shutil 5 | from mock import patch 6 | 7 | import numpy as np 8 | import pandas as pd 9 | 10 | from pic2vec.image_featurizer import ImageFeaturizer, _create_csv_path, _named_path_finder 11 | from pic2vec.enums import MODELS, ATOL 12 | 13 | # Constant paths 14 | CSV_NAME = 'tests/image_featurizer_testing/csv_checking/testing_data.csv' 15 | 16 | # List of images for datasets with single and multiple image columns 17 | IMAGE_LIST_SINGLE = ['arendt.bmp', 'borges.jpg', 'sappho.png'] 18 | IMAGE_LIST_MULT = [['arendt.bmp', 'sappho.png', ''], ['borges.jpg', '', '']] 19 | 20 | # CSVs and arrays for testing predictions on datasets with a single image column 21 | CHECK_CSV_SINGLE = 'tests/image_featurizer_testing/csv_checking/{}_check_csv.csv' 22 | CHECK_ARRAY_SINGLE = 'tests/image_featurizer_testing/array_tests/check_prediction_array_{}.npy' 23 | 24 | # CSVs and arrays for testing predictions on datasets with multiple image columns 25 | CHECK_CSV_MULT = 'tests/image_featurizer_testing/csv_checking/{}_check_csv_mult.csv' 26 | CHECK_ARRAY_MULT = 'tests/image_featurizer_testing/array_tests/check_prediction_array_{}_mult.npy' 27 | 28 | # Arguments to load the data into the featurizers 29 | LOAD_DATA_ARGS_SINGLE = { 30 | 'image_columns': 'images', 31 | 'image_path': 'tests/feature_preprocessing_testing/test_images' 32 | } 33 | 34 | # Static expected attributes to compare with the featurizer attributes 35 | COMPARE_ARGS = { 36 | 'downsample_size': 0, 37 | 'image_columns': ['images'], 38 | 'automatic_downsample': False, 39 | 'csv_path': '', 40 | 'image_dict': {'images': IMAGE_LIST_SINGLE}, 41 | 'depth': 1 42 | } 43 | 44 | LOAD_DATA_ARGS_MULT_ERROR = { 45 | 'image_columns': ['images_1', 'images_2'], 46 | 'image_path': 'tests/feature_preprocessing_testing/test_images', 47 | } 48 | 49 | LOAD_DATA_ARGS_MULT = { 50 | 'image_columns': ['images_1', 'images_2'], 51 | 'image_path': 'tests/feature_preprocessing_testing/test_images', 52 | 'csv_path': CSV_NAME 53 | } 54 | 55 | COMPARE_ARGS_MULT = { 56 | 'downsample_size': 0, 57 | 'image_columns': ['images_1', 'images_2'], 58 | 'automatic_downsample': True, 59 | 'csv_path': CSV_NAME, 60 | 'image_dict': {'images_1': IMAGE_LIST_MULT[0], 'images_2': IMAGE_LIST_MULT[1]}, 61 | 'depth': 1, 62 | } 63 | # Variable attributes to load the featurizer with 64 | LOAD_PARAMS = [ 65 | ('squeezenet', (227, 227), CHECK_ARRAY_SINGLE.format('squeezenet')), 66 | ('vgg16', (224, 224), CHECK_ARRAY_SINGLE.format('vgg16')), 67 | ('vgg19', (224, 224), CHECK_ARRAY_SINGLE.format('vgg19')), 68 | ('resnet50', (224, 224), CHECK_ARRAY_SINGLE.format('resnet50')), 69 | ('inceptionv3', (299, 299), CHECK_ARRAY_SINGLE.format('inceptionv3')), 70 | ('xception', (299, 299), CHECK_ARRAY_SINGLE.format('xception')) 71 | ] 72 | 73 | LOAD_PARAMS_MULT = [ 74 | ('squeezenet', (227, 227), CHECK_ARRAY_MULT.format('squeezenet')), 75 | ('vgg16', (224, 224), CHECK_ARRAY_MULT.format('vgg16')), 76 | ('vgg19', (224, 224), CHECK_ARRAY_MULT.format('vgg19')), 77 | ('resnet50', (224, 224), CHECK_ARRAY_MULT.format('resnet50')), 78 | ('inceptionv3', (299, 299), CHECK_ARRAY_MULT.format('inceptionv3')), 79 | ('xception', (299, 299), CHECK_ARRAY_MULT.format('xception')) 80 | ] 81 | 82 | 83 | # Remove path to the generated csv if it currently exists 84 | if os.path.isdir('tests/image_featurizer_testing/csv_tests'): 85 | shutil.rmtree('tests/image_featurizer_testing/csv_tests') 86 | 87 | 88 | def compare_featurizer_class(featurizer, 89 | scaled_size, 90 | featurized_data, 91 | downsample_size, 92 | image_columns, 93 | automatic_downsample, 94 | csv_path, 95 | image_dict, 96 | depth, 97 | featurized=False, 98 | check_csv='', 99 | saved_data=True): 100 | """Check the necessary assertions for a featurizer image.""" 101 | print(featurizer.features) 102 | assert featurizer.scaled_size == scaled_size 103 | assert np.allclose(featurizer.features.astype(float).values, featurized_data, atol=ATOL) 104 | assert featurizer.downsample_size == downsample_size 105 | assert featurizer.image_columns == image_columns 106 | assert featurizer.autosample == automatic_downsample 107 | assert featurizer.csv_path == csv_path 108 | assert featurizer.image_dict == image_dict 109 | assert featurizer.depth == depth 110 | if featurized: 111 | assert np.array_equal(pd.read_csv(check_csv).columns, featurizer.features.columns) 112 | assert np.allclose(featurizer.features.astype(float), pd.read_csv(check_csv).astype(float), 113 | atol=ATOL) 114 | 115 | 116 | def compare_empty_input(featurizer): 117 | assert np.array_equal(featurizer.data, np.zeros((1))) 118 | assert featurizer.features.equals(pd.DataFrame()) 119 | assert featurizer.full_dataframe.equals(pd.DataFrame()) 120 | assert featurizer.csv_path == '' 121 | assert featurizer.image_list == '' 122 | assert featurizer.image_columns == '' 123 | assert featurizer.image_path == '' 124 | 125 | 126 | @pytest.fixture() 127 | def featurizer(): 128 | featurizer = ImageFeaturizer() 129 | return featurizer 130 | 131 | 132 | @pytest.fixture() 133 | def featurizer_with_data(): 134 | featurizer_with_data = ImageFeaturizer() 135 | featurizer_with_data.featurize(save_features=True, **LOAD_DATA_ARGS_SINGLE) 136 | return featurizer_with_data 137 | 138 | 139 | @pytest.fixture() 140 | def featurizer_autosample(): 141 | featurizer = ImageFeaturizer(autosample=True) 142 | return featurizer 143 | 144 | 145 | def test_featurize_first(featurizer): 146 | """Test that the featurizer raises an error if featurize is called before loading data""" 147 | # Raise error if attempting to featurize before loading data 148 | with pytest.raises(IOError): 149 | featurizer.featurize_preloaded_data() 150 | 151 | 152 | def test_featurize_without_image_or_csv(featurizer): 153 | with pytest.raises(ValueError, match='Must specify either image_path or csv_path as input'): 154 | return featurizer.featurize(image_columns=['images_1', 'images_2']) 155 | 156 | 157 | def test_featurizer_build(featurizer): 158 | """Test that the featurizer saves empty attributes correctly after initializing""" 159 | compare_featurizer_class(featurizer, (0, 0), np.zeros((1)), 0, '', False, '', {}, 1) 160 | 161 | 162 | def test_load_data_single_column(featurizer): 163 | """Test that the featurizer saves attributes correctly after loading data""" 164 | featurizer.load_data(**LOAD_DATA_ARGS_SINGLE) 165 | compare_featurizer_class(featurizer, (227, 227), np.zeros((1)), **COMPARE_ARGS) 166 | 167 | 168 | def test_load_data_multiple_columns_no_csv(featurizer): 169 | """Test featurizer raises error if multiple columns passed with only a directory""" 170 | with pytest.raises(ValueError): 171 | featurizer.load_data(**LOAD_DATA_ARGS_MULT_ERROR) 172 | 173 | 174 | def test_create_csv_path(): 175 | test_csv = 'test.csv' 176 | test_dir = 'tests/image_featurizer_testing/create_csv_test/' 177 | try: 178 | assert not os.path.isdir(test_dir) 179 | _create_csv_path('{}{}'.format(test_dir, test_csv)) 180 | assert os.path.isdir(test_dir) 181 | finally: 182 | shutil.rmtree(test_dir) 183 | assert not os.path.isdir(test_dir) 184 | 185 | 186 | def test_named_path_finder(): 187 | """Check that named_path_finder produces the correct output""" 188 | check_named_path = 'csv_name_modelstring_depth-n_output-x_(now)' 189 | with patch('time.strftime', return_value='now'): 190 | test_named_path = _named_path_finder('csv_name', 'modelstring', 'n', 'x', 191 | omit_model=False, omit_depth=False, omit_output=False, 192 | omit_time=False) 193 | assert check_named_path == test_named_path 194 | 195 | 196 | def test_named_path_finder_time_only_omitted(): 197 | """Check that named_path_finder produces the correct output (without time)""" 198 | check_named_path = 'csv_name_modelstring_depth-n_output-x' 199 | test_named_path = _named_path_finder('csv_name', 'modelstring', 'n', 'x', 200 | omit_model=False, omit_depth=False, omit_output=False, 201 | omit_time=True) 202 | assert check_named_path == test_named_path 203 | 204 | 205 | def test_named_path_finder_all_omitted(): 206 | """Check that named_path_finder produces the correct output (without time)""" 207 | check_named_path = 'csv_name' 208 | test_named_path = _named_path_finder('csv_name', 'modelstring', 'n', 'x', 209 | omit_model=True, omit_depth=True, omit_output=True, 210 | omit_time=True) 211 | assert check_named_path == test_named_path 212 | 213 | 214 | def test_save_csv(featurizer_with_data): 215 | with patch('pandas.DataFrame.to_csv') as mock: 216 | with patch('pic2vec.image_featurizer._create_csv_path'): 217 | featurizer_with_data.save_csv() 218 | assert 'images_featurized_squeezenet_depth-1_output-512_(' in mock.call_args[0][0] 219 | 220 | 221 | def test_save_csv_with_named_path(featurizer_with_data): 222 | with patch('pandas.DataFrame.to_csv') as mock: 223 | with patch('pic2vec.image_featurizer._create_csv_path'): 224 | featurizer_with_data.save_csv(new_csv_path='foo/bar.csv') 225 | assert mock.call_args[0][0] == 'foo/bar.csv' 226 | 227 | 228 | def test_save_csv_no_df(featurizer): 229 | with pytest.raises(AttributeError, match='No dataframe has been featurized.'): 230 | featurizer.save_csv() 231 | 232 | 233 | def test_load_data_multiple_columns(featurizer_autosample): 234 | """Test featurizer loads data correctly with multiple image columns""" 235 | featurizer_autosample.load_data(**LOAD_DATA_ARGS_MULT) 236 | compare_featurizer_class(featurizer_autosample, (227, 227), np.zeros((1)), **COMPARE_ARGS_MULT) 237 | 238 | 239 | def test_featurize_save_csv_and_features(featurizer): 240 | """Make sure the featurizer writes the name correctly to csv with robust naming config""" 241 | 242 | name, ext = os.path.splitext(CSV_NAME) 243 | check_array_path = '{}_featurized_squeezenet_depth-1_output-512'.format(name) 244 | featurizer.featurize(save_csv=True, save_features=True, omit_time=True, 245 | **LOAD_DATA_ARGS_MULT) 246 | 247 | full_check = '{}{}'.format(check_array_path, ext) 248 | feature_check = '{}{}{}'.format(check_array_path, '_features_only', ext) 249 | 250 | try: 251 | assert os.path.isfile(full_check) 252 | assert os.path.isfile(feature_check) 253 | 254 | finally: 255 | if os.path.isfile(feature_check): 256 | os.remove(feature_check) 257 | if os.path.isfile(full_check): 258 | os.remove(full_check) 259 | 260 | 261 | def test_load_then_featurize_save_csv(featurizer): 262 | """Make sure the featurizer writes the name correctly to csv with robust naming config""" 263 | 264 | name, ext = os.path.splitext(CSV_NAME) 265 | check_array_path = '{}_featurized_squeezenet_depth-1_output-512'.format(name) 266 | featurizer.load_data(**LOAD_DATA_ARGS_MULT) 267 | featurizer.featurize_preloaded_data(save_csv=True, save_features=True, omit_time=True, 268 | batch_processing=False) 269 | 270 | full_check = '{}{}'.format(check_array_path, ext) 271 | feature_check = '{}{}{}'.format(check_array_path, '_features_only', ext) 272 | 273 | try: 274 | assert os.path.isfile(full_check) 275 | assert os.path.isfile(feature_check) 276 | 277 | finally: 278 | if os.path.isfile(feature_check): 279 | os.remove(feature_check) 280 | if os.path.isfile(full_check): 281 | os.remove(full_check) 282 | 283 | 284 | def test_clear_input(featurizer): 285 | featurizer.featurize(save_features=True, omit_time=True, omit_model=True, 286 | omit_depth=True, omit_output=True, **LOAD_DATA_ARGS_SINGLE) 287 | 288 | featurizer.clear_input(confirm=True) 289 | compare_empty_input(featurizer) 290 | 291 | 292 | def test_clear_input_no_confirm(featurizer): 293 | with pytest.raises(ValueError): 294 | featurizer.clear_input() 295 | 296 | 297 | def test_load_then_featurize_data_single_column(featurizer): 298 | """Test featurizations and attributes for each model are correct with a single image column""" 299 | featurizer.load_data(**LOAD_DATA_ARGS_SINGLE) 300 | featurizer.featurize_preloaded_data(save_features=True) 301 | check_array = np.load(CHECK_ARRAY_SINGLE.format('squeezenet')) 302 | compare_featurizer_class(featurizer, (227, 227), check_array, featurized=True, 303 | check_csv=CHECK_CSV_SINGLE.format('squeezenet'), **COMPARE_ARGS) 304 | 305 | 306 | def test_load_then_featurize_data_multiple_columns(featurizer_autosample): 307 | """ 308 | Test that the base featurizer works properly if you first load the data, 309 | and then featurize it later 310 | """ 311 | featurizer_autosample.load_data(**LOAD_DATA_ARGS_MULT) 312 | featurizer_autosample.featurize_preloaded_data(save_features=True) 313 | check_array = np.load(CHECK_ARRAY_MULT.format('squeezenet')) 314 | 315 | compare_featurizer_class(featurizer_autosample, (227, 227), check_array, featurized=True, 316 | check_csv=CHECK_CSV_MULT.format('squeezenet'), **COMPARE_ARGS_MULT) 317 | 318 | 319 | def test_featurize_data_no_save_features(featurizer): 320 | """ 321 | Test that the base featurizer works properly if you first load the data, 322 | and then featurize it later, without saving the features in the intermediate step. 323 | """ 324 | featurizer.load_data(**LOAD_DATA_ARGS_SINGLE) 325 | featurizer.featurize_preloaded_data() 326 | check_array = np.array([]) 327 | 328 | compare_featurizer_class(featurizer, (227, 227), check_array, 329 | check_csv=CHECK_CSV_SINGLE.format('squeezenet'), **COMPARE_ARGS) 330 | 331 | 332 | def test_featurize_data_single_column_batch_overflow(featurizer): 333 | """ 334 | Test that the base featurizer works properly if the batch size overflows the remaining rows 335 | with a single image column 336 | """ 337 | featurizer.featurize(save_features=True, **LOAD_DATA_ARGS_SINGLE) 338 | check_array = np.load(CHECK_ARRAY_SINGLE.format('squeezenet')) 339 | 340 | compare_featurizer_class(featurizer, (227, 227), check_array, featurized=True, 341 | check_csv=CHECK_CSV_SINGLE.format('squeezenet'), **COMPARE_ARGS) 342 | 343 | 344 | def test_featurize_data_multiple_columns_batch_overflow(featurizer_autosample): 345 | """ 346 | Test that the base featurizer works properly if the batch size overflows the remaining rows 347 | with multiple image columns 348 | """ 349 | featurizer_autosample.featurize(save_features=True, **LOAD_DATA_ARGS_MULT) 350 | check_array = np.load(CHECK_ARRAY_MULT.format('squeezenet')) 351 | 352 | compare_featurizer_class(featurizer_autosample, (227, 227), check_array, featurized=True, 353 | check_csv=CHECK_CSV_MULT.format('squeezenet'), **COMPARE_ARGS_MULT) 354 | 355 | 356 | @pytest.mark.parametrize('model,size,array_path', LOAD_PARAMS, ids=MODELS) 357 | def test_featurize_single_column_no_batch_processing(model, size, array_path): 358 | """Test that all of the featurizations and attributes for each model are correct""" 359 | feat = ImageFeaturizer(model=model) 360 | feat.featurize(batch_size=0, save_features=True, **LOAD_DATA_ARGS_SINGLE) 361 | 362 | check_array = np.load(array_path) 363 | 364 | compare_featurizer_class(feat, size, check_array, featurized=True, 365 | check_csv=CHECK_CSV_SINGLE.format(model), **COMPARE_ARGS) 366 | 367 | 368 | @pytest.mark.parametrize('model,size,array_path', LOAD_PARAMS_MULT, ids=MODELS) 369 | def test_featurize_data_multiple_columns_no_batch_processing(model, size, array_path): 370 | """Test featurizations and attributes for each model are correct with multiple image columns""" 371 | feat = ImageFeaturizer(model=model, autosample=True) 372 | feat.featurize(batch_processing=False, save_features=True, **LOAD_DATA_ARGS_MULT) 373 | check_array = np.load(array_path) 374 | 375 | compare_featurizer_class(feat, size, check_array, featurized=True, 376 | check_csv=CHECK_CSV_MULT.format(model), **COMPARE_ARGS_MULT) 377 | 378 | 379 | @pytest.mark.parametrize('model,size,array_path', LOAD_PARAMS, ids=MODELS) 380 | def test_featurize_single_column_with_batch_processing(model, size, array_path): 381 | """Test that all of the featurizations and attributes for each model are correct""" 382 | feat = ImageFeaturizer(model=model) 383 | feat.featurize(batch_size=2, save_features=True, **LOAD_DATA_ARGS_SINGLE) 384 | 385 | check_array = np.load(array_path) 386 | 387 | compare_featurizer_class(feat, size, check_array, featurized=True, 388 | check_csv=CHECK_CSV_SINGLE.format(model), **COMPARE_ARGS) 389 | 390 | 391 | @pytest.mark.parametrize('model,size,array_path', LOAD_PARAMS_MULT, ids=MODELS) 392 | def test_featurize_data_multiple_columns_with_batch_processing(model, size, array_path): 393 | """Test featurizations and attributes for each model are correct with multiple image columns""" 394 | feat = ImageFeaturizer(model=model, autosample=True) 395 | feat.featurize(batch_size=2, save_features=True, **LOAD_DATA_ARGS_MULT) 396 | check_array = np.load(array_path) 397 | 398 | compare_featurizer_class(feat, size, check_array, featurized=True, 399 | check_csv=CHECK_CSV_MULT.format(model), **COMPARE_ARGS_MULT) 400 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27, py34, py35, py36, py37, flake8 3 | 4 | [travis] 5 | python = 6 | 3.7: py37 7 | 3.6: py36 8 | 3.5: py35 9 | 3.4: py34 10 | 2.7: py27 11 | 12 | [testenv:flake8] 13 | basepython=python 14 | deps=flake8 15 | commands=flake8 pic2vec 16 | 17 | [testenv] 18 | setenv = 19 | PYTHONPATH = {toxinidir} 20 | deps = 21 | -r{toxinidir}/requirements_dev.txt 22 | commands = 23 | pip install -U pip 24 | py.test --basetemp={envtmpdir} 25 | 26 | 27 | ; If you want to make tox run the tests with the same versions, create a 28 | ; requirements.txt with the pinned versions and uncomment the following lines: 29 | ; deps = 30 | ; -r{toxinidir}/requirements.txt 31 | -------------------------------------------------------------------------------- /travis_pypi_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Update encrypted deploy password in Travis config file.""" 4 | 5 | 6 | from __future__ import print_function 7 | import base64 8 | import json 9 | import os 10 | from getpass import getpass 11 | import yaml 12 | from cryptography.hazmat.primitives.serialization import load_pem_public_key 13 | from cryptography.hazmat.backends import default_backend 14 | from cryptography.hazmat.primitives.asymmetric.padding import PKCS1v15 15 | 16 | 17 | try: 18 | from urllib import urlopen 19 | except ImportError: 20 | from urllib.request import urlopen 21 | 22 | 23 | GITHUB_REPO = 'datarobot/imagefeaturizer' 24 | TRAVIS_CONFIG_FILE = os.path.join( 25 | os.path.dirname(os.path.abspath(__file__)), '.travis.yml') 26 | 27 | 28 | def load_key(pubkey): 29 | """Load public RSA key. 30 | 31 | Work around keys with incorrect header/footer format. 32 | 33 | Read more about RSA encryption with cryptography: 34 | https://cryptography.io/latest/hazmat/primitives/asymmetric/rsa/ 35 | """ 36 | try: 37 | return load_pem_public_key(pubkey.encode(), default_backend()) 38 | except ValueError: 39 | # workaround for https://github.com/travis-ci/travis-api/issues/196 40 | pubkey = pubkey.replace('BEGIN RSA', 'BEGIN').replace('END RSA', 'END') 41 | return load_pem_public_key(pubkey.encode(), default_backend()) 42 | 43 | 44 | def encrypt(pubkey, password): 45 | """Encrypt password using given RSA public key and encode it with base64. 46 | 47 | The encrypted password can only be decrypted by someone with the 48 | private key (in this case, only Travis). 49 | """ 50 | key = load_key(pubkey) 51 | encrypted_password = key.encrypt(password, PKCS1v15()) 52 | return base64.b64encode(encrypted_password) 53 | 54 | 55 | def fetch_public_key(repo): 56 | """Download RSA public key Travis will use for this repo. 57 | 58 | Travis API docs: http://docs.travis-ci.com/api/#repository-keys 59 | """ 60 | keyurl = 'https://api.travis-ci.org/repos/{0}/key'.format(repo) 61 | data = json.loads(urlopen(keyurl).read().decode()) 62 | if 'key' not in data: 63 | errmsg = "Could not find public key for repo: {}.\n".format(repo) 64 | errmsg += "Have you already added your GitHub repo to Travis?" 65 | raise ValueError(errmsg) 66 | return data['key'] 67 | 68 | 69 | def prepend_line(filepath, line): 70 | """Rewrite a file adding a line to its beginning.""" 71 | with open(filepath) as f: 72 | lines = f.readlines() 73 | 74 | lines.insert(0, line) 75 | 76 | with open(filepath, 'w') as f: 77 | f.writelines(lines) 78 | 79 | 80 | def load_yaml_config(filepath): 81 | """Load yaml config file at the given path.""" 82 | with open(filepath) as f: 83 | return yaml.load(f) 84 | 85 | 86 | def save_yaml_config(filepath, config): 87 | """Save yaml config file at the given path.""" 88 | with open(filepath, 'w') as f: 89 | yaml.dump(config, f, default_flow_style=False) 90 | 91 | 92 | def update_travis_deploy_password(encrypted_password): 93 | """Put `encrypted_password` into the deploy section of .travis.yml.""" 94 | config = load_yaml_config(TRAVIS_CONFIG_FILE) 95 | 96 | config['deploy']['password'] = dict(secure=encrypted_password) 97 | 98 | save_yaml_config(TRAVIS_CONFIG_FILE, config) 99 | 100 | line = ('# This file was autogenerated and will overwrite' 101 | ' each time you run travis_pypi_setup.py\n') 102 | prepend_line(TRAVIS_CONFIG_FILE, line) 103 | 104 | 105 | def main(args): 106 | """Add a PyPI password to .travis.yml so that Travis can deploy to PyPI. 107 | 108 | Fetch the Travis public key for the repo, and encrypt the PyPI password 109 | with it before adding, so that only Travis can decrypt and use the PyPI 110 | password. 111 | """ 112 | public_key = fetch_public_key(args.repo) 113 | password = args.password or getpass('PyPI password: ') 114 | update_travis_deploy_password(encrypt(public_key, password.encode())) 115 | print("Wrote encrypted password to .travis.yml -- you're ready to deploy") 116 | 117 | 118 | if '__main__' == __name__: 119 | import argparse 120 | parser = argparse.ArgumentParser(description=__doc__) 121 | parser.add_argument('--repo', default=GITHUB_REPO, 122 | help='GitHub repo (default: %s)' % GITHUB_REPO) 123 | parser.add_argument('--password', 124 | help='PyPI password (will prompt if not provided)') 125 | 126 | args = parser.parse_args() 127 | main(args) 128 | -------------------------------------------------------------------------------- /utils/create_test_files.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a script that is used to update test files with current versions of the scientific 3 | libraries. Whenever scientific libraries are upgraded, this can be run to check whether predictions 4 | have changed for any of the models, and update them if need be. 5 | """ 6 | import numpy as np 7 | import pandas as pd 8 | import logging 9 | 10 | from tests.test_build_featurizer import INITIALIZE_MODEL_CASES, INITIALIZED_MODEL_TEST_ARRAY 11 | from pic2vec.build_featurizer import _initialize_model 12 | from pic2vec.enums import MODELS 13 | from pic2vec import ImageFeaturizer 14 | 15 | TEST_DATA_NAME = 'tests/image_featurizer_testing/csv_checking/testing_data.csv' 16 | 17 | LOAD_DATA_ARGS_SINGLE = { 18 | 'image_columns': 'images', 19 | 'image_path': 'tests/feature_preprocessing_testing/test_images', 20 | 'save_features': True 21 | } 22 | 23 | LOAD_DATA_ARGS_MULT = { 24 | 'image_columns': ['images_1', 'images_2'], 25 | 'image_path': 'tests/feature_preprocessing_testing/test_images', 26 | 'csv_path': TEST_DATA_NAME, 27 | 'save_features': True 28 | } 29 | 30 | # Arrays used to test model predictions on single and multiple image columns 31 | CHECK_ARRAY_SINGLE = 'tests/image_featurizer_testing/array_tests/check_prediction_array_{}.npy' 32 | CHECK_ARRAY_MULT = 'tests/image_featurizer_testing/array_tests/check_prediction_array_{}_mult.npy' 33 | 34 | # CSVs used to test model predictions on single and multiple image columns 35 | CHECK_CSV_SINGLE = 'tests/image_featurizer_testing/csv_checking/{}_check_csv.csv' 36 | CHECK_CSV_MULT = 'tests/image_featurizer_testing/csv_checking/{}_check_csv_mult.csv' 37 | 38 | # This creates a dictionary mapping from each model to the required image size for the test file 39 | MODEL_TO_IMAGE_SIZE_DICT = {model_map[0]: model_map[2] for model_map in INITIALIZE_MODEL_CASES} 40 | 41 | 42 | def update_test_files(model, multiple_image_columns=False): 43 | """ 44 | This function takes a model string as the main argument, initializes the appropriate 45 | ImageFeaturizer model, and uses it to predict on the test array and CSV. It logs 46 | whether the predictions have changed, and then updates the arrays and CSVs accordingly. 47 | 48 | Parameters 49 | ---------- 50 | model : str 51 | The name of one of pic2vec's supported models 52 | 53 | multiple_image_columns : bool 54 | A boolean that determines whether to update the csvs and arrays for single or multiple 55 | image columns 56 | 57 | Returns 58 | ------- 59 | None 60 | """ 61 | # Only autosample if updating the csvs and arrays for multiple image columns 62 | f = ImageFeaturizer(model=model, autosample=multiple_image_columns) 63 | 64 | # Load and featurize the data corresponding to either the single or multiple image columns 65 | load_data = LOAD_DATA_ARGS_MULT if multiple_image_columns else LOAD_DATA_ARGS_SINGLE 66 | f.featurize(**load_data) 67 | 68 | # Updating test CSVs 69 | features = f.features 70 | test_csv = CHECK_CSV_MULT if multiple_image_columns else CHECK_CSV_SINGLE 71 | 72 | # Have to convert to float32 73 | current_csv = pd.read_csv(test_csv.format(model)) 74 | cols = current_csv.select_dtypes(include='float64').columns 75 | current_csv = current_csv.astype({col: 'float32' for col in cols}) 76 | 77 | # Check prediction consistency and update files for test CSVs if necessary 78 | test_csv_identical = features.equals(current_csv) 79 | logging.INFO("Test csv identical for {}?".format(model)) 80 | logging.INFO(test_csv_identical) 81 | 82 | if not test_csv_identical: 83 | features.to_csv(test_csv.format(model), index=False) 84 | 85 | # Updating test arrays 86 | features = f.features.astype(float).values 87 | test_array = CHECK_ARRAY_MULT if multiple_image_columns else CHECK_ARRAY_SINGLE 88 | 89 | # Check prediction consistency and update files for test arrays if necessary 90 | test_array_identical = np.array_equal(features, np.load(test_array.format(model))) 91 | 92 | logging.INFO("Test array identical for {}?".format(model)) 93 | logging.INFO(test_array_identical) 94 | 95 | if not test_array_identical: 96 | np.save(test_array.format(model), features) 97 | 98 | 99 | def update_zeros_testing(model): 100 | """ 101 | This function is used to update arrays in a lower-level part of testing (build_featurizer) than 102 | the final ImageFeaturizer. This test does not use decapitated models, but rather downloads the 103 | full Keras pretrained model and checks its baseline predictions on a single blank 104 | (i.e. all-zeros) image. 105 | 106 | This function initializes the model, and uses it to predict on a single blank image. It logs 107 | whether the predictions have changed, and then updates the test arrays if necessary. 108 | 109 | Parameters 110 | ---------- 111 | model : str 112 | The name of one of pic2vec's supported models 113 | 114 | Returns 115 | ------- 116 | None 117 | """ 118 | 119 | # Create the test image to be predicted on 120 | m = _initialize_model(model) 121 | 122 | # Initialize a blank image of the appropriate size for the model 123 | blank_image = np.zeros(MODEL_TO_IMAGE_SIZE_DICT[model]) 124 | 125 | # Compare the generated predictions against the existing test array, and update if necessary 126 | existing_test_array = np.load(INITIALIZED_MODEL_TEST_ARRAY.format(model)) 127 | generated_array = m.predict_on_batch(blank_image) 128 | 129 | blank_prediction_identical = np.array_equal(generated_array, existing_test_array) 130 | 131 | logging.INFO("Is a blank image prediction unchanged for {}?".format(model)) 132 | logging.INFO(blank_prediction_identical) 133 | 134 | if not blank_prediction_identical: 135 | np.save(INITIALIZED_MODEL_TEST_ARRAY.format(model), generated_array) 136 | 137 | 138 | if __name__ == "__main__": 139 | for model in MODELS: 140 | update_test_files(model) 141 | update_test_files(model, multiple_image_columns=True) 142 | update_zeros_testing(model) 143 | --------------------------------------------------------------------------------