├── .coveragerc ├── .gitignore ├── .pre-commit-config.yaml ├── AUTHORS ├── CHANGELOG.md ├── LICENSE ├── README.md ├── VERSION ├── circle.yml ├── docker ├── Dockerfile.base ├── Dockerfile.csv2json ├── Dockerfile.images2vecs ├── Dockerfile.notebook ├── Dockerfile.rankDirectories ├── Dockerfile.test ├── Dockerfile.vectorSiamese └── pelops_start.sh ├── docs └── chips_to_features.md ├── etl ├── compareDirectory2Directory.py ├── convertCsvToJson.py ├── makeFeaturesResNet50.py ├── makeFeaturesTopSiamese.py └── makeFeaturesYOURMODEL.py ├── maintainers.md ├── makefile ├── misc ├── pelops.png └── pelops.svg ├── pelops ├── __init__.py ├── analysis │ ├── CMC_Confidence.ipynb │ ├── CameraVsCamera.ipynb │ ├── MakeChips.ipynb │ ├── SVMBinaryCarMatch.ipynb │ ├── ScoreChips.ipynb │ ├── __init__.py │ ├── analysis.py │ ├── camerautil.py │ ├── colormakemodel_dataset_maker.ipynb │ ├── comparecameras.py │ ├── isFileImage.ipynb │ ├── labelImageCars.ipynb │ ├── makeCMCplots.ipynb │ ├── makeFeatureFiles-TEST.ipynb │ ├── makeFeatureFiles.ipynb │ ├── makeFeaturesResNet50.ipynb │ ├── makeSiameseCMC.ipynb │ ├── makeVeri.py │ ├── recomputeCorpus.ipynb │ ├── saveExtractFeatsFromChips.ipynb │ ├── siamese.ipynb │ ├── siameseModelIterator-15.py │ ├── siameseModelIterator.ipynb │ ├── splitDataset.ipynb │ ├── test_analysis.py │ └── unsorted │ │ ├── __init__.py │ │ ├── makeH5pyFile.ipynb │ │ └── recompute │ │ ├── __init__.py │ │ ├── compute.py │ │ └── extract_feats_from_chips.py ├── const.py ├── datasets │ ├── __init__.py │ ├── chip.py │ ├── chipper.py │ ├── compcar.py │ ├── dgcars.py │ ├── featuredataset.py │ ├── slice.py │ ├── str.py │ └── veri.py ├── etl │ ├── __init__.py │ ├── computeMatrixCMC.py │ ├── json2h5.py │ ├── makeDistMatrix.py │ ├── makeFeaturesResNet50.py │ └── veriFileList2Json.py ├── experiment_api │ ├── __init__.py │ ├── experiment.py │ ├── metric.py │ └── run_metric.sh ├── features │ ├── feature_producer.py │ ├── hog.py │ ├── keras_model.py │ └── resnet50.py ├── models │ ├── __init__.py │ └── makesvm.py ├── training │ ├── CNN Retrainer.ipynb │ ├── Debug CNN Retrainer.ipynb │ ├── cnn_retrainer.py │ └── utils.py ├── transform_img │ ├── __init__.py │ ├── run.sh │ └── transform.py └── utils.py ├── requirements.txt ├── setup.py └── testci ├── install.sh ├── small.hdf5 ├── small.json ├── test_chip.py ├── test_chipper.py ├── test_compcar.py ├── test_dgcars.py ├── test_experiment_utils.py ├── test_featuredataset.py ├── test_featureproducer.py ├── test_hog_feature.py ├── test_keras_load_model.py ├── test_keras_model_feature.py ├── test_resnet50_feature.py ├── test_slice.py ├── test_str.py ├── test_training_utils.py └── test_veri.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | 4 | [report] 5 | # Regexes for lines to exclude from consideration 6 | exclude_lines = 7 | # Have to re-enable the standard pragma 8 | pragma: no cover 9 | 10 | # Don't complain about missing debug-only code: 11 | def __repr__ 12 | if self\.debug 13 | 14 | # Don't complain if tests don't hit defensive assertion code: 15 | raise AssertionError 16 | raise NotImplementedError 17 | 18 | # Don't complain if non-runnable code isn't run: 19 | if 0: 20 | if __name__ == .__main__.: 21 | 22 | ignore_errors = True 23 | 24 | [html] 25 | directory = coverage_html_report -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / 
DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | venv/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | 56 | # Sphinx documentation 57 | docs/_build/ 58 | 59 | # PyBuilder 60 | target/ 61 | 62 | #PyCharm 63 | .idea 64 | 65 | #Ipython Notebook 66 | .ipynb_checkpoints 67 | 68 | # swap files 69 | *.swp 70 | 71 | # OSX crap 72 | .DS_Store 73 | 74 | # pickled models 75 | **/*.pickle 76 | 77 | #other crap 78 | **/.ropeproject 79 | checkscript.sh 80 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | - repo: https://github.com/pre-commit/pre-commit 2 | sha: v0.9.4 3 | hooks: 4 | - id: validate_config 5 | - repo: git@github.com:pre-commit/pre-commit-hooks 6 | sha: v0.6.1 7 | hooks: 8 | - id: autopep8-wrapper 9 | - id: check-case-conflict 10 | - id: check-json 11 | - id: check-merge-conflict 12 | - id: check-symlinks 13 | - id: check-yaml 14 | - id: end-of-file-fixer 15 | - id: pretty-format-json 16 | args: 17 | - --autofix 18 | - id: trailing-whitespace 19 | - repo: git@github.com:asottile/reorder_python_imports 20 | sha: v0.3.0 21 | hooks: 22 | - id: reorder-python-imports 23 | - repo: git@github.com:Lab41/verboten_words.git 24 | sha: v1.0.0 25 | hooks: 26 | - id: verboten-words 27 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | # This file lists all individuals having contributed content to the repository. 2 | # If you're submitting a patch, please add your name here in alphabetical order as part of the patch. 3 | # 4 | # For a list of active project maintainers, see the MAINTAINERS file. 5 | # 6 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 0.1.0-dev (current, unreleased) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pelops 2 | 3 | [![CircleCI](https://circleci.com/gh/Lab41/pelops.svg?style=svg)](https://circleci.com/gh/Lab41/pelops)[![codecov](https://codecov.io/gh/Lab41/pelops/branch/master/graph/badge.svg)](https://codecov.io/gh/Lab41/pelops) 4 | 5 | 6 | Pelops Logo 7 | 8 | Pelops is a project by [Lab41](http://www.lab41.org/) that uses deep learning 9 | based methods to automatically identify cars by using their large scale 10 | features—color, shape, light configuration, etc. 
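At its core, the library pairs a `FeatureDataset` (features extracted from image chips) with an `ExperimentGenerator` (multi-camera re-identification trials) and scores the results as CMC curves. The following is a minimal sketch of that flow, adapted from `pelops/analysis/CMC_Confidence.ipynb`; the HDF5 path is a placeholder and the numeric settings simply mirror the notebook's values.

```python
from pelops.datasets.featuredataset import FeatureDataset
from pelops.experiment_api.experiment import ExperimentGenerator
from pelops.analysis import analysis

# placeholder path to a feature file produced by the etl scripts
features = FeatureDataset('/path/to/features.hdf5')

# 2 cameras, 10 chips per camera, 0 chips dropped, fixed random seed
generator = ExperimentGenerator(features, 2, 10, 0, 1024)

# build 100 CMC curves with 100 experiments each, then summarize them
curves = analysis.repeat_pre_cmc(features, generator, NUMCMC=100, EXPPERCMC=100)
stats, gdata = analysis.make_cmc_stats(curves, 10)
```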
11 | 12 | ## Install Instructions 13 | 14 | Pelops provides several Docker containers that assist in running the project. 15 | You can build them by checking out the code and running make: 16 | 17 | ```bash 18 | git clone https://github.com/Lab41/pelops.git 19 | cd pelops 20 | make 21 | ``` 22 | 23 | Then: 24 | 25 | ```bash 26 | make notebook 27 | ``` 28 | 29 | This will run a container containing Pelops and a notebook server. 30 | 31 | Alternatively, you can install Pelops using `pip`: 32 | 33 | ```bash 34 | git clone https://github.com/Lab41/pelops.git 35 | pip install ./pelops 36 | ``` 37 | 38 | There are several dependencies that will need to be installed. The 39 | [`requirements.txt`](requirements.txt) should include most of them, but other 40 | programs such as [keras](https://keras.io/) and 41 | [Tensorflow](https://www.tensorflow.org/) are also required. For this reason 42 | it is suggested to use the notebook container to run Pelops. 43 | 44 | ## Documentation 45 | 46 | - [Turning Chips into features](docs/chips_to_features.md) 47 | 48 | ## Tests 49 | 50 | Tests are currently written in [pytest](https://docs.pytest.org/en/latest/). The tests are automatically run when submitting pull requests. 51 | 52 | You can run the tests in a container by calling: 53 | 54 | ```bash 55 | make test 56 | ``` 57 | 58 | This will build a docker container, mount your local version of the code, and 59 | run the tests. 60 | 61 | ## Contributing to Pelops 62 | 63 | Want to contribute? Awesome! 64 | 65 | Please make sure you have [`pre-commit`](http://pre-commit.com/) installed so 66 | that your code is checked for various issues. 67 | 68 | After that, send us a pull request! We're happy to review them! 69 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | v0.1.0-dev 2 | -------------------------------------------------------------------------------- /circle.yml: -------------------------------------------------------------------------------- 1 | # Controls the build environment 2 | machine: 3 | python: 4 | version: 3.5.2 5 | environment: 6 | PATH: /home/ubuntu/miniconda3/bin:$PATH 7 | 8 | dependencies: 9 | override: 10 | # Move to the pelops directory before performing the installation. 11 | - cd ~/pelops 12 | - source testci/install.sh: 13 | environment: 14 | DISTRIB: "conda" 15 | PYTHON_VERSION: "3.5" 16 | NUMPY_VERSION: "*" 17 | SCIPY_VERSION: "*" 18 | SCIKIT_LEARN_VERSION: "*" 19 | MATPLOTLIB_VERSION: "*" 20 | - conda install -y opencv hdfs3 21 | - conda install -y pytest pytest-cov pillow h5py scipy scikit-image 22 | - /home/ubuntu/miniconda3/bin/pip install imageio 23 | - /home/ubuntu/miniconda3/bin/pip install tensorflow==0.12.* git+git://github.com/fchollet/keras.git@2ad3544b017fe9c0d7a25ef0640baa52281372b5 24 | 25 | # Set up the commands to run as a test (override), as well as the commands to 26 | # run before (pre) and after (post). 27 | test: 28 | pre: 29 | - mkdir -p $CIRCLE_TEST_REPORTS/junit/ 30 | override: 31 | # Test installation via pip 32 | - cd /home/ubuntu/pelops && /home/ubuntu/miniconda3/bin/pip install .
33 | # Test importing installed package 34 | - /home/ubuntu/miniconda3/bin/python3 -c "import pelops; import pelops.datasets" 35 | # Run pytest tests 36 | - cd /home/ubuntu/pelops && /home/ubuntu/miniconda3/bin/python3 -m pytest -v --cov --cov-report=term-missing:skip-covered --junitxml=$CIRCLE_TEST_REPORTS/junit/junit_output.xml 37 | post: 38 | - bash <(curl -s https://codecov.io/bash) -t 08234947-61d0-48ea-b0f0-1c82d3f2dfd7 39 | -------------------------------------------------------------------------------- /docker/Dockerfile.base: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu14.04 2 | 3 | MAINTAINER Lab41 4 | 5 | RUN apt-get update && \ 6 | apt-get install -y \ 7 | bzip2 \ 8 | ca-certificates \ 9 | git \ 10 | libglib2.0-0 \ 11 | libsm6 \ 12 | libxext6 \ 13 | libxrender1 \ 14 | wget 15 | 16 | #Configure environment 17 | ENV CONDA_DIR=/opt/conda \ 18 | # 4.2.12 is the last version with Python3.5, which we need 19 | MINICONDA_SCRIPT=Miniconda3-4.2.12-Linux-x86_64.sh \ 20 | MINICONDA_SHA=c59b3dd3cad550ac7596e0d599b91e75d88826db132e4146030ef471bb434e9a 21 | 22 | # Install conda 23 | RUN cd /tmp && \ 24 | mkdir -p $CONDA_DIR && \ 25 | wget --quiet https://repo.continuum.io/miniconda/${MINICONDA_SCRIPT} && \ 26 | echo "${MINICONDA_SHA} ${MINICONDA_SCRIPT}" | sha256sum -c - && \ 27 | /bin/bash ${MINICONDA_SCRIPT} -f -b -p $CONDA_DIR && \ 28 | rm ${MINICONDA_SCRIPT} 29 | 30 | RUN $CONDA_DIR/bin/conda install --quiet --yes \ 31 | 'conda-build=2.1.*' \ 32 | 'cython=0.24*' \ 33 | 'h5py=2.6*' \ 34 | 'hdfs3=0.1.*' \ 35 | 'libhdfs3=2.2.*' \ 36 | 'numpy=1.11*' \ 37 | 'pillow=3.4*' \ 38 | 'pytest=3.0.*' \ 39 | 'python=3.5.*' \ 40 | 'scikit-image=0.12*' \ 41 | 'scikit-learn=0.18*' \ 42 | && $CONDA_DIR/bin/conda clean -tipsy 43 | 44 | RUN $CONDA_DIR/bin/conda update pip --quiet --yes 45 | 46 | # Install Python packages 47 | ENV TENSORFLOW_VERSION=0.12.* \ 48 | KERAS_VERSION=2ad3544b017fe9c0d7a25ef0640baa52281372b5 49 | RUN $CONDA_DIR/bin/pip install git+git://github.com/fchollet/keras.git@${KERAS_VERSION} \ 50 | tensorflow==${TENSORFLOW_VERSION} \ 51 | imageio 52 | 53 | ENV INDOCKERCONTAINER 1 54 | 55 | ADD . /pelops_root 56 | WORKDIR /pelops_root 57 | ENV PYTHONPATH=/pelops_root/pelops:$PYTHONPATH \ 58 | PATH=/usr/local/cuda/bin:/usr/local/nvidia/bin:$CONDA_DIR/bin:$PATH 59 | 60 | # install dependencies of plugins for pelops 61 | RUN for file in $(find . -name "requirements.txt"); \ 62 | do \ 63 | $CONDA_DIR/bin/pip install -r $file; \ 64 | done 65 | -------------------------------------------------------------------------------- /docker/Dockerfile.csv2json: -------------------------------------------------------------------------------- 1 | FROM continuumio/anaconda3:4.3.1 2 | 3 | MAINTAINER Lab41 4 | 5 | RUN mkdir -p /pelops_root 6 | WORKDIR /pelops_root 7 | COPY . . 
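# The CMD below runs `python3 -m etl.convertCsvToJson`, which reads its inputs from
# the `pelops_csv_*`, `pelops_csv_mode`, and `pelops_json` environment variables
# described in etl/convertCsvToJson.py; see docs/chips_to_features.md for a worked
# `docker run` example.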
8 | RUN pip install --no-cache-dir -r requirements.txt 9 | 10 | CMD python3 -m etl.convertCsvToJson -------------------------------------------------------------------------------- /docker/Dockerfile.images2vecs: -------------------------------------------------------------------------------- 1 | FROM l41-pelops-base 2 | 3 | MAINTAINER Lab41 4 | 5 | RUN mkdir INPUT_DIR 6 | RUN mkdir OUTPUT_DIR 7 | RUN mkdir MODEL_DIR 8 | 9 | CMD ["python", "/pelops_root/etl/makeFeaturesYOURMODEL.py","./INPUT_DIR","./OUTPUT_DIR"] 10 | -------------------------------------------------------------------------------- /docker/Dockerfile.notebook: -------------------------------------------------------------------------------- 1 | FROM l41-pelops-base 2 | 3 | MAINTAINER Lab41 4 | 5 | # The startup script installs Pelops with pip from this directory 6 | RUN mkdir /pelops 7 | WORKDIR /pelops 8 | 9 | # Run a notebook 10 | EXPOSE 8888 11 | 12 | # Install Jupyter notebook 13 | RUN conda install --quiet --yes \ 14 | 'notebook=4.1*' \ 15 | && conda clean -tipsy 16 | 17 | ADD pelops_start.sh / 18 | 19 | CMD ["/pelops_start.sh"] 20 | -------------------------------------------------------------------------------- /docker/Dockerfile.rankDirectories: -------------------------------------------------------------------------------- 1 | FROM l41-pelops-base 2 | 3 | MAINTAINER Lab41 4 | 5 | 6 | RUN mkdir INPUT_DIR1 7 | RUN mkdir INPUT_DIR2 8 | RUN mkdir MODEL_DIR 9 | RUN mkdir OUTPUT_DIR 10 | 11 | CMD ["python", "/pelops_root/etl/compareDirectory2Directory.py","./INPUT_DIR1","./INPUT_DIR2","./OUTPUT_DIR"] 12 | -------------------------------------------------------------------------------- /docker/Dockerfile.test: -------------------------------------------------------------------------------- 1 | FROM l41-pelops-base 2 | 3 | MAINTAINER Lab41 4 | 5 | # Run the tests 6 | CMD ["python", "-m","pytest","-v","-s"] 7 | -------------------------------------------------------------------------------- /docker/Dockerfile.vectorSiamese: -------------------------------------------------------------------------------- 1 | FROM l41-pelops-base 2 | 3 | MAINTAINER Lab41 4 | 5 | RUN mkdir INPUT_DIR1 6 | RUN mkdir INPUT_DIR2 7 | RUN mkdir MODEL_DIR 8 | RUN mkdir OUTPUT_DIR 9 | 10 | CMD ["python", "/pelops_root/etl/makeFeaturesTopSiamese.py","./INPUT_DIR1","./INPUT_DIR2","./OUTPUT_DIR"] 11 | -------------------------------------------------------------------------------- /docker/pelops_start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2014, IPython: interactive computing in Python 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # * Redistributions of source code must retain the above copyright notice, this 10 | # list of conditions and the following disclaimer. 11 | # 12 | # * Redistributions in binary form must reproduce the above copyright notice, 13 | # this list of conditions and the following disclaimer in the documentation 14 | # and/or other materials provided with the distribution. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | # Strict mode 28 | set -euo pipefail 29 | 30 | # Clone the repo 31 | git clone https://github.com/lab41/pelops /pelops 32 | pip install /pelops 33 | 34 | # Launch the notebook 35 | jupyter notebook --no-browser --port 8888 --ip=* --NotebookApp.token= 36 | -------------------------------------------------------------------------------- /docs/chips_to_features.md: -------------------------------------------------------------------------------- 1 | # Turning Chips to Features 2 | 3 | 1. build the docker containers using make: 4 | 5 | ```bash 6 | make 7 | ``` 8 | 9 | 2. map folders with images and and output directory, and run: 10 | 11 | ```bash 12 | CHIPDIR1=/folder/with/chips && \ 13 | OUTPUTDIR=/folder/for/output && \ 14 | docker run -v ${CHIPDIR1}:/pelops_root/INPUT_DIR -v ${OUTPUTDIR}:/pelops_root/OUTPUT_DIR l41-pelops-i2v 15 | ``` 16 | 17 | Note: Docker creates output files owned by root. Grant write privileges to OUTPUT_DIR for the current user and add `-u $(id -u $USER)` to the docker run command above to create output files owned by the current user. 18 | 19 | 3. Advanced, bring your own model: 20 | 21 | ```bash 22 | CHIPDIR1=/folder/with/chips && \ 23 | OUTPUTDIR=/folder/for/output && \ 24 | MODELDIR=/folder/with/models && \ 25 | MODELFILE=name_of_model_file && \ 26 | WEIGHTFILE=name_of_weight_file && \ 27 | LAYERNAME=layername && \ 28 | docker run -v ${CHIPDIR1}:/pelops_root/INPUT_DIR -v ${OUTPUTDIR}:/pelops_root/OUTPUT_DIR -v ${MODELDIR}:/pelops_root/MODEL_DIR -e MODEL="/pelops_root/MODEL_DIR/${MODELFILE}" -e WEIGHTS="/pelops_root/MODEL_DIR/${WEIGHTFILE}" -e LAYER="${LAYERNAME}" l41-pelops-i2v 29 | ``` 30 | 31 | Run the Siamese model as follows: 32 | 33 | ```bash 34 | CHIPDIR1=/folder/with/chips && \ 35 | CHIPDIR2=/folder/with/other/chips && \ 36 | OUTPUTDIR=/folder/for/output && \ 37 | MODELDIR=/folder/with/models && \ 38 | MODELFILE=name_of_model_file.json && \ 39 | WEIGHTFILE=name_of_weight_file.hdf5 && \ 40 | VECTORFILE=name_of_VECTOR_file.json && \ 41 | docker run -v ${CHIPDIR1}:/pelops_root/INPUT_DIR1 -v ${CHIPDIR2}:/pelops_root/INPUT_DIR2 -v ${OUTPUTDIR}:/pelops_root/OUTPUT_DIR -v ${MODELDIR}:/pelops_root/MODEL_DIR -e WEIGHTS="/pelops_root/MODEL_DIR/${WEIGHTFILE}" -e MODEL="/pelops_root/MODEL_DIR/${MODELFILE}" -e VECTORS="/pelops_root/INPUT_DIR1/${VECTORFILE}" l41-pelops-siamese 42 | ``` 43 | 44 | Run the Ranker to compare two directories as follows: 45 | 46 | ```bash 47 | CHIPDIR1=/folder/with/chips && \ 48 | CHIPDIR2=/folder/with/other/chips && \ 49 | OUTPUTDIR=/folder/for/output && \ 50 | MODELDIR=/folder/with/models && \ 51 | MODELFILE=name_of_model_file.json && \ 52 | WEIGHTFILE=name_of_weight_file.hdf5 && \ 53 | LAYERNAME=layername && \ 54 | docker run -v ${CHIPDIR1}:/pelops_root/INPUT_DIR1 -v ${CHIPDIR2}:/pelops_root/INPUT_DIR2 -v ${OUTPUTDIR}:/pelops_root/OUTPUT_DIR -v ${MODELDIR}:/pelops_root/MODEL_DIR -e WEIGHTS="/pelops_root/MODEL_DIR/${WEIGHTFILE}" -e MODEL="/pelops_root/MODEL_DIR/${MODELFILE}" -e 
LAYER="${LAYERNAME}" l41-pelops-ranker 55 | ``` 56 | 57 | Note: Docker creates output files owned by root. Grant write privileges to OUTPUT_DIR for the current user and add `-u $(id -u $USER)` to the docker run commands above to create output files owned by the current user. 58 | 59 | Run the CSV to JSON docker conversion operations as follows: 60 | 61 | ```bash 62 | CSV1=/path/to/file1.csv && \ 63 | CSV2=/path/to/file2.csv && \ 64 | MODE=product && \ 65 | JSON=/path/to/output.json && \ 66 | docker run -e pelops_csv_1="${CSV1}" -e pelops_csv_2="${CSV2}" -e pelops_csv_mode=${MODE} -e pelops_json="${JSON}" l41-pelops-c2j 67 | ``` 68 | -------------------------------------------------------------------------------- /etl/compareDirectory2Directory.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import json 3 | import os 4 | import sys 5 | import time 6 | 7 | import numpy as np 8 | import scipy.spatial.distance 9 | from keras.applications.resnet50 import preprocess_input 10 | from keras.models import Model 11 | from keras.models import model_from_json 12 | from keras.preprocessing import image 13 | 14 | 15 | def load_image(img_path): 16 | data = image.load_img(img_path, target_size=(224, 224)) 17 | x = image.img_to_array(data) 18 | x = np.expand_dims(x, axis=0) 19 | x = preprocess_input(x) 20 | return x 21 | 22 | 23 | def load_model_workaround(model_file, weight_file): 24 | # load json and create model 25 | json_file = open(model_file, 'r') 26 | loaded_model_json = json_file.read() 27 | json_file.close() 28 | loaded_model = model_from_json(loaded_model_json) 29 | # load weights into new model 30 | loaded_model.load_weights(weight_file) 31 | return loaded_model 32 | 33 | 34 | def get_models(model=None, weights=None): 35 | model = load_model_workaround(model, weights) 36 | return model 37 | 38 | 39 | def image_features(left, right, model): 40 | predictions = model.predict([left, right]) 41 | return predictions 42 | 43 | 44 | def find_images(topdir): 45 | retval = [] 46 | exten = ['jpg', 'bmp', 'png'] 47 | images = 'images' 48 | 49 | for dirpath, dirnames, files in os.walk(topdir): 50 | for name in files: 51 | if name.lower().split('.')[-1] in exten: 52 | if dirpath.lower().find(images): 53 | retval.append(os.path.join(dirpath, name)) 54 | return retval 55 | 56 | 57 | def write_data(vector_file, limage_file, rimage_file, feature): 58 | list_feature = feature.flatten().tolist() 59 | str_feature = ','.join(str(j) for j in list_feature) 60 | outdata = '{0},{1},{2}\n'.format(limage_file, rimage_file, str_feature) 61 | vector_file.write(outdata) 62 | vector_file.flush() 63 | 64 | 65 | def main(argv=None): 66 | if argv is None: 67 | argv = sys.argv 68 | image_dir_l = argv[1] 69 | image_dir_r = argv[2] 70 | vector_dir = argv[3] 71 | 72 | model_file = os.environ.get('MODEL', None) 73 | weights_file = os.environ.get('WEIGHTS', None) 74 | layer = os.environ.get('LAYER', None) 75 | 76 | vector_file_name = os.path.join( 77 | vector_dir, 'vectorOutputFile_{0}.csv'.format(time.time())) 78 | 79 | vector_file = open(vector_file_name, 'w') 80 | 81 | images_left = find_images(image_dir_l) 82 | images_right = find_images(image_dir_r) 83 | 84 | model = get_models(model_file, weights_file) 85 | 86 | for limage_file in images_left: 87 | for rimage_file in images_right: 88 | 89 | l_img = load_image(limage_file) 90 | r_img = load_image(rimage_file) 91 | 92 | feature = image_features(l_img, r_img, model) 93 | 94 | write_data(vector_file, limage_file, 
rimage_file, feature) 95 | 96 | vector_file.close() 97 | 98 | if __name__ == "__main__": 99 | sys.exit(main()) 100 | -------------------------------------------------------------------------------- /etl/convertCsvToJson.py: -------------------------------------------------------------------------------- 1 | """ 2 | Conversion script for image2vecs feature vector csvs to siamese json 3 | 4 | Environment Variables: 5 | - pelops_csv_*: one or more file paths to csvs for conversion 6 | - pelops_csv_mode: 7 | - 'product': Combine using the cartesian product of the records from 2x csvs [default] 8 | - 'combo': Combine using pair-wise combinations of records for each csv (1 or more) 9 | - pelops_json: Path to output json file 10 | """ 11 | 12 | import os 13 | import sys 14 | import traceback 15 | from pelops.utils import prep_for_siamese 16 | 17 | if __name__ == '__main__': 18 | csv_files = [v for k, v in os.environ.items() if k.startswith('pelops_csv') and os.path.isfile(v)] 19 | 20 | if len(csv_files) == 0: 21 | print("No CSV files were provided for conversion") 22 | sys.exit(-1) 23 | print("Converting {} csv files:\n\t - {}".format(len(csv_files), '\n\t - '.join(csv_files))) 24 | 25 | mode = os.getenv('pelops_csv_mode', 'product') 26 | print("Mode: {}".format(mode)) 27 | 28 | out_json = os.getenv('pelops_json', None) 29 | if out_json is None: 30 | print("Output json file path was not specified") 31 | print("Json: {}".format(out_json)) 32 | 33 | try: 34 | prep_for_siamese(*csv_files, json_file=out_json, full_combos=(mode != 'product')) 35 | print("Conversion success") 36 | except: 37 | print("Conversion error occurred:\n{}".format(traceback.format_exc())) 38 | -------------------------------------------------------------------------------- /etl/makeFeaturesResNet50.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import json 3 | import os 4 | import sys 5 | import time 6 | 7 | import numpy as np 8 | import scipy.spatial.distance 9 | from keras.applications.resnet50 import ResNet50, preprocess_input 10 | from keras.models import Model 11 | from keras.preprocessing import image 12 | 13 | 14 | def load_image(img_path): 15 | data = image.load_img(img_path, target_size=(224, 224)) 16 | x = image.img_to_array(data) 17 | x = np.expand_dims(x, axis=0) 18 | x = preprocess_input(x) 19 | return x 20 | 21 | 22 | def get_models(): 23 | # include_top needs to be True for this to work 24 | base_model = ResNet50(weights='imagenet', include_top=True) 25 | model = Model(input=base_model.input, 26 | output=base_model.get_layer('flatten_1').output) 27 | return (model, base_model) 28 | 29 | 30 | def image_features(img, model): 31 | features = np.zeros((1, 2048), dtype=np.float16) 32 | predictions = model.predict(img) 33 | return predictions 34 | 35 | 36 | def find_images(topdir): 37 | retval = [] 38 | exten = ['jpg', 'bmp', 'png'] 39 | images = 'images' 40 | 41 | for dirpath, dirnames, files in os.walk(topdir): 42 | for name in files: 43 | if name.lower().split('.')[-1] in exten: 44 | if dirpath.lower().find(images): 45 | retval.append(os.path.join(dirpath, name)) 46 | return retval 47 | 48 | 49 | def write_data(vector_file, image_file, feature): 50 | list_feature = feature.flatten().tolist() 51 | str_feature = ','.join(str(j) for j in list_feature) 52 | outdata = '{0},{1}\n'.format(image_file, str_feature) 53 | vector_file.write(outdata) 54 | vector_file.flush() 55 | 56 | 57 | def main(argv=None): 58 | if argv is None: 59 | argv = sys.argv 60 | image_dir 
= argv[1] 61 | vector_dir = argv[2] 62 | vector_file_name = os.path.join( 63 | vector_dir, 'vectorOutputFile_{0}.csv'.format(time.time())) 64 | vector_file = open(vector_file_name, 'w') 65 | 66 | images = find_images(image_dir) 67 | 68 | model, base_model = get_models() 69 | 70 | for image_file in images: 71 | img = load_image(image_file) 72 | feature = image_features(img, model) 73 | write_data(vector_file, image_file, feature) 74 | print('processed {0}'.format(image_file)) 75 | 76 | vector_file.close() 77 | 78 | if __name__ == "__main__": 79 | sys.exit(main()) 80 | -------------------------------------------------------------------------------- /etl/makeFeaturesTopSiamese.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | import time 5 | 6 | import keras.backend.tensorflow_backend as KTF 7 | import numpy as np 8 | import tensorflow as tf 9 | from keras import backend as K 10 | from keras.applications.resnet50 import preprocess_input 11 | from keras.applications.resnet50 import ResNet50 12 | from keras.callbacks import EarlyStopping 13 | from keras.callbacks import ModelCheckpoint 14 | from keras.callbacks import ReduceLROnPlateau 15 | from keras.callbacks import TensorBoard 16 | from keras.layers import Dense 17 | from keras.layers import GlobalAveragePooling2D 18 | from keras.layers import Input 19 | from keras.layers import Lambda 20 | from keras.layers import merge 21 | from keras.layers.normalization import BatchNormalization 22 | from keras.models import load_model 23 | from keras.models import Model 24 | from keras.models import model_from_json 25 | from keras.optimizers import RMSprop 26 | from keras.preprocessing import image 27 | from keras.utils.np_utils import to_categorical 28 | 29 | 30 | def just_the_top(num_training_classes, model_file, weights_file): 31 | 32 | def load_model_workaround(model_file, weight_file): 33 | # load json and create model 34 | json_file = open(model_file, 'r') 35 | loaded_model_json = json_file.read() 36 | json_file.close() 37 | loaded_model = model_from_json(loaded_model_json) 38 | # load weights into new model 39 | loaded_model.load_weights(weight_file) 40 | return loaded_model 41 | 42 | def s_distance(vects): 43 | """ 44 | return the abs difference between vectors 45 | """ 46 | x, y = vects 47 | s = K.abs(x - y) 48 | return s 49 | 50 | def s_shape(shapes): 51 | """ 52 | return the sape of the vector being used 53 | """ 54 | shape = list(shapes) 55 | outshape = (shape[0]) 56 | return tuple(outshape) 57 | 58 | original_model = load_model_workaround(model_file, weights_file) 59 | d1 = original_model.get_layer('dense_1') 60 | d1_len = d1_len = d1.get_output_shape_for(d1.get_input_shape_at(0))[1] 61 | d2 = original_model.get_layer('dense_2') 62 | b1 = original_model.get_layer('batchnormalization_1') 63 | 64 | input_left = Input(shape=(1, 1, 2048)) 65 | input_right = Input(shape=(1, 1, 2048)) 66 | 67 | # use a distance measure for making the join 68 | siamese_join = Lambda(s_distance, 69 | output_shape=s_shape)([input_left, input_right]) 70 | my_layer = GlobalAveragePooling2D()(siamese_join) 71 | my_d1 = Dense(d1_len, activation='relu')(my_layer) 72 | bn = BatchNormalization()(my_d1) 73 | predictions = Dense(num_training_classes, activation='sigmoid')(bn) 74 | model = Model([input_left, input_right], output=predictions) 75 | 76 | print(model.summary()) 77 | model.get_layer('dense_1').set_weights(d1.get_weights()) 78 | 
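    # the remaining trained layers are copied over in the same way; these layer names
    # ('dense_2', 'batchnormalization_1') must match the names Keras assigned when
    # the original siamese model was built and saved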
model.get_layer('dense_2').set_weights(d2.get_weights()) 79 | model.get_layer('batchnormalization_1').set_weights(b1.get_weights()) 80 | 81 | return model 82 | 83 | 84 | def write_data(vector_file, index, feature): 85 | list_feature = feature.flatten().tolist() 86 | str_feature = ','.join(str(j) for j in list_feature) 87 | outdata = '{0}|{1}\n'.format(index, str_feature) 88 | vector_file.write(outdata) 89 | vector_file.flush() 90 | 91 | 92 | def make_top(): 93 | a = np.ones((1, 1, 1, 2048)) 94 | top = just_the_top(3, 95 | '/pelops_root/MODEL_DIR/VeRi-siamese-weekend.model.json', 96 | '/pelops_root/MODEL_DIR/VeRi-siamese-weekend.weights.hdf5') 97 | print('*********** test **********') 98 | print(top.predict([a, a])[0]) 99 | # Out[8]: array([[ 0.98460394, 0.99653435, 0.99870515]], dtype=float32) 100 | print('*********** test **********') 101 | return top 102 | 103 | 104 | def main(argv=None): 105 | 106 | #model = make_top() 107 | # test() 108 | 109 | if argv is None: 110 | argv = sys.argv 111 | image_dir_l = argv[1] 112 | image_dir_r = argv[2] 113 | output_dir = argv[3] 114 | 115 | input_file_name = os.environ.get('VECTORS', None) 116 | model_file = os.environ.get('MODEL', None) 117 | weights_file = os.environ.get('WEIGHTS', None) 118 | 119 | vector_file_name = os.path.join( 120 | output_dir, 'vectorOutputFile_{0}.csv'.format(time.time())) 121 | 122 | vector_o_file = open(vector_file_name, 'w') 123 | vector_i_file = open(input_file_name, 'r') 124 | 125 | print(3, model_file, weights_file) 126 | model = just_the_top(3, model_file, weights_file) 127 | 128 | for index, line in enumerate(vector_i_file): 129 | line = line.strip() 130 | j_line = json.loads(line) 131 | left = j_line['left'] 132 | right = j_line['right'] 133 | np_l = np.array(left) 134 | np_r = np.array(right) 135 | np_l = np_l.reshape(1, 1, 1, 2048) 136 | np_r = np_r.reshape(1, 1, 1, 2048) 137 | data = [np_l, np_r] 138 | feature = model.predict(data) 139 | feature = feature[0] 140 | write_data(vector_o_file, index, feature) 141 | 142 | vector_o_file.close() 143 | 144 | if __name__ == "__main__": 145 | sys.exit(main()) 146 | -------------------------------------------------------------------------------- /etl/makeFeaturesYOURMODEL.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import os 3 | import sys 4 | import time 5 | 6 | import numpy as np 7 | from keras.applications.resnet50 import preprocess_input 8 | from keras.applications.resnet50 import ResNet50 9 | from keras.models import Model, model_from_json 10 | from keras.preprocessing import image 11 | 12 | DEFAULT_LAYER_NAME = 'flatten_1' 13 | 14 | 15 | def load_image(img_path): 16 | data = image.load_img(img_path, target_size=(224, 224)) 17 | x = image.img_to_array(data) 18 | x = np.expand_dims(x, axis=0) 19 | x = preprocess_input(x) 20 | return x 21 | 22 | 23 | def save_model_workaround(model, layer, model_output_file, weights_output_file, layer_output_file): 24 | print('saving model to {}'.format(model_output_file)) 25 | print('saving weights to {}'.format(weights_output_file)) 26 | print('saving layer to {}'.format(layer_output_file)) 27 | # serialize model to JSON 28 | model_json = model.to_json() 29 | with open(model_output_file, 'w') as json_file: 30 | json_file.write(model_json) 31 | # serialize weights to HDF5 32 | model.save_weights(weights_output_file) 33 | # Write layer name to text 34 | with open(layer_output_file, 'w') as lyr_out: 35 | lyr_out.write(layer) 36 | 37 | 38 | def 
load_model_workaround(model_file, weight_file): 39 | # load json and create model 40 | json_file = open(model_file, 'r') 41 | loaded_model_json = json_file.read() 42 | json_file.close() 43 | loaded_model = model_from_json(loaded_model_json) 44 | # load weights into new model 45 | loaded_model.load_weights(weight_file) 46 | return loaded_model 47 | 48 | 49 | def get_models(model=None, weights=None, layer=None): 50 | # include_top needs to be True for this to work 51 | if model is None or weights is None or layer is None: 52 | print('MODEL NOT FULLY SPECIFIED, USING RESNET FEATURES') 53 | base_model = ResNet50(weights='imagenet', include_top=True) 54 | model = Model(input=base_model.input, 55 | output=base_model.get_layer(DEFAULT_LAYER_NAME).output) 56 | else: 57 | base_model = load_model_workaround(model, weights) 58 | base_layer_names = {lyr.name for lyr in base_model.layers} 59 | base_is_siamese = all([(name in base_layer_names) for name in ['dense_1', 'dense_2', 'lambda_1']]) 60 | 61 | if base_is_siamese: 62 | print('Input model is siamese, extracting resnet.') 63 | fresh_resnet = ResNet50(weights='imagenet', include_top=True) 64 | fresh_resnet.set_weights(base_model.get_layer('resnet50').get_weights()) 65 | model = Model(input=fresh_resnet.input, 66 | output=fresh_resnet.get_layer(DEFAULT_LAYER_NAME).output) 67 | else: 68 | model = Model(input=base_model.input, 69 | output=base_model.get_layer(layer).output) 70 | return model 71 | 72 | 73 | def image_features(img, model): 74 | predictions = model.predict(img) 75 | return predictions 76 | 77 | 78 | def find_images(topdir): 79 | retval = [] 80 | exten = ['jpg', 'bmp', 'png'] 81 | images = 'images' 82 | 83 | for dirpath, dirnames, files in os.walk(topdir): 84 | for name in files: 85 | if name.lower().split('.')[-1] in exten: 86 | if dirpath.lower().find(images): 87 | retval.append(os.path.join(dirpath, name)) 88 | return retval 89 | 90 | 91 | def write_data(vector_file, image_file, feature): 92 | list_feature = feature.flatten().tolist() 93 | str_feature = ','.join(str(j) for j in list_feature) 94 | outdata = '{0},{1}\n'.format(image_file, str_feature) 95 | vector_file.write(outdata) 96 | vector_file.flush() 97 | 98 | 99 | def main(argv=None): 100 | if argv is None: 101 | argv = sys.argv 102 | image_dir = argv[1] 103 | vector_dir = argv[2] 104 | 105 | model_file = os.environ.get('MODEL', None) 106 | weights_file = os.environ.get('WEIGHTS', None) 107 | layer_name = os.environ.get('LAYER', None) 108 | 109 | vector_file_name = os.path.join( 110 | vector_dir, 'vectorOutputFile_{0}.csv'.format(time.time())) 111 | vector_file = open(vector_file_name, 'w') 112 | 113 | images = find_images(image_dir) 114 | 115 | model = get_models(model_file, weights_file, layer_name) 116 | 117 | # Export model, weights, and layer if not originally supplied by the environment 118 | if all(map(lambda v: v is None, [model_file, weights_file, layer_name])): 119 | date_time = time.strftime('%Y%m%d_%H%M%S') 120 | make_out_file = lambda n: os.path.join(vector_dir, date_time + '.' 
+ n) 121 | save_model_workaround(model, DEFAULT_LAYER_NAME, make_out_file('model'), 122 | make_out_file('weights'), make_out_file('layer')) 123 | 124 | for image_file in images: 125 | img = load_image(image_file) 126 | feature = image_features(img, model) 127 | write_data(vector_file, image_file, feature) 128 | print('processed {0}'.format(image_file)) 129 | 130 | vector_file.close() 131 | 132 | if __name__ == "__main__": 133 | sys.exit(main()) 134 | -------------------------------------------------------------------------------- /maintainers.md: -------------------------------------------------------------------------------- 1 | Listing of the project Maintainers -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | all: base image2vecs siamese ranker build-notebook csv2json 2 | 3 | # Base requirements for all containers 4 | base: 5 | docker build -t l41-pelops-base -f docker/Dockerfile.base . 6 | 7 | # Jupyter notebook server 8 | build-notebook: base 9 | docker build -t l41-pelops-notebook -f docker/Dockerfile.notebook ./docker/ 10 | 11 | notebook: build-notebook 12 | docker run -p 8888:8888 -it l41-pelops-notebook 13 | 14 | # Tests 15 | test: base 16 | docker build -t l41-pelops-tests -f docker/Dockerfile.test . 17 | docker run l41-pelops-tests 18 | 19 | # Image processing 20 | image2vecs: base 21 | docker build -t l41-pelops-i2v -f docker/Dockerfile.images2vecs . 22 | 23 | siamese: base 24 | docker build -t l41-pelops-siamese -f docker/Dockerfile.vectorSiamese . 25 | 26 | ranker: base 27 | docker build -t l41-pelops-ranker -f docker/Dockerfile.rankDirectories . 28 | 29 | # Conversion utility 30 | csv2json: 31 | docker build -t l41-pelops-c2j -f docker/Dockerfile.csv2json . 
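# Most images build on the shared `base` image; `make notebook` starts the Jupyter
# container, `make test` builds and runs the pytest image, and image2vecs/siamese/
# ranker package the feature-extraction tools. csv2json is standalone and builds
# from the Anaconda base image instead.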
32 | -------------------------------------------------------------------------------- /misc/pelops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/misc/pelops.png -------------------------------------------------------------------------------- /pelops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/__init__.py -------------------------------------------------------------------------------- /pelops/analysis/CMC_Confidence.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#set some constants\n", 10 | "DATASETFILE = '/path/to/dataFile'\n", 11 | "ITEMSPERCAMERA = 10\n", 12 | "YRANDOM=1024\n", 13 | "CAMERAS=2\n", 14 | "DROPPED=0\n", 15 | "CMC=100\n", 16 | "EXPERIMENTS=100" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "from pelops.datasets.featuredataset import FeatureDataset\n", 26 | "from pelops.experiment_api.experiment import ExperimentGenerator\n", 27 | "from pelops.analysis import analysis\n", 28 | "\n", 29 | "\n", 30 | "#do the math\n", 31 | "featureData = FeatureDataset(DATASETFILE)\n", 32 | "expGen = ExperimentGenerator(featureData, CAMERAS, ITEMSPERCAMERA, DROPPED, YRANDOM)\n", 33 | "experimentHolder = analysis.repeat_pre_cmc(featureData,expGen,NUMCMC=CMC,EXPPERCMC=EXPERIMENTS)\n", 34 | "stats,gdata = analysis.make_cmc_stats(experimentHolder,ITEMSPERCAMERA)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "%matplotlib inline\n", 44 | "import matplotlib.pyplot as plt\n", 45 | "\n", 46 | "#make the plots\n", 47 | "fig = plt.figure()\n", 48 | "ax = plt.subplot(111)\n", 49 | "\n", 50 | "ax.plot(gdata.transpose())\n", 51 | "plt.title('{} CMC curves with {} experiments per curve'.format(CMC,EXPERIMENTS))\n", 52 | "ax.legend(('-stddev','avg','+stddev'),bbox_to_anchor=(1, -0.05),\n", 53 | " fancybox=True, shadow=True, ncol=5)" 54 | ] 55 | } 56 | ], 57 | "metadata": { 58 | "anaconda-cloud": {}, 59 | "kernelspec": { 60 | "display_name": "Python 3", 61 | "language": "python", 62 | "name": "python3" 63 | }, 64 | "language_info": { 65 | "codemirror_mode": { 66 | "name": "ipython", 67 | "version": 3.0 68 | }, 69 | "file_extension": ".py", 70 | "mimetype": "text/x-python", 71 | "name": "python", 72 | "nbconvert_exporter": "python", 73 | "pygments_lexer": "ipython3", 74 | "version": "3.5.2" 75 | } 76 | }, 77 | "nbformat": 4, 78 | "nbformat_minor": 0 79 | } -------------------------------------------------------------------------------- /pelops/analysis/CameraVsCamera.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "cd '~/work/pelops'" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "%matplotlib inline\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "#import time\n", 21 | "import numpy as np\n", 22 | "from 
pelops.datasets.featuredataset import FeatureDataset\n", 23 | "from pelops.experiment_api.experiment import ExperimentGenerator\n", 24 | "from pelops.datasets import chip\n", 25 | "from pelops.models.makesvm import train_svm\n", 26 | "from pelops.analysis.comparecameras import mad_matrix" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "ITEMSPERCAMERA = 2\n", 36 | "NUMCAMERAS = 2\n", 37 | "TRAIN_RANDOM=1024\n", 38 | "DROPPED=0\n", 39 | "TRAIN_FEATURES = '/Users/dgrossman/image_NEW_TRAIN'\n", 40 | "EXAMPLES = 1000 \n", 41 | "\n", 42 | "fd_train = FeatureDataset(TRAIN_FEATURES)\n", 43 | "eg_train = ExperimentGenerator(fd_train, NUMCAMERAS, ITEMSPERCAMERA, DROPPED, TRAIN_RANDOM)\n" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "clf_train = train_svm(EXAMPLES,fd_train,eg_train)\n" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "g_train,b_train = mad_matrix(EXAMPLES,clf_train,fd_train,eg_train,'cam')" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "ITEMSPERCAMERA = 2\n", 71 | "NUMCAMERAS = 2\n", 72 | "TEST_RANDOM=1024\n", 73 | "DROPPED=0\n", 74 | "TEST_FEATURES = '/Users/dgrossman/image_NEW_TEST'\n", 75 | "EXAMPLES = 1000\n", 76 | "\n", 77 | "fd_test = FeatureDataset(TEST_FEATURES)\n", 78 | "eg_test = ExperimentGenerator(fd_test, NUMCAMERAS, ITEMSPERCAMERA, DROPPED, TEST_RANDOM)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "g_test,b_test = mad_matrix(EXAMPLES,clf_train,fd_test,eg_test,'cam')" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "allset = set()\n", 97 | "for key in g_test.keys():\n", 98 | " l,r = key.split('|')\n", 99 | " allset.add(l)\n", 100 | " allset.add(r)\n", 101 | "for key in b_test.keys():\n", 102 | " l,r = key.split('|')\n", 103 | " allset.add(l)\n", 104 | " allset.add(r)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "len(allset)\n", 114 | "s = (len(allset),len(allset))\n", 115 | "names = [x for x in allset]\n", 116 | "name2index = dict()\n", 117 | "index2name = dict()\n", 118 | "for index,item in enumerate(names):\n", 119 | " name2index[item] = index\n", 120 | " index2name[index] = item" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "matrix = np.zeros(s)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "for carpcar in g_test.keys():\n", 139 | " n = g_test[carpcar]\n", 140 | " d = n\n", 141 | " if carpcar in b_test:\n", 142 | " d += b_test[carpcar]\n", 143 | " l,r = carpcar.split('|')\n", 144 | " matrix[name2index[l]][name2index[r]] = n / float(d)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "counts = list()\n", 154 | "spoo = list()\n", 155 | "\n", 156 | "for i in index2name:\n", 157 | " counts.append(i)\n", 
158 | " spoo.append(index2name[i])" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "import numpy as np\n", 168 | "import matplotlib.pyplot as plt\n", 169 | "plt.imshow(matrix,cmap='hot')\n", 170 | "plt.colorbar()\n", 171 | "plt.xticks(counts, spoo)\n", 172 | "plt.yticks(counts,spoo)" 173 | ] 174 | } 175 | ], 176 | "metadata": { 177 | "anaconda-cloud": {}, 178 | "celltoolbar": "Raw Cell Format", 179 | "kernelspec": { 180 | "display_name": "Python [conda root]", 181 | "language": "python", 182 | "name": "conda-root-py" 183 | }, 184 | "language_info": { 185 | "codemirror_mode": { 186 | "name": "ipython", 187 | "version": 3 188 | }, 189 | "file_extension": ".py", 190 | "mimetype": "text/x-python", 191 | "name": "python", 192 | "nbconvert_exporter": "python", 193 | "pygments_lexer": "ipython3", 194 | "version": "3.5.2" 195 | } 196 | }, 197 | "nbformat": 4, 198 | "nbformat_minor": 1 199 | } 200 | -------------------------------------------------------------------------------- /pelops/analysis/SVMBinaryCarMatch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "cd '~/work/pelops'" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "%matplotlib inline\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "#import time\n", 21 | "#import numpy as np\n", 22 | "from pelops.datasets.featuredataset import FeatureDataset\n", 23 | "from pelops.experiment_api.experiment import ExperimentGenerator\n", 24 | "from pelops.datasets import chip\n", 25 | "from pelops.models.makesvm import train_svm, test_svm\n", 26 | "from pelops.analysis.comparecameras import mad_matrix" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "ITEMSPERCAMERA = 2\n", 36 | "NUMCAMERAS = 2\n", 37 | "TRAIN_RANDOM=1024\n", 38 | "DROPPED=0\n", 39 | "TRAIN_FEATURES = '/Users/dgrossman/image_NEW_TINY_TRAIN'\n", 40 | "\n", 41 | "fd_train = FeatureDataset(TRAIN_FEATURES)\n", 42 | "eg_train = ExperimentGenerator(fd_train, NUMCAMERAS, ITEMSPERCAMERA, DROPPED, TRAIN_RANDOM)\n" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "ITEMSPERCAMERA = 2\n", 52 | "NUMCAMERAS = 2\n", 53 | "TEST_RANDOM=1024\n", 54 | "DROPPED=0\n", 55 | "TEST_FEATURES = '/Users/dgrossman/image_NEW_TINY_TEST'\n", 56 | "\n", 57 | "fd_test = FeatureDataset(TEST_FEATURES)\n", 58 | "eg_test = ExperimentGenerator(fd_test, NUMCAMERAS, ITEMSPERCAMERA, DROPPED, TEST_RANDOM)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "testpoints = [50,100,200,400,800,1600]" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "results = list()\n", 77 | "for testpoint in testpoints:\n", 78 | " clf_train = train_svm(testpoint,fd_train,eg_train)\n", 79 | " result = test_svm(testpoint,clf_train,fd_test,eg_test)\n", 80 | " print ('items: {}, score {}'.format(testpoint,result))\n", 81 | " results.append((testpoint,result))" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | 
"execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "for i,s in results:\n", 91 | " print('items:{}, score:{}'.format(i,s))" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [] 100 | } 101 | ], 102 | "metadata": { 103 | "anaconda-cloud": {}, 104 | "kernelspec": { 105 | "display_name": "Python [conda root]", 106 | "language": "python", 107 | "name": "conda-root-py" 108 | }, 109 | "language_info": { 110 | "codemirror_mode": { 111 | "name": "ipython", 112 | "version": 3 113 | }, 114 | "file_extension": ".py", 115 | "mimetype": "text/x-python", 116 | "name": "python", 117 | "nbconvert_exporter": "python", 118 | "pygments_lexer": "ipython3", 119 | "version": "3.5.2" 120 | } 121 | }, 122 | "nbformat": 4, 123 | "nbformat_minor": 1 124 | } 125 | -------------------------------------------------------------------------------- /pelops/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/analysis/__init__.py -------------------------------------------------------------------------------- /pelops/analysis/analysis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import defaultdict 3 | from scipy.spatial.distance import cosine, euclidean 4 | 5 | 6 | # compute cosine distance 7 | # 0 -> things are closer 8 | # 1 -> things are farther 9 | def comp_cosine(cam1_feat, cam2_feat): 10 | retval = 1 - cosine(cam1_feat, cam2_feat) 11 | return (retval) 12 | 13 | 14 | # compute Euclidean distance 15 | # 0 -> things are closer 16 | # + -> things are farther 17 | def comp_euclid(cam1_feat, cam2_feat): 18 | retval = abs(euclidean(cam1_feat, cam2_feat)) 19 | return (retval) 20 | 21 | 22 | # do the comparisons between chips 23 | # cam1 - listing of chips seen at cam1 24 | # cam2 - listing of chips seen at cam2 25 | # comparison - function to compare 2 vectors should return small things 26 | # when comparison is close, large otherwise 27 | # verbose - return more info if true 28 | def is_correct_match(featureData, 29 | cam1, 30 | cam2, 31 | comparison=comp_cosine, verbose=False): 32 | similarities = [] 33 | for cam1_chip in cam1: 34 | cam1_feat = featureData.get_feats_for_chip(cam1_chip) 35 | for cam2_chip in cam2: 36 | cam2_feat = featureData.get_feats_for_chip(cam2_chip) 37 | similarity = comparison(cam1_feat, cam2_feat) 38 | similarities.append((similarity, cam1_chip, cam2_chip)) 39 | similarities.sort(reverse=True) 40 | for i, (similarity, chip1, chip2) in enumerate(similarities): 41 | # return best_match 42 | if chip1.car_id == chip2.car_id: 43 | if verbose: 44 | return i, similarities 45 | else: 46 | return i 47 | raise ValueError("Huh?") 48 | 49 | 50 | # do EXPPERCMC, determine 51 | # featureData - big table to look up data 52 | # experimentGen - function to create experiments 53 | # EXPPERCMC - number of experiments to run for a single CMC 54 | # comparison - function to compare 2 feature vectors 55 | def pre_cmc(featureData, experimentGen, 56 | EXPPERCMC=1000, comparison=comp_cosine): 57 | 58 | num_downs = defaultdict(int) 59 | for i in range(EXPPERCMC): 60 | a = experimentGen.generate() 61 | num_down = is_correct_match(featureData, a[0], a[1], 62 | comparison=comparison) 63 | num_downs[num_down] += 1 64 | 65 | keys = sorted(num_downs) 66 | vals = [num_downs[key] for key 
in keys] 67 | return((keys, np.array(vals)/EXPPERCMC)) 68 | 69 | 70 | # Generate unprocessed CMC curves 71 | # the data needs to be summed to make the correct 72 | # CMC curve 73 | # featureData - FeatureDataset of chips 74 | # experimentGen - ExperimentGenerator 75 | # NUMCMC - number of CMC to build 76 | # EXPPERCMC - number of experiments run per CMC 77 | # comparison - function that compares two feature vectors returning 78 | # distance measure, 0 -> close big -> far 79 | def repeat_pre_cmc(featureData, experimentGen, NUMCMC=100, 80 | EXPPERCMC=1000, comparison=comp_cosine): 81 | experimentHolder = [] 82 | for experiment in range(NUMCMC): 83 | experimentHolder.append(pre_cmc(featureData, experimentGen, 84 | EXPPERCMC=EXPPERCMC, 85 | comparison=comparison)) 86 | return experimentHolder 87 | 88 | 89 | # finalize creation of the CMC curves 90 | # generate statistics on the CMC curves 91 | # return all 92 | # experimentHolder - array of CMC curves 93 | # itemsPerCamera - number of items on a camera 94 | def make_cmc_stats(experimentHolder, itemsPerCamera): 95 | comparisons = itemsPerCamera*itemsPerCamera 96 | stats = np.zeros((len(experimentHolder), comparisons)) 97 | 98 | for index, (keys, vals) in enumerate(experimentHolder): 99 | for keyIndex in range(len(keys)): 100 | stats[index, keys[keyIndex]] = vals[keyIndex] 101 | 102 | for index in range(len(stats[:, ])): 103 | total_sum = 0.0 104 | offsetlen = len(stats[0]) 105 | for sample in range(offsetlen): 106 | total_sum += stats[index, sample] 107 | stats[index, sample] = total_sum 108 | 109 | gdata = np.zeros((3, comparisons)) 110 | 111 | for i in range(comparisons): 112 | gdata[1, i] = np.average(stats[:, i]) 113 | for i in range(comparisons): 114 | stddev = np.std(stats[:, i]) 115 | gdata[0, i] = gdata[1, i] - stddev 116 | gdata[2, i] = gdata[1, i] + stddev 117 | 118 | return (stats, gdata) 119 | -------------------------------------------------------------------------------- /pelops/analysis/camerautil.py: -------------------------------------------------------------------------------- 1 | """ utilities when working with cameras""" 2 | 3 | from collections import defaultdict 4 | 5 | 6 | def nameit_cam(first, second): 7 | """ 8 | concatenate chip names together in a seperable way 9 | first(chip) - first item 10 | second(chip) - second item 11 | """ 12 | return '{}|{}'.format(first.cam_id, second.cam_id) 13 | 14 | 15 | def nameit_car(first, second): 16 | """ 17 | concatenate chip.car names together in a seperable way 18 | first(chip) - first item 19 | second(chip) - second imte 20 | """ 21 | return '{}|{}'.format(first.car_id, second.car_id) 22 | 23 | 24 | def get_match_id(cameras): 25 | """ 26 | find the car of interest from a set of cameras 27 | 28 | cameras(list(list(chips)))): list of the cameras with cars in each camera 29 | """ 30 | chosendict = defaultdict(int) 31 | for camera in cameras: 32 | for car in camera: 33 | chosendict[car.car_id] += 1 34 | mymax = -1 35 | myid = None 36 | for k in chosendict.keys(): 37 | if chosendict[k] > mymax: 38 | mymax = chosendict[k] 39 | myid = k 40 | return myid 41 | 42 | 43 | def make_good_bad(cameras, car_id): 44 | """ 45 | make a list of cars of interest, and a list of other 46 | 47 | cameras(list(list(chips))): list of the cameras with the cars in each cameras 48 | car_id(): the id of the car of interest 49 | """ 50 | goodlist = list() 51 | bad_list = list() 52 | for camera in cameras: 53 | for car in camera: 54 | if car.car_id == car_id: 55 | goodlist.append(car) 56 | else: 57 | 
bad_list.append(car) 58 | return (goodlist, bad_list) 59 | 60 | 61 | def glue(vec_a, vec_b): 62 | """ 63 | concatenate two smaller vectors to a larger vector 64 | vec_a : first vector 65 | vec_b : second vector 66 | """ 67 | retval = list() 68 | retval.extend(vec_a) 69 | retval.extend(vec_b) 70 | return retval 71 | -------------------------------------------------------------------------------- /pelops/analysis/comparecameras.py: -------------------------------------------------------------------------------- 1 | """ camera comparison """ 2 | 3 | import itertools 4 | from collections import defaultdict 5 | 6 | import numpy as np 7 | from tqdm import tnrange 8 | 9 | from pelops.analysis.camerautil import (get_match_id, glue, make_good_bad, 10 | nameit_cam, nameit_car) 11 | 12 | 13 | def eval_good_bad(first, second, clf, featuredataset, goodmatches, badmatches, attribute_name): 14 | """ 15 | label examples of good and bad comparisons 16 | 17 | take two chips, concantenate their feature vectors 18 | and create a balanced dataset of matches and differences 19 | 20 | first(Chip): image to evaluate 21 | second(Chip): image to evaluate 22 | clr(classifier): classifier used to evaluate chips 23 | fd(featureDataset): maps chips to features 24 | goodmatches(defaultdictionary(int)): counts of good matches 25 | badmatches(defaultdictionary(int)): counts of bad matches 26 | attribute_name(str): which attribute to pull names from 27 | """ 28 | 29 | namefunc = None 30 | if attribute_name == 'car': 31 | namefunc = nameit_car 32 | else: 33 | namefunc = nameit_cam 34 | 35 | bigvec1 = glue(featuredataset.get_feats_for_chip(first), 36 | featuredataset.get_feats_for_chip(second)) 37 | 38 | bigvec1np = np.array(bigvec1) 39 | #bigvec1np.reshape(1, -1) 40 | 41 | bigvec2 = glue(featuredataset.get_feats_for_chip(second), 42 | featuredataset.get_feats_for_chip(first)) 43 | 44 | bigvec2np = np.array(bigvec2) 45 | # bigvec2np.reshape(1, -1)) 46 | 47 | decision = clf.predict(bigvec1np.reshape(1, -1)) 48 | name = namefunc(first, second) 49 | 50 | tally_decision(decision, goodmatches, name, badmatches) 51 | 52 | decision = clf.predict(bigvec2np.reshape(1, -1)) 53 | name = namefunc(second, first) 54 | 55 | tally_decision(decision, goodmatches, name, badmatches) 56 | 57 | 58 | def tally_decision(decision, goodpic, name, badpic): 59 | """ 60 | count the number of matches for a name 61 | 62 | decision(int): whether the classifier said they matched 63 | goodpic(defaultdict(int)): list of good matches 64 | badpic(defaultdict(int)): list of bad matches 65 | name(str): concatenation of names of first and second pics 66 | """ 67 | if decision == 1: 68 | goodpic[name] += 1 69 | else: 70 | badpic[name] += 1 71 | 72 | 73 | def mad_matrix(examples, clf, featuredataset, examplegenerator, attribute_name='car'): 74 | """ 75 | run examples experiments to see how cars are declaired 76 | the same or different by the clf classifier.abs 77 | 78 | examples(int): number of trials 79 | clf(classifier): classifier to make same/different distinciton 80 | fd(featureDataset) : allows joining of chip to features 81 | eg(experimentGenerator): makes expermients for testing 82 | """ 83 | 84 | ddg = defaultdict(int) 85 | ddb = defaultdict(int) 86 | 87 | for _ in tnrange(examples): 88 | cameras_test = examplegenerator.generate() 89 | match_id = get_match_id(cameras_test) 90 | goods, bads = make_good_bad(cameras_test, match_id) 91 | good0 = goods[0] 92 | good1 = goods[1] 93 | bad0 = bads[0] 94 | bad1 = bads[1] 95 | 96 | eval_good_bad(good0, good1, clf, 
featuredataset, 97 | ddg, ddb, attribute_name) 98 | eval_good_bad(bad0, bad1, clf, featuredataset, 99 | ddb, ddg, attribute_name) 100 | 101 | return(ddg, ddb) 102 | 103 | 104 | def make_work(fd_train, lessons, outcomes, items, label): 105 | """ 106 | makes a listing of work from chips for classification 107 | 108 | fd_train(featureDataset): training features 109 | lessons(list): feature vectors 110 | outcomes(list): expected outcome for the comparison 111 | items(list(chips)): list of chips for comparison 112 | label(int): expected label for the comparison 113 | """ 114 | workitems = itertools.permutations(items, 2) 115 | for workitem in workitems: 116 | item = glue(fd_train.get_feats_for_chip( 117 | workitem[0]), fd_train.get_feats_for_chip(workitem[1])) 118 | 119 | lessons.append(item) 120 | outcomes.append(label) 121 | 122 | item = glue(fd_train.get_feats_for_chip( 123 | workitem[1]), fd_train.get_feats_for_chip(workitem[0])) 124 | 125 | lessons.append(item) 126 | outcomes.append(label) 127 | -------------------------------------------------------------------------------- /pelops/analysis/isFileImage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "cd 'deep-learning-models/'" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "from resnet50 import ResNet50\n", 19 | "from keras.preprocessing import image\n", 20 | "from imagenet_utils import preprocess_input, decode_predictions\n", 21 | "import numpy as np\n", 22 | "import json\n", 23 | "import time\n", 24 | "from multiprocessing import Pool\n", 25 | "import functools" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "\n", 35 | "root = '/local_data/dgrossman/dgCars'\n", 36 | "allFiles = 'allImages'\n", 37 | "\n" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "def readTasking(filename):\n", 47 | "\n", 48 | " af = open(allFiles,'r')\n", 49 | " data = list()\n", 50 | " fileProblems = list()\n", 51 | "\n", 52 | " for jline in af:\n", 53 | " jline = jline.strip()\n", 54 | " line = json.loads(jline)\n", 55 | " data.append(line)\n", 56 | " af.close()\n", 57 | " return data" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "def procLine2(l, r):\n", 67 | " img_path = '{0}/{1}'.format(r,l['filename'])\n", 68 | " try:\n", 69 | " img = image.load_img(img_path, target_size=(224, 224))\n", 70 | " return (1,l['filename'])\n", 71 | " except:\n", 72 | " return (0,l['filename'])\n", 73 | " \n", 74 | "procLine = functools.partial(procLine2, r=root )" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "time.sleep(60*60*6) # sleep 6 hours then try to do the images" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "p = Pool(32)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "data = readTasking(allFiles)" 102 | ] 103 | }, 104 | { 105 | "cell_type": 
"code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "a = p.map(procLine,data)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "def goodBad(data):\n", 120 | " good= 0\n", 121 | " bad = 0\n", 122 | " for item in data:\n", 123 | " if item[0]==1:\n", 124 | " good = good + 1\n", 125 | " else:\n", 126 | " bad = bad + 1\n", 127 | " print('good',good,' bad',bad)\n", 128 | " return (good,bad)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "out = goodBad(a)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "bad = list()\n", 147 | "for item in a:\n", 148 | " if item[0] == 0:\n", 149 | " bad.append(item[1])\n" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "bad" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "newdata = list()\n", 168 | "for d in data:\n", 169 | " if d['filename'] not in bad:\n", 170 | " newdata.append(d)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "length(newdata)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "len(newdata)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "import json\n", 198 | "out = open('allImages','w')\n", 199 | "for d in newdata:\n", 200 | " out.write(json.dumps(d)+'\\n');\n", 201 | "out.close()" 202 | ] 203 | } 204 | ], 205 | "metadata": { 206 | "anaconda-cloud": {}, 207 | "celltoolbar": "Raw Cell Format", 208 | "kernelspec": { 209 | "display_name": "Python 3", 210 | "language": "python", 211 | "name": "python3" 212 | }, 213 | "language_info": { 214 | "codemirror_mode": { 215 | "name": "ipython", 216 | "version": 3 217 | }, 218 | "file_extension": ".py", 219 | "mimetype": "text/x-python", 220 | "name": "python", 221 | "nbconvert_exporter": "python", 222 | "pygments_lexer": "ipython3", 223 | "version": "3.5.2" 224 | }, 225 | "nbpresent": { 226 | "slides": {}, 227 | "themes": { 228 | "default": "197aed3e-040e-45b3-b365-855332b06482", 229 | "theme": {} 230 | } 231 | } 232 | }, 233 | "nbformat": 4, 234 | "nbformat_minor": 1 235 | } 236 | -------------------------------------------------------------------------------- /pelops/analysis/labelImageCars.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import sys,os,os.path\n", 10 | "import tensorflow as tf\n", 11 | "os.environ['THEANO_FLAGS']='mode=FAST_RUN,device=gpu,floatX=float32'\n", 12 | "from keras import backend as K\n", 13 | "sess = tf.Session()\n", 14 | "K.set_session(sess)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "cd 'deep-learning-models/'" 24 | ] 25 | }, 26 | { 27 | 
"cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import json\n", 33 | "import time\n", 34 | "allFiles = 'allImages'\n", 35 | "root = '/local_data/dgrossman/dgCars/'\n", 36 | "af = open(allFiles,'r')\n", 37 | "data = list()\n", 38 | "for d in af:\n", 39 | " d = d.strip()\n", 40 | " data.append(json.loads(d))" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "from resnet50 import ResNet50\n", 50 | "from keras.preprocessing import image\n", 51 | "from imagenet_utils import preprocess_input, decode_predictions\n", 52 | "import numpy as np" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "s = time.time()\n", 62 | "model = ResNet50(weights='imagenet')\n", 63 | "print ('loadResNet50',time.time() - s)\n", 64 | "\n" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "good = list()\n", 74 | "bad = list()\n", 75 | "file = list()" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "interesting = set()\n", 85 | "\n", 86 | "for x in ['police_van','moving_van','car','pickup','suv','truck','crossover','van','minivan','sports_car','cab','racer','convertible','car_wheel','jeep','ambulance']:\n", 87 | " interesting.add(x)\n", 88 | "\n", 89 | "count = 0\n", 90 | "im = 0\n", 91 | "#with tf.device('/gpu:0'):\n", 92 | "# print ('time:',time.time())\n", 93 | "s = time.time()\n", 94 | "print ('time:',time.time() - s)\n", 95 | "if True:\n", 96 | " for d in data: \n", 97 | " img_path = '{0}/{1}'.format(root,d['filename'])\n", 98 | " flag = True\n", 99 | " try:\n", 100 | " img = image.load_img(img_path, target_size=(224, 224))\n", 101 | "\n", 102 | " except:\n", 103 | " #print('FILE :',d['filename'])\n", 104 | " file.append(d)\n", 105 | " flag = False\n", 106 | "\n", 107 | " if flag:\n", 108 | " x = image.img_to_array(img)\n", 109 | " x = np.expand_dims(x, axis=0)\n", 110 | " x = preprocess_input(x)\n", 111 | " preds = model.predict(x)\n", 112 | " predictions = decode_predictions(preds)[0][:4]\n", 113 | " #out = ''\n", 114 | " found = False\n", 115 | " for prediction in predictions:\n", 116 | " i,t,score = prediction\n", 117 | " #out = d['filename'], prediction\n", 118 | " if t in interesting:\n", 119 | " #out = 'GOOD'+' ' + d['filename']+' '+t\n", 120 | " good.append((d,t))\n", 121 | " found = True\n", 122 | " break\n", 123 | " if not found:\n", 124 | " bad.append((d,predictions[0][1]))\n", 125 | " #out = 'BAD'+ ' ' + d['filename']+ ' ' + predictions[0][1]\n", 126 | " #print (out)\n", 127 | " #print ('Predicted',decode_predictions(preds)[0][:4],' sec:',time.time() - s )\n", 128 | "\n", 129 | " atOnce = 10000\n", 130 | " if count == atOnce:\n", 131 | " count = 0\n", 132 | " im = im + 1\n", 133 | " z = time.time() - s\n", 134 | " print('processed:',im * atOnce,'Images','good',len(good),'bad',len(bad),'file',len(file),z)\n", 135 | " s = time.time()\n", 136 | " count = count + 1" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "print('processed:',len(good) + len(bad) + len(file),'Images','good',len(good),'bad',len(bad),'file',len(file))" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | 
"execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "from collections import defaultdict\n", 155 | "q = defaultdict(int)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "for b in good:\n", 165 | " q[b[1]] = q[b[1]]+1" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "for qq in q:\n", 175 | " print (qq,q[qq])\n" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "gfile = open('foundCars','w')\n", 185 | " for g in good:\n", 186 | " gfile.write(g+'\\n')\n", 187 | "gfile.close()" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "gfile = open('foundCars','w')\n", 197 | "for g in good:\n", 198 | " dat, classification = g\n", 199 | " dat['resnet50'] = classification\n", 200 | " gfile.write(json.dumps(dat)+'\\n')\n", 201 | "gfile.close()\n" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "good[0]" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "7+2" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [] 228 | } 229 | ], 230 | "metadata": { 231 | "anaconda-cloud": {}, 232 | "kernelspec": { 233 | "display_name": "Python 3", 234 | "language": "python", 235 | "name": "python3" 236 | }, 237 | "language_info": { 238 | "codemirror_mode": { 239 | "name": "ipython", 240 | "version": 3 241 | }, 242 | "file_extension": ".py", 243 | "mimetype": "text/x-python", 244 | "name": "python", 245 | "nbconvert_exporter": "python", 246 | "pygments_lexer": "ipython3", 247 | "version": "3.5.2" 248 | } 249 | }, 250 | "nbformat": 4, 251 | "nbformat_minor": 1 252 | } 253 | -------------------------------------------------------------------------------- /pelops/analysis/makeCMCplots.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#set some constants\n", 10 | "ITEMSPERCAMERA = 10\n", 11 | "YRANDOM=1024\n", 12 | "CAMERAS=2\n", 13 | "DROPPED=0\n", 14 | "CMC=100\n", 15 | "EXPERIMENTS=400" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "DATASETFILES = [('/local_data/dgrossman/image_body_type',4),\n", 25 | " ('/local_data/dgrossman/image_color_type',10),\n", 26 | " ('/local_data/dgrossman/image_color_body_type',40),\n", 27 | " ('/local_data/dgrossman/image_make_model_type',1057),\n", 28 | " ('/local_data/dgrossman/resnet50','-1')]\n", 29 | "DATASETFILES = [('/local_data/dgrossman/compcars_color',10),\n", 30 | " ('/local_data/dgrossman/compcars_make_model',284),\n", 31 | " ('/local_data/dgrossman/image_color_type',10),\n", 32 | " ('/local_data/dgrossman/resnet50','-1')]\n", 33 | "DATASETFILES = [('/local_data/dgrossman/resnet50','-1')]" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 
| "outputs": [], 41 | "source": [ 42 | "from pelops.datasets.featuredataset import FeatureDataset\n", 43 | "from pelops.experiment_api.experiment import ExperimentGenerator\n", 44 | "from pelops.analysis import analysis\n", 45 | "\n", 46 | "alldata = list()\n", 47 | "for datasetfile,num in DATASETFILES:\n", 48 | " #do the math\n", 49 | " print(datasetfile)\n", 50 | " featureData = FeatureDataset(datasetfile)\n", 51 | " expGen = ExperimentGenerator(featureData, CAMERAS, ITEMSPERCAMERA, DROPPED, YRANDOM)\n", 52 | " experimentHolder = analysis.repeat_pre_cmc(featureData,expGen,NUMCMC=CMC,EXPPERCMC=EXPERIMENTS)\n", 53 | " stats,gdata = analysis.make_cmc_stats(experimentHolder,ITEMSPERCAMERA)\n", 54 | " alldata.append(gdata)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "import numpy as np\n", 64 | "stats = np.zeros((100,len(DATASETFILES)))\n" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "for x in range(len(DATASETFILES)):\n", 74 | " for y in range(100):\n", 75 | " stats[y][x] = alldata[x][1][y]" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "%matplotlib inline\n", 85 | "import matplotlib.pyplot as plt\n", 86 | "\n", 87 | "#make the plots\n", 88 | "fig = plt.figure()\n", 89 | "ax = plt.subplot(111)\n", 90 | "\n", 91 | "ax.plot(stats)\n", 92 | "#plt.title('color: 10\\ncolor mixed with structure: 40 \\n structure only: 4, 1057\\n CMC curves with {} experiments per curve'.format(CMC,EXPERIMENTS))\n", 93 | "plt.title('compcars color:10\\ncompcars make model:284\\ndgcars color:10\\nuntrained resnet\\nCMC curves with {} experiments per curve'.format(CMC,EXPERIMENTS))\n", 94 | "plt.grid(True)\n", 95 | "#ax.legend(('4','10','40','1057','untrained'),bbox_to_anchor=(1, -0.05),fancybox=True, shadow=True, ncol=5)\n", 96 | "ax.legend(('10cc','284cc','10dg','untrained'),bbox_to_anchor=(1, -0.05),fancybox=True, shadow=True, ncol=5)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "%matplotlib inline\n", 106 | "import matplotlib.pyplot as plt\n", 107 | "\n", 108 | "#make the plots\n", 109 | "fig = plt.figure()\n", 110 | "ax = plt.subplot(111)\n", 111 | "\n", 112 | "ax.plot(stats)\n", 113 | "plt.title('[color out performs structure]\\n CMC curves with {} experiments per curve'.format(CMC,EXPERIMENTS))\n", 114 | "plt.grid(True)\n", 115 | "ax.legend(('4','10','40','1057 classes'),bbox_to_anchor=(1, -0.05),\n", 116 | " fancybox=True, shadow=True, ncol=5)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [] 125 | } 126 | ], 127 | "metadata": { 128 | "anaconda-cloud": {}, 129 | "kernelspec": { 130 | "display_name": "Python 3", 131 | "language": "python", 132 | "name": "python3" 133 | }, 134 | "language_info": { 135 | "codemirror_mode": { 136 | "name": "ipython", 137 | "version": 3 138 | }, 139 | "file_extension": ".py", 140 | "mimetype": "text/x-python", 141 | "name": "python", 142 | "nbconvert_exporter": "python", 143 | "pygments_lexer": "ipython3", 144 | "version": "3.5.2" 145 | } 146 | }, 147 | "nbformat": 4, 148 | "nbformat_minor": 2 149 | } 150 | -------------------------------------------------------------------------------- 
/pelops/analysis/makeFeatureFiles-TEST.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pelops.datasets.veri import VeriDataset\n", 10 | "from pelops.analysis.unsorted.recompute.extract_feats_from_chips import extract_feats_from_chips\n", 11 | "import pelops.utils as utils\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-body_type.model.json'\n", 21 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-body_type.weights.hdf5'\n", 22 | "layer = 'avg_pool'" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n", 32 | " set_type=utils.SetType.TEST.value)\n", 33 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_body_type_TEST',\n", 34 | " model_output_file,\n", 35 | " weights_output_file,\n", 36 | " layer)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color.model.json'\n", 46 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color.weights.hdf5'\n", 47 | "layer = 'avg_pool'" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n", 57 | " set_type=utils.SetType.TEST.value)\n", 58 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_color_type_TEST',\n", 59 | " model_output_file,\n", 60 | " weights_output_file,\n", 61 | " layer)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color_body_type.model2.json'\n", 71 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color_body_type.weights2.hdf5'\n", 72 | "layer = 'avg_pool'" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n", 82 | " set_type=utils.SetType.TEST.value)\n", 83 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_color_body_type_TEST',\n", 84 | " model_output_file,\n", 85 | " weights_output_file,\n", 86 | " layer)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-make_model.model.json'\n", 96 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-make_model.weights.hdf5'\n", 97 | "layer = 'avg_pool'" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n", 107 | " set_type=utils.SetType.TEST.value)\n", 108 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_make_model_type_TEST',\n", 109 | " 
model_output_file,\n", 110 | " weights_output_file,\n", 111 | " layer)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet.model.json'\n", 121 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet.weights.hdf5'\n", 122 | "layer = 'avg_pool'" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "veri = VeriDataset('/local_data/dgrossman/VeRi',set_type=utils.SetType.TEST.value)\n", 132 | "extract_feats_from_chips(veri, '/local_data/dgrossman/resnet50_TEST',\n", 133 | " model_output_file,\n", 134 | " weights_output_file,\n", 135 | " layer)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "model_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-make_model.model.json'\n", 145 | "weights_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-make_model.weights.hdf5'\n", 146 | "layer = 'avg_pool'" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "veri = VeriDataset('/local_data/dgrossman/VeRi',set_type=utils.SetType.TEST.value)\n", 156 | "extract_feats_from_chips(veri, '/local_data/dgrossman/compcars_make_model_TEST',\n", 157 | " model_output_file,\n", 158 | " weights_output_file,\n", 159 | " layer)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "model_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-colors.model.json'\n", 169 | "weights_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-colors.weights.hdf5'\n", 170 | "layer = 'avg_pool'" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "veri = VeriDataset('/local_data/dgrossman/VeRi',set_type=utils.SetType.TEST.value)\n", 180 | "extract_feats_from_chips(veri, '/local_data/dgrossman/compcars_color_TEST',\n", 181 | " model_output_file,\n", 182 | " weights_output_file,\n", 183 | " layer)" 184 | ] 185 | } 186 | ], 187 | "metadata": { 188 | "anaconda-cloud": {}, 189 | "kernelspec": { 190 | "display_name": "Python 3", 191 | "language": "python", 192 | "name": "python3" 193 | }, 194 | "language_info": { 195 | "codemirror_mode": { 196 | "name": "ipython", 197 | "version": 3 198 | }, 199 | "file_extension": ".py", 200 | "mimetype": "text/x-python", 201 | "name": "python", 202 | "nbconvert_exporter": "python", 203 | "pygments_lexer": "ipython3", 204 | "version": "3.5.2" 205 | } 206 | }, 207 | "nbformat": 4, 208 | "nbformat_minor": 2 209 | } 210 | -------------------------------------------------------------------------------- /pelops/analysis/makeFeatureFiles.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pelops.datasets.veri import VeriDataset\n", 10 | "from pelops.analysis.unsorted.recompute.extract_feats_from_chips import extract_feats_from_chips\n", 11 | "import pelops.utils as utils\n" 12 | ] 13 | }, 14 | { 
15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-body_type.model.json'\n", 21 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-body_type.weights.hdf5'\n", 22 | "layer = 'avg_pool'" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n", 32 | " set_type=utils.SetType.TRAIN.value)\n", 33 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_body_type',\n", 34 | " model_output_file,\n", 35 | " weights_output_file,\n", 36 | " layer)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color.model.json'\n", 46 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color.weights.hdf5'\n", 47 | "layer = 'avg_pool'" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n", 57 | " set_type=utils.SetType.TRAIN.value)\n", 58 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_color_type',\n", 59 | " model_output_file,\n", 60 | " weights_output_file,\n", 61 | " layer)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color_body_type.model2.json'\n", 71 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color_body_type.weights2.hdf5'\n", 72 | "layer = 'avg_pool'" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n", 82 | " set_type=utils.SetType.TRAIN.value)\n", 83 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_color_body_type',\n", 84 | " model_output_file,\n", 85 | " weights_output_file,\n", 86 | " layer)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-make_model.model.json'\n", 96 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-make_model.weights.hdf5'\n", 97 | "layer = 'avg_pool'" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n", 107 | " set_type=utils.SetType.TRAIN.value)\n", 108 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_make_model_type',\n", 109 | " model_output_file,\n", 110 | " weights_output_file,\n", 111 | " layer)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet.model.json'\n", 121 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet.weights.hdf5'\n", 122 | "layer = 'avg_pool'" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | 
"execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "veri = VeriDataset('/local_data/dgrossman/VeRi',set_type=utils.SetType.TRAIN.value)\n", 132 | "extract_feats_from_chips(veri, '/local_data/dgrossman/resnet50',\n", 133 | " model_output_file,\n", 134 | " weights_output_file,\n", 135 | " layer)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "model_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-make_model.model.json'\n", 145 | "weights_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-make_model.weights.hdf5'\n", 146 | "layer = 'avg_pool'" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "veri = VeriDataset('/local_data/dgrossman/VeRi',set_type=utils.SetType.TRAIN.value)\n", 156 | "extract_feats_from_chips(veri, '/local_data/dgrossman/compcars_make_model',\n", 157 | " model_output_file,\n", 158 | " weights_output_file,\n", 159 | " layer)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "model_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-colors.model.json'\n", 169 | "weights_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-colors.weights.hdf5'\n", 170 | "layer = 'avg_pool'" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "veri = VeriDataset('/local_data/dgrossman/VeRi',set_type=utils.SetType.TRAIN.value)\n", 180 | "extract_feats_from_chips(veri, '/local_data/dgrossman/compcars_color',\n", 181 | " model_output_file,\n", 182 | " weights_output_file,\n", 183 | " layer)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "1+1\n" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [] 201 | } 202 | ], 203 | "metadata": { 204 | "anaconda-cloud": {}, 205 | "kernelspec": { 206 | "display_name": "Python 3", 207 | "language": "python", 208 | "name": "python3" 209 | }, 210 | "language_info": { 211 | "codemirror_mode": { 212 | "name": "ipython", 213 | "version": 3 214 | }, 215 | "file_extension": ".py", 216 | "mimetype": "text/x-python", 217 | "name": "python", 218 | "nbconvert_exporter": "python", 219 | "pygments_lexer": "ipython3", 220 | "version": "3.5.2" 221 | } 222 | }, 223 | "nbformat": 4, 224 | "nbformat_minor": 2 225 | } 226 | -------------------------------------------------------------------------------- /pelops/analysis/makeVeri.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pelops.datasets.veri import VeriDataset 3 | from pelops.etl.extract_feats_from_chips import extract_feats_from_chips 4 | 5 | # make the stuff that we run on 6 | if __name__ == '__main__': 7 | # path to the veri dataset 8 | v_file_name = sys.argv[0] 9 | 10 | # filename of where to place the output 11 | out_file_name = sys.argv[1] 12 | 13 | veri = VeriDataset(v_file_name) 14 | extract_feats_from_chips(veri, out_file_name) 15 | -------------------------------------------------------------------------------- /pelops/analysis/recomputeCorpus.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pelops.analysis.unsorted.recompute.compute import do_training" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "tbld = '/local_data/dgrossman/tensorboard_logs'\n", 19 | "mcfs = '/local_data/dgrossman/model_save_dir/dg_carsweights.{epoch:02d}-{val_loss:.2f}-{val_acc:.2f}.hdf5'\n", 20 | "batch_size=32\n" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "training_basepath = '/local_data/dgrossman/keras/make_model/train'\n", 30 | "validation_basepath = '/local_data/dgrossman/keras/make_model/validate'\n", 31 | "\n", 32 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-make_model.model.json'\n", 33 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-make_model.weights.hdf5'\n", 34 | "\n", 35 | "do_training(training_basepath,validation_basepath, model_output_file, weights_output_file, tbld, mcfs, 32)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "training_basepath = '/local_data/dgrossman/keras/color/train'\n", 45 | "validation_basepath = '/local_data/dgrossman/keras/color/validate'\n", 46 | "\n", 47 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color.model.json'\n", 48 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color.weights.hdf5'\n", 49 | "\n", 50 | "do_training(training_basepath,validation_basepath, model_output_file, weights_output_file, tbld, mcfs, 32)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "training_basepath = '/local_data/dgrossman/keras/color_body_type/train'\n", 60 | "validation_basepath = '/local_data/dgrossman/keras/color_body_type/validate'\n", 61 | "\n", 62 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color_body_type.model2.json'\n", 63 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color_body_type.weights2.hdf5'\n", 64 | "\n", 65 | "do_training(training_basepath,validation_basepath, model_output_file, weights_output_file, tbld, mcfs, 32)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "training_basepath = '/local_data/dgrossman/keras/body_type/train'\n", 75 | "validation_basepath = '/local_data/dgrossman/keras/body_type/validate'\n", 76 | "\n", 77 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-body_type.model.json'\n", 78 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-body_type.weights.hdf5'\n", 79 | "\n", 80 | "do_training(training_basepath,validation_basepath, model_output_file, weights_output_file, tbld, mcfs, 32)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "training_basepath = '/local_data/teams/pelops/compcars_keras/make_model/train'\n", 90 | "validation_basepath = '/local_data/teams/pelops/compcars_keras/make_model/test'\n", 91 | "\n", 92 | 
"model_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-make_model.model.json'\n", 93 | "weights_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-make_model.weights.hdf5'\n", 94 | "\n", 95 | "do_training(training_basepath,validation_basepath, model_output_file, weights_output_file, tbld, mcfs, 32)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "training_basepath = '/local_data/dgrossman/compcars/colors/train'\n", 105 | "validation_basepath = '/local_data/dgrossman/compcars/colors/test'\n", 106 | "\n", 107 | "model_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-colors.model.json'\n", 108 | "weights_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-colors.weights.hdf5'\n", 109 | "\n", 110 | "do_training(training_basepath,validation_basepath, model_output_file, weights_output_file, tbld, mcfs, 32)" 111 | ] 112 | } 113 | ], 114 | "metadata": { 115 | "kernelspec": { 116 | "display_name": "Python 3", 117 | "language": "python", 118 | "name": "python3" 119 | }, 120 | "language_info": { 121 | "codemirror_mode": { 122 | "name": "ipython", 123 | "version": 3 124 | }, 125 | "file_extension": ".py", 126 | "mimetype": "text/x-python", 127 | "name": "python", 128 | "nbconvert_exporter": "python", 129 | "pygments_lexer": "ipython3", 130 | "version": "3.5.2" 131 | } 132 | }, 133 | "nbformat": 4, 134 | "nbformat_minor": 2 135 | } 136 | -------------------------------------------------------------------------------- /pelops/analysis/saveExtractFeatsFromChips.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pelops.datasets.veri import VeriDataset\n", 10 | "from pelops.analysis.unsorted.recompute.extract_feats_from_chips import extract_feats_from_chips\n", 11 | "import pelops.utils as utils\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "veri = VeriDataset('/local_data/dgrossman/newVeRi',set_type=utils.SetType.TRAIN.value)\n", 21 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_NEW_TRAIN')" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "veri = VeriDataset('/local_data/dgrossman/newVeRi',set_type=utils.SetType.TEST.value)\n", 31 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_NEW_TEST')" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "veri = VeriDataset('/local_data/dgrossman/tinyVeRi',set_type=utils.SetType.TRAIN.value)\n", 48 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_NEW_TINY_TRAIN')" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "veri = VeriDataset('/local_data/dgrossman/tinyVeRi',set_type=utils.SetType.TEST.value)\n", 58 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_NEW_TINY_TEST')" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 
65 | "outputs": [], 66 | "source": [] 67 | } 68 | ], 69 | "metadata": { 70 | "anaconda-cloud": {}, 71 | "kernelspec": { 72 | "display_name": "Python 3", 73 | "language": "python", 74 | "name": "python3" 75 | }, 76 | "language_info": { 77 | "codemirror_mode": { 78 | "name": "ipython", 79 | "version": 3 80 | }, 81 | "file_extension": ".py", 82 | "mimetype": "text/x-python", 83 | "name": "python", 84 | "nbconvert_exporter": "python", 85 | "pygments_lexer": "ipython3", 86 | "version": "3.5.2" 87 | } 88 | }, 89 | "nbformat": 4, 90 | "nbformat_minor": 2 91 | } 92 | -------------------------------------------------------------------------------- /pelops/analysis/splitDataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "from random import shuffle\n", 11 | "import glob\n", 12 | "import shutil\n", 13 | "import tqdm" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "def make_dir(path):\n", 23 | " if not os.path.exists(path):\n", 24 | " os.makedirs(path)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "def move_link(src,dst):\n", 34 | " real_src = os.path.realpath(src)\n", 35 | " #print(real_src,dst)\n", 36 | " os.symlink(real_src,dst)\n", 37 | " os.unlink(src)\n", 38 | " #os.rename(src,dst)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "def prep_datasets(srcpath,destpath,percent=0.3):\n", 48 | " \n", 49 | " if percent >1 or percent < 0:\n", 50 | " print ('bad')\n", 51 | " raise ValueError('percent needs to be in [0,1]')\n", 52 | " found = 0\n", 53 | " moved = 0\n", 54 | " for image_class_filepath in tqdm.tqdm(glob.glob(os.path.join(srcpath, '*'))):\n", 55 | " \n", 56 | " if os.path.isdir(image_class_filepath):\n", 57 | " image_class_num = int(os.path.basename(image_class_filepath))\n", 58 | " \n", 59 | " directory_name = os.path.join(destpath, '{}'.format(image_class_num))\n", 60 | " #print(directory_name)\n", 61 | " make_dir(directory_name)\n", 62 | " \n", 63 | " dir_contents = list()\n", 64 | " \n", 65 | " for filename in glob.glob(os.path.join(image_class_filepath, '*')):\n", 66 | " found+=1\n", 67 | " dir_contents.append(filename)\n", 68 | " \n", 69 | " \n", 70 | " shuffle(dir_contents)\n", 71 | " threshold = int (percent * len(dir_contents))\n", 72 | " mixed = dir_contents[:threshold]\n", 73 | " for filename in mixed:\n", 74 | " moved +=1\n", 75 | " #print ('filename:',os.path.basename(filename))\n", 76 | " src = os.path.join(srcpath,'{}'.format(image_class_num),filename)\n", 77 | " #print(directory_name,filename)\n", 78 | " dst = os.path.join(directory_name,os.path.basename(filename))\n", 79 | " #print('src:{0}\\ndst:{1}'.format(src,dst))\n", 80 | " move_link(src,dst)\n", 81 | " print('total:',found,'moved:',moved,'remains:',found-moved)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "train = '/local_data/dgrossman/keras/color/train'\n", 91 | "test = '/local_data/dgrossman/keras/color/test'\n", 92 | "validate = '/local_data/dgrossman/keras/color/validate'" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | 
"execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "prep_datasets(train,test,0.3)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "prep_datasets(test,validate,0.3)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [] 119 | } 120 | ], 121 | "metadata": { 122 | "kernelspec": { 123 | "display_name": "Python 3", 124 | "language": "python", 125 | "name": "python3" 126 | }, 127 | "language_info": { 128 | "codemirror_mode": { 129 | "name": "ipython", 130 | "version": 3 131 | }, 132 | "file_extension": ".py", 133 | "mimetype": "text/x-python", 134 | "name": "python", 135 | "nbconvert_exporter": "python", 136 | "pygments_lexer": "ipython3", 137 | "version": "3.5.2" 138 | } 139 | }, 140 | "nbformat": 4, 141 | "nbformat_minor": 2 142 | } 143 | -------------------------------------------------------------------------------- /pelops/analysis/test_analysis.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pelops.analysis import analysis 3 | 4 | 5 | class experimentGen(): 6 | def __init__(self): 7 | self.fd = featureData() 8 | self.experiment = list() 9 | c1 = ['a', 'b', 'c', 'd'] 10 | c2 = ['a', 'e', 'f', 'g'] 11 | cam1 = list() 12 | cam2 = list() 13 | 14 | for c in c1: 15 | cam1.append(self.fd.getchip(c)) 16 | 17 | for c in c2: 18 | cam2.append(self.fd.getchip(c)) 19 | 20 | self.experiment.append(cam1) 21 | self.experiment.append(cam2) 22 | 23 | def generate(self): 24 | return self.experiment 25 | 26 | 27 | class chip(): 28 | def __init__(self, x): 29 | self.car_id = x[0] 30 | self.feature = x[1] 31 | 32 | 33 | class featureData(): 34 | def __init__(self): 35 | self.data = list() 36 | 37 | fun = [('a', [1, 2, 3, 4, 5, 6, 7]), 38 | ('b', [10, 20, 30, 40, 11, 9, 2.7]), 39 | ('c', [100, 20, 30, 40, 11, 9, 2.7]), 40 | ('d', [10, 200, 30, 40, 11, 9, 2.7]), 41 | ('e', [10, 20, 300, 40, 11, 9, 2.7]), 42 | ('f', [10, 20, 30, 400, 11, 9, 2.7]), 43 | ('g', [10, 20, 30, 40, 110, 9, 2.7]), 44 | ('h', [10, 20, 30, 40, 11, 90, 2.7]), 45 | ('i', [10, 20, 30, 40, 11, 9, 27.0])] 46 | for f in fun: 47 | self.data.append(chip(f)) 48 | 49 | def get_feats_for_chip(self, chip): 50 | for d in self.data: 51 | if d.car_id == chip.car_id: 52 | return d.feature 53 | 54 | def getchip(self, id): 55 | for d in self.data: 56 | if d.car_id == id: 57 | return d 58 | 59 | # test the comparisons 60 | 61 | 62 | def test_cosine(): 63 | a = [1, 2, 3, 4, 5, 6, 7] 64 | b = [10, 20, 30, 40, 11, 9, 2.7] 65 | out = analysis.comp_cosine(a, b) 66 | assert(abs(out - 0.63837193721375185) < 0.0000001) 67 | 68 | 69 | def test_euclidean(): 70 | a = [1, 2, 3, 4, 5, 6, 7] 71 | b = [10, 20, 30, 40, 11, 9, 2.7] 72 | out = analysis.comp_euclid(a, b) 73 | assert(abs(out - 49.93485756463114) < 0.0000001) 74 | 75 | # test the matching works correctly 76 | 77 | 78 | def test_is_correct_match(): 79 | fd = featureData() 80 | 81 | c1 = ['a', 'b', 'c', 'd'] 82 | c2 = ['a', 'e', 'f', 'g'] 83 | cam1 = list() 84 | cam2 = list() 85 | 86 | for c in c1: 87 | cam1.append(fd.getchip(c)) 88 | 89 | for c in c2: 90 | cam2.append(fd.getchip(c)) 91 | 92 | out = analysis.is_correct_match(fd, cam1, cam2) 93 | assert (out == 0) 94 | 95 | 96 | def test_pre_cmc(): 97 | eg = experimentGen() 98 | fd = featureData() 99 | keys, values = analysis.pre_cmc(fd, eg, EXPPERCMC=10) 100 | assert 
values[0] == 1.0 101 | 102 | 103 | #test the statistics are being generated correctly 104 | def test_make_cmc_stats(): 105 | eg = experimentGen() 106 | fd = featureData() 107 | experimentHolder = analysis.repeat_pre_cmc(fd, eg, NUMCMC=10, EXPPERCMC=10) 108 | stats, gdata = analysis.make_cmc_stats(experimentHolder, 4) 109 | 110 | for x in range(len(gdata[0])): 111 | assert ( gdata[1][x] ==gdata[2][x] == gdata[0][x]) 112 | -------------------------------------------------------------------------------- /pelops/analysis/unsorted/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/analysis/unsorted/__init__.py -------------------------------------------------------------------------------- /pelops/analysis/unsorted/makeH5pyFile.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "cd '/local_data/dgrossman/VeRi/'" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import h5py\n", 19 | "import json\n", 20 | "import numpy as np\n", 21 | "\n", 22 | "def makeJsonList(fileName):\n", 23 | " retval = list()\n", 24 | " with open(fileName,'r') as f:\n", 25 | " for line in f:\n", 26 | " line = line.strip()\n", 27 | " line = json.loads(line)\n", 28 | " retval.append(line)\n", 29 | " return retval\n", 30 | "\n", 31 | "def extractColumn(colName,jsonList,t):\n", 32 | " retval = list()\n", 33 | " for line in jsonList:\n", 34 | " if t == str:\n", 35 | " retval.append(str(line[colName]).encode('ascii','ignore'))\n", 36 | " if t == int:\n", 37 | " retval.append(int(line[colName]))\n", 38 | " if t == float:\n", 39 | " for element in line[colName]:\n", 40 | " retval.append(float(element))\n", 41 | " return retval\n", 42 | "\n", 43 | "def make5file(file5Name, names, jsonList):\n", 44 | " with h5py.File(file5Name,'w') as f: \n", 45 | " for o, i, t, t2 in names:\n", 46 | " print(o,i)\n", 47 | " temp = extractColumn(o,jsonList,t)\n", 48 | " f.create_dataset(i,data=temp,dtype=t2)\n", 49 | "\n", 50 | "def main(inFileName,outFileName):\n", 51 | " jsonList = makeJsonList(inFileName)\n", 52 | " f = np.dtype('float')\n", 53 | " c = h5py.special_dtype(vlen=bytes)\n", 54 | " names = [('colorID','colorID',int,int), ('vehicleID','vehicleID',str,c), ('resnet50','feats',float,f), ('imageName','ids',str,c), ('typeID','typeID',int,int), ('cameraID','cameraID',str,c)]\n", 55 | " make5file(outFileNAme,names,jsonList)\n", 56 | " \n", 57 | "if __name__ == '__main__':\n", 58 | " main(sys.argv[1],sys.argv[2])" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "main('./test_uniqfile.json','./test_uniqfile.p5')" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "jsonList[0].keys()" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "f = np.dtype('float')\n", 86 | "c = h5py.special_dtype(vlen=bytes)\n", 87 | "names = [('colorID','colorID',int,int), ('vehicleID','vehicleID',str,c), ('resnet50','feats',float,f), ('imageName','ids',str,c), ('typeID','typeID',int,int), 
('cameraID','cameraID',str,c)]" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "make5file('1test_features',names,jsonList)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [] 105 | } 106 | ], 107 | "metadata": { 108 | "anaconda-cloud": {}, 109 | "kernelspec": { 110 | "display_name": "Python 3", 111 | "language": "python", 112 | "name": "python3" 113 | }, 114 | "language_info": { 115 | "codemirror_mode": { 116 | "name": "ipython", 117 | "version": 3 118 | }, 119 | "file_extension": ".py", 120 | "mimetype": "text/x-python", 121 | "name": "python", 122 | "nbconvert_exporter": "python", 123 | "pygments_lexer": "ipython3", 124 | "version": "3.5.2" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 1 129 | } 130 | -------------------------------------------------------------------------------- /pelops/analysis/unsorted/recompute/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/analysis/unsorted/recompute/__init__.py -------------------------------------------------------------------------------- /pelops/analysis/unsorted/recompute/extract_feats_from_chips.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.applications.resnet50 import preprocess_input 3 | from keras.applications.resnet50 import ResNet50 4 | from keras.models import Model 5 | from keras.models import model_from_json 6 | from keras.preprocessing import image 7 | 8 | from pelops.datasets.featuredataset import FeatureDataset 9 | 10 | 11 | def load_image(img_path, resizex=224, resizey=224): 12 | data = image.load_img(img_path, target_size=(resizex, resizey)) 13 | x = image.img_to_array(data) 14 | x = np.expand_dims(x, axis=0) 15 | x = preprocess_input(x) 16 | return x 17 | 18 | 19 | def save_model_workaround(model, model_file, weight_file): 20 | # serialize model to JSON 21 | model_json = model.to_json() 22 | with open(model_file, 'w') as json_file: 23 | json_file.write(model_json) 24 | # serialize weights to HDF5 25 | model.save_weights(weight_file) 26 | 27 | 28 | def load_model_workaround(model_file, weight_file): 29 | # load json and create model 30 | json_file = open(model_file, 'r') 31 | loaded_model_json = json_file.read() 32 | json_file.close() 33 | loaded_model = model_from_json(loaded_model_json) 34 | # load weights into new model 35 | loaded_model.load_weights(weight_file) 36 | return loaded_model 37 | 38 | # load the imagenet networks 39 | 40 | 41 | def get_models(model_file, weight_file, layer): 42 | # include_top needs to be True for this to work 43 | base_model = load_model_workaround(model_file, weight_file) 44 | output_layer = base_model.get_layer(layer) 45 | output_layer = output_layer.output 46 | model = Model(input=base_model.input, output=output_layer) 47 | # output=base_model.get_layer('flatten_1').output) 48 | return (model, base_model) 49 | 50 | # return feature vector for a given img, and model 51 | 52 | 53 | def image_features(img, model): 54 | features = model.predict(img) 55 | return features 56 | 57 | 58 | def extract_feats_from_chips(chipdataset, output_fname, model_file, weight_file, layer): 59 | model, base_model = get_models(model_file, weight_file, layer) 60 | 61 | features = 
np.zeros((len(chipdataset), 2048), dtype=np.float16) 62 | chips = [] 63 | chip_keys = [] 64 | for index, (chip_key, chip) in enumerate(chipdataset.chips.items()): 65 | chip_keys.append(chip_key) 66 | chips.append(chip) 67 | img_path = chip.filepath 68 | img_data = load_image(img_path) 69 | features[index] = image_features(img_data, model) 70 | 71 | FeatureDataset.save(output_fname, chip_keys, chips, features) 72 | return True 73 | -------------------------------------------------------------------------------- /pelops/const.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | class _Const: 4 | """ Create a constant class 5 | """ 6 | class ConstError(TypeError): pass 7 | 8 | def __setattr__(self, name, value): 9 | if name in self.__dict__: 10 | raise self.ConstError("Cannot rebind constant {}".format(name)) 11 | self. __dict__[name] = value 12 | 13 | def __delattr__(self, name): 14 | if name in self.__dict__: 15 | raise self.ConstError("Cannot unbind constant {}".format(name)) 16 | raise NameError(name) 17 | 18 | sys.modules[__name__] = _Const() -------------------------------------------------------------------------------- /pelops/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/datasets/__init__.py -------------------------------------------------------------------------------- /pelops/datasets/chip.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import collections 3 | import os 4 | import pelops.utils as utils 5 | 6 | # ================================================================================ 7 | # Chip Factory 8 | # ================================================================================ 9 | 10 | 11 | class DatasetFactory(object): 12 | @staticmethod 13 | def create_dataset(dataset_type, dataset_path, set_type=None): 14 | for cls in ChipDataset.__subclasses__(): 15 | if cls.check_dataset_type(dataset_type): 16 | return cls(dataset_path, set_type) 17 | 18 | # ================================================================================ 19 | # Chip Dataset 20 | # ================================================================================ 21 | 22 | 23 | class ChipDataset(metaclass = abc.ABCMeta): 24 | def __init__(self, dataset_path, set_type=None): 25 | self.dataset_path = dataset_path 26 | self.__set_set_type(set_type) 27 | self.chips = dict() 28 | self.chips_by_cam_id = None 29 | self.chips_by_car_id = None 30 | 31 | def __set_set_type(self, set_type): 32 | self.set_type = None 33 | 34 | # The Default ALL 35 | if set_type is None: 36 | self.set_type = utils.SetType.ALL 37 | 38 | # If passed a SetType 39 | if isinstance(set_type, utils.SetType): 40 | self.set_type = set_type 41 | 42 | # If passed a string 43 | if isinstance(set_type, str): 44 | set_type = set_type.lower() 45 | for st in utils.SetType: 46 | if set_type == st.value: 47 | self.set_type = st 48 | 49 | if self.set_type is None: 50 | raise ValueError("set_type is not a valid string or SetType enum") 51 | 52 | 53 | @classmethod 54 | def check_dataset_type(self, dataset_type): 55 | return dataset_type == self.__name__ 56 | 57 | def get_all_chips_by_car_id(self, car_id): 58 | if self.chips_by_car_id is None: 59 | self.chips_by_car_id = collections.defaultdict(list) 60 | for chip_key, chip in self.chips.items(): 61 | 
self.chips_by_car_id[chip.car_id].append(chip_key) 62 | return [self.chips[chip_key] for chip_key in self.chips_by_car_id[car_id]] 63 | 64 | def get_all_chips_by_car_id_camera_id(self, car_id, cam_id): 65 | output = [] 66 | for chip in self.get_all_chips_by_car_id(car_id): 67 | if chip.cam_id == cam_id: 68 | output.append(chip) 69 | return output 70 | 71 | def get_all_chips_by_cam_id(self, cam_id): 72 | if self.chips_by_cam_id is None: 73 | self.chips_by_cam_id = collections.defaultdict(list) 74 | for chip_key, chip in self.chips.items(): 75 | self.chips_by_cam_id[chip.cam_id].append(chip_key) 76 | 77 | return [self.chips[chip_key] for chip_key in self.chips_by_cam_id[cam_id]] 78 | 79 | def get_distinct_cams_by_car_id(self, car_id): 80 | # TODO: Look at performance 81 | return self.get_distinct_cams_per_car()[car_id] 82 | 83 | def get_distinct_cams_per_car(self): 84 | # TODO: Look at performance 85 | list_of_cameras_per_car = collections.defaultdict(set) 86 | for chip in self.chips.values(): 87 | list_of_cameras_per_car[chip.car_id].add(chip.cam_id) 88 | return list_of_cameras_per_car 89 | 90 | def get_all_cam_ids(self): 91 | return list(set(chip.cam_id for chip in self.chips.values())) 92 | 93 | def get_all_car_ids(self): 94 | return list(set(chip.car_id for chip in self.chips.values())) 95 | 96 | def __iter__(self): 97 | for chip in self.chips.values(): 98 | yield chip 99 | raise StopIteration() 100 | 101 | def __len__(self): 102 | return len(self.chips) 103 | 104 | # ================================================================================ 105 | # Chip Base 106 | # ================================================================================ 107 | 108 | 109 | # chip_id is the filepath 110 | Chip = collections.namedtuple("Chip", 111 | ["filepath", 112 | "car_id", 113 | "cam_id", 114 | "time", 115 | "misc"]) 116 | -------------------------------------------------------------------------------- /pelops/datasets/compcar.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import os 3 | import scipy.io 4 | 5 | import pelops.datasets.chip as chip 6 | import pelops.utils as utils 7 | 8 | 9 | class CompcarDataset(chip.ChipDataset): 10 | filenames = collections.namedtuple( 11 | "filenames", 12 | [ 13 | "image_dir", 14 | "name_train", 15 | "name_test", 16 | "model_mat", 17 | "color_mat", 18 | ] 19 | ) 20 | filepaths = filenames ( 21 | "image", 22 | "train_surveillance.txt", 23 | "test_surveillance.txt", 24 | "sv_make_model_name.mat", 25 | "color_list.mat", 26 | ) 27 | 28 | def __init__(self, dataset_path, set_type=None): 29 | super().__init__(dataset_path, set_type) 30 | self.__set_filepaths() # set self.__filepaths 31 | self.__extract_color_labels() # set self.__color_map 32 | self.__extract_model_labels() # set self.__model_map 33 | self.__set_chips() 34 | 35 | def __set_filepaths(self): 36 | self.__filepaths = self.filenames( 37 | os.path.join(self.dataset_path, CompcarDataset.filepaths.image_dir), 38 | os.path.join(self.dataset_path, CompcarDataset.filepaths.name_train), 39 | os.path.join(self.dataset_path, CompcarDataset.filepaths.name_test), 40 | os.path.join(self.dataset_path, CompcarDataset.filepaths.model_mat), 41 | os.path.join(self.dataset_path, CompcarDataset.filepaths.color_mat), 42 | ) 43 | 44 | def __extract_color_labels(self): 45 | self.__color_map = {} 46 | 47 | # Map color_id to its respective name 48 | color_map = { 49 | -1: None, 50 | 0: "black", 51 | 1: "white", 52 | 2: "red", 53 | 3: "yellow", 54 | 4: 
"blue", 55 | 5: "green", 56 | 6: "purple", 57 | 7: "brown", 58 | 8: "champagne", 59 | 9: "silver", 60 | } 61 | 62 | # Load the matrix of colors 63 | color_matrix = scipy.io.loadmat( 64 | self.__filepaths.color_mat)["color_list"] 65 | 66 | # File is an length 1 array, color_num is a 1x1 matrix 67 | for file_array, color_num_matrix in color_matrix: 68 | filepath = file_array[0] 69 | color_num = int(color_num_matrix[0][0]) 70 | self.__color_map[filepath] = color_map[color_num] 71 | 72 | def __extract_model_labels(self): 73 | self.__model_map = {} 74 | 75 | model_matrix = scipy.io.loadmat( 76 | self.__filepaths.model_mat)["sv_make_model_name"] 77 | for car_id, model_matrix in enumerate(model_matrix): 78 | # correct car_id 79 | car_id = int(car_id) + 1 80 | # make contains only the make of the car and occasionally contains whitespaces after 81 | make = model_matrix[0][0].strip() 82 | # correct instance when make is misspelled that affects the model 83 | if make == "Zoyte": 84 | make = "Zotye" 85 | # model sometimes contains both make and model, so ensure that model only contains model 86 | make_and_model = model_matrix[1][0] 87 | model = make_and_model.replace(make, "").strip() 88 | # model_id contains the model id used in the web 89 | model_id = int(model_matrix[2][0][0]) 90 | # correct instance when make is misspelled 91 | if make == "BWM": 92 | make = "BMW" 93 | self.__model_map[car_id] = [make, model, model_id] 94 | 95 | def __set_chips(self): 96 | # identify all the chips, default query to all 97 | all_names_filepaths = { 98 | utils.SetType.ALL: [self.__filepaths.name_test, self.__filepaths.name_train], 99 | utils.SetType.TEST: [self.__filepaths.name_test], 100 | utils.SetType.TRAIN: [self.__filepaths.name_train], 101 | }.get(self.set_type, [self.__filepaths.name_test, self.__filepaths.name_train]) 102 | # create chip objects based on the names listed in the files 103 | for name_filepath in all_names_filepaths: 104 | for name in open(name_filepath): 105 | current_chip = self.__create_chip(self.__filepaths.image_dir, name.strip()) 106 | self.chips[current_chip.filepath] = current_chip 107 | 108 | def __create_chip(self, img_dir, img_name): 109 | splitter = img_name.split("/") 110 | misc = dict() 111 | 112 | filepath = os.path.join(img_dir, img_name) 113 | car_id = int(splitter[0]) 114 | cam_id = None 115 | time = None 116 | misc["color"] = self.__color_map[img_name] 117 | make, model, model_id = self.__model_map[car_id] 118 | misc["make"] = make 119 | misc["model"] = model 120 | misc["model_id"] = model_id 121 | 122 | return chip.Chip(filepath, car_id, cam_id, time, misc) 123 | -------------------------------------------------------------------------------- /pelops/datasets/dgcars.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import json 3 | import os.path 4 | 5 | import pelops.datasets.chip as chip 6 | import pelops.utils as utils 7 | 8 | 9 | class DGCarsDataset(chip.ChipDataset): 10 | filenames = collections.namedtuple( 11 | "filenames", 12 | [ 13 | "all_list", 14 | "train_list", 15 | "test_list", 16 | ] 17 | ) 18 | filepaths = filenames( 19 | "allFiles", 20 | "training", 21 | "testing", 22 | ) 23 | 24 | def __init__(self, dataset_path, set_type=None): 25 | super().__init__(dataset_path, set_type) 26 | self.__set_filepaths() # set self.__filepaths 27 | self.__set_chips() 28 | 29 | def __set_filepaths(self): 30 | self.__filepaths = self.filenames( 31 | os.path.join(self.dataset_path, DGCarsDataset.filepaths.all_list), 32 | 
os.path.join(self.dataset_path, DGCarsDataset.filepaths.train_list), 33 | os.path.join(self.dataset_path, DGCarsDataset.filepaths.test_list), 34 | ) 35 | 36 | def __set_chips(self): 37 | # identify all the chips, default query to all 38 | name_filepath = { 39 | utils.SetType.ALL: self.__filepaths.all_list, 40 | utils.SetType.TEST: self.__filepaths.test_list, 41 | utils.SetType.TRAIN: self.__filepaths.train_list, 42 | }.get(self.set_type, self.__filepaths.all_list) 43 | 44 | # create chip objects based on the names listed in the files 45 | for dg_chip in utils.read_json(name_filepath): 46 | filepath = os.path.normpath(os.path.join(self.dataset_path, dg_chip["filename"])) 47 | car_id = None 48 | cam_id = None 49 | time = None 50 | misc = dg_chip 51 | current_chip = chip.Chip(filepath, car_id, cam_id, time, misc) 52 | 53 | self.chips[filepath] = current_chip 54 | -------------------------------------------------------------------------------- /pelops/datasets/featuredataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import datetime 3 | import h5py 4 | import numpy as np 5 | from pelops.datasets.chip import ChipDataset, Chip 6 | 7 | class FeatureDataset(ChipDataset): 8 | def __init__(self, filename): 9 | super().__init__(filename) 10 | self.chip_index_lookup, self.chips, self.feats = self.load(filename) 11 | self.filename_lookup = {} 12 | for chip_key, chip in self.chips.items(): 13 | self.filename_lookup[chip.filepath] = chip_key 14 | 15 | def get_feats_for_chip(self, chip): 16 | chip_key = self.filename_lookup[chip.filepath] 17 | return self.feats[self.chip_index_lookup[chip_key]] 18 | 19 | @staticmethod 20 | def load(filename): 21 | with h5py.File(filename) as fIn: 22 | feats = np.array(fIn['feats']) 23 | 24 | num_items = fIn['feats'].shape[0] 25 | # Hack to deal with performance of extracting single items 26 | local_hdf5 = {} 27 | local_hdf5['chip_keys'] = np.array(fIn['chip_keys']) 28 | local_hdf5['filepath'] = np.array(fIn['filepath']) 29 | local_hdf5['car_id'] = np.array(fIn['car_id']) 30 | local_hdf5['cam_id'] = np.array(fIn['cam_id']) 31 | local_hdf5['time'] = np.array(fIn['time']) 32 | local_hdf5['misc'] = np.array(fIn['misc']) 33 | 34 | chips = {} 35 | chip_index_lookup = {} 36 | for i in range(num_items): 37 | filepath = local_hdf5['filepath'][i].decode('utf-8') 38 | car_id = local_hdf5['car_id'][i] 39 | cam_id = local_hdf5['cam_id'][i] 40 | timestamp = local_hdf5['time'][i] 41 | if isinstance(timestamp, str) or isinstance(timestamp, bytes): 42 | # Catch the case where we have encoded time as a string timestamp 43 | timestamp = datetime.datetime.fromtimestamp(float(timestamp)) 44 | misc = json.loads(local_hdf5['misc'][i].decode('utf-8')) 45 | chip_key = local_hdf5['chip_keys'][i] 46 | if isinstance(chip_key, bytes): 47 | chip_key = chip_key.decode('utf-8') 48 | chip_index_lookup[chip_key] = i 49 | chips[chip_key] = Chip(filepath, car_id, cam_id, timestamp, misc) 50 | return chip_index_lookup, chips, feats 51 | 52 | @staticmethod 53 | def _save_field(fOut, field_example, field_name, value_array): 54 | if isinstance(field_example, datetime.datetime): 55 | # Encode time as a string seconds since epoch 56 | times = np.array([str(val.timestamp()).encode('ascii', 'ignore') for val in value_array]) 57 | fOut.create_dataset(field_name, 58 | data=times, 59 | dtype=h5py.special_dtype(vlen=bytes)) 60 | elif isinstance(field_example, str): 61 | output_vals = [val.encode('ascii', 'ignore') for val in value_array] 62 | 
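# str fields are stored as variable-length byte strings (h5py special vlen dtype), matching the datetime branch above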
fOut.create_dataset(field_name, 63 | data= output_vals, 64 | dtype=h5py.special_dtype(vlen=bytes)) 65 | elif isinstance(field_example, dict): 66 | output_vals = [json.dumps(val).encode('ascii', 'ignore') for val in value_array] 67 | fOut.create_dataset(field_name, 68 | data=output_vals, 69 | dtype=h5py.special_dtype(vlen=bytes)) 70 | else: 71 | fOut.create_dataset(field_name, data=value_array) 72 | 73 | @staticmethod 74 | def save(filename, chip_keys, chips, features): 75 | """ Save a feature dataset 76 | """ 77 | with h5py.File(filename, 'w') as fOut: 78 | fOut.create_dataset('feats', data=features) 79 | 80 | FeatureDataset._save_field(fOut, 81 | chip_keys[0], 82 | 'chip_keys', 83 | chip_keys) 84 | 85 | first_chip = chips[0] 86 | fields = first_chip._fields 87 | for field in fields: 88 | field_example = getattr(first_chip, field) 89 | output_data = [getattr(chip, field) for chip in chips] 90 | FeatureDataset._save_field(fOut, field_example, field, output_data) 91 | -------------------------------------------------------------------------------- /pelops/datasets/slice.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import io 3 | import itertools 4 | import os 5 | import re 6 | import sys 7 | from datetime import datetime 8 | 9 | import pelops.datasets.chip as chip 10 | 11 | # ================================================================================ 12 | # SLiCE Test Dataset (labeled by STR) 13 | # ================================================================================ 14 | 15 | 16 | class SliceDataset(chip.ChipDataset): 17 | 18 | def __init__(self, dataset_path, set_type=None, debug=False): 19 | super().__init__(dataset_path, set_type) 20 | self.__noise_seq = 0 21 | self.__debug = debug 22 | self.__set_chips() 23 | 24 | @staticmethod 25 | def __decode_truth_file(truth_file): 26 | """The labels for the STR processed SLiCE chips are in a 'truth.txt' file which this function parses.""" 27 | 28 | with open(truth_file) as truth_hdl: 29 | truth_text = truth_hdl.read() 30 | for char in [' ', '%']: 31 | truth_text = truth_text.replace(char, '') 32 | truth_fobj = io.StringIO(truth_text) 33 | return {(int(dct['obSetIdx']), int(dct['chipIdx'])): int(dct['targetID']) 34 | for dct in csv.DictReader(truth_fobj)} 35 | 36 | @staticmethod 37 | def index_chip(file_path): 38 | """Parses an arbitrary file path and identifies paths of valid image chips. 39 | Returns None for non-chip file paths.""" 40 | 41 | # We have to handle two cases: 42 | # 43 | # 1) The STR San Antonio DOT chips, which have the form: 44 | # ObSet009_1473015765_IH37_Jones/images/ObSet009-014.png 45 | # 46 | # 2) The SLICE chips, which have the form: 47 | # ObSet101_1473082429_day5_camera3/images/ObSet101-001-0-20160905_185543.375_1.jpg 48 | # 49 | # The epoch on the SLICE chips is per chip, whereas it is per 50 | # observation set for the STR chips. 
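# (in the SLICE example above, that per-chip epoch comes from the 20160905_185543.375 portion of the file name; the trailing _1 is discarded)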
The SLICE chip file names have the 51 | # follow information after the ObSet and chip id: 52 | # 53 | # Obset-ChipID-label-time_unused 54 | 55 | # Split the file path into pieces to extract the information from it 56 | file_path = os.path.normpath(file_path) 57 | directory, img_type, file = file_path.split(os.sep)[-3:] 58 | 59 | # Sometimes we have masks, reject those 60 | if img_type != "images": 61 | return 62 | 63 | # Sometimes we get the truth.txt file, which we do not want 64 | if file == "truth.txt": 65 | return 66 | 67 | # Get the observation set, time, and name from the directory 68 | obset_str, epoch_str, *name = directory.split("_") 69 | name = "_".join(name) 70 | 71 | # We slice off the first part of the string that is non-numeric, where 72 | # 5 = len("ObSet") 73 | obset_int = int(obset_str[5:]) 74 | 75 | # Get the chip ID, and perhaps more, from the name of the file 76 | _, chip_id_str, *misc = file.split("-") 77 | 78 | # SLICE chips have more information 79 | if misc: 80 | chip_id_int = int(chip_id_str) 81 | _, time = misc 82 | # Remove file extension 83 | time, _ = os.path.splitext(time) 84 | # Remove _1 at end of each time and convert to microseconds 85 | time = time[:-2] + "000" 86 | # Get milliseconds since the unix epoch 87 | epoch = datetime.utcfromtimestamp(0) 88 | dt = datetime.strptime(time, "%Y%m%d_%H%M%S.%f") 89 | epoch_str = str(int((dt - epoch).total_seconds())) 90 | else: 91 | chip_id, _ = os.path.splitext(chip_id_str) 92 | chip_id_int = int(chip_id) 93 | 94 | idx_key = (obset_int, chip_id_int) 95 | idx_val = { 96 | 'file': file_path, 97 | 'meta': { 98 | 'obSetName': name, 99 | 'epoch': epoch_str, 100 | }, 101 | } 102 | return idx_key, idx_val 103 | 104 | def __create_chip(self, file_info, truth_value): 105 | """Converts parsing / indexing results into a pelops.datasets.chip.Chip object""" 106 | if truth_value == 0: 107 | self.__noise_seq += 1 108 | car_id = 'unk-{:09d}'.format(self.__noise_seq) 109 | else: 110 | car_id = 'tgt-{:09d}'.format(truth_value) 111 | 112 | chip_params = [ 113 | file_info['file'], 114 | car_id, 115 | file_info['meta']['obSetName'], 116 | file_info['meta']['epoch'], 117 | file_info['meta'] 118 | ] 119 | return chip.Chip(*chip_params) 120 | 121 | def __set_chips(self): 122 | """Sets the chips dict of the superclass to contain chip files for the dataset.""" 123 | 124 | # Scan filesystem 125 | root_files = [root_file for root_file in os.walk(self.dataset_path)] 126 | 127 | # Decode truth.txt file 128 | truth_files = [os.path.join(walked[0], 'truth.txt') for walked in root_files if 'truth.txt' in walked[2]] 129 | if len(truth_files) == 0: 130 | raise IOError("No truth file found.") 131 | elif len(truth_files) > 1: 132 | raise IOError("Too many truth files available.") 133 | 134 | truth_data = self.__decode_truth_file(truth_files.pop()) 135 | if len(truth_data) < 1: 136 | raise IOError("No truth loaded") 137 | if self.__debug: 138 | print("{} truth records loaded.".format(len(truth_data))) 139 | 140 | # Index all image chips 141 | file_paths = [[os.path.join(walked[0], wfile) for wfile in walked[2]] for walked in root_files] 142 | chip_idx = dict(filter(lambda t: t is not None, map(self.index_chip, itertools.chain(*file_paths)))) 143 | 144 | if len(chip_idx) != len(truth_data): 145 | raise IOError("Number of truth records not equal to number of chips.") 146 | if self.__debug: 147 | print("{} image chips loaded.".format(len(chip_idx))) 148 | 149 | # Create and store chips 150 | self.chips = {meta['file']: self.__create_chip(meta, 
truth_data[idx]) for idx, meta in chip_idx.items()} 151 | if self.__debug: 152 | print("{} chip.Chips loaded.".format(len(self.chips))) 153 | -------------------------------------------------------------------------------- /pelops/datasets/str.py: -------------------------------------------------------------------------------- 1 | 2 | import collections 3 | import os 4 | 5 | import pelops.datasets.chip as chip 6 | import pelops.utils as utils 7 | 8 | # ================================================================================ 9 | # STR_SA Dataset 10 | # ================================================================================ 11 | 12 | 13 | class StrDataset(chip.ChipDataset): 14 | # define paths to files and directories 15 | filenames = collections.namedtuple( 16 | "filenames", 17 | [ 18 | "dir_all" 19 | ] 20 | ) 21 | filepaths = filenames ( 22 | "crossCameraMatches" 23 | ) 24 | 25 | def __init__(self, dataset_path, set_type=None): 26 | super().__init__(dataset_path, set_type) 27 | self.__set_filepaths() # set self.__filepaths 28 | self.__set_chips() 29 | # STR does not differentiate the set type 30 | 31 | def __set_filepaths(self): 32 | self.__filepaths = StrDataset.filenames( 33 | os.path.join(self.dataset_path, StrDataset.filepaths.dir_all) 34 | ) 35 | 36 | def __set_chips(self): 37 | directory = self.__filepaths.dir_all 38 | for file in os.listdir(directory): 39 | path = os.path.join(directory, file) 40 | 41 | # Only interested in certain files 42 | is_valid = os.path.isfile(path) 43 | is_png = path.endswith(".png") 44 | is_mask = "mask" in path 45 | if not is_valid or not is_png or is_mask: 46 | continue 47 | 48 | # Set all Chip variables 49 | car_id = get_sa_car_id(path) 50 | cam_id = get_sa_cam_id(path) 51 | 52 | time = None # No timestamp information 53 | misc = None # No miscellaneous information 54 | 55 | # Make chip 56 | current_chip = chip.Chip( 57 | path, 58 | car_id, 59 | cam_id, 60 | time, 61 | misc 62 | ) 63 | 64 | self.chips[path] = current_chip 65 | 66 | 67 | def int_from_string(string, start_chars, int_len): 68 | # We only want to use the filename, not the directory names 69 | base_string = os.path.basename(string) 70 | loc = base_string.find(start_chars) 71 | 72 | # Not found 73 | if loc < 0: 74 | return None 75 | 76 | start = loc + len(start_chars) 77 | end = start + int_len 78 | str_num = base_string[start:end] 79 | return int(str_num) 80 | 81 | 82 | def get_sa_cam_id(string): 83 | return int_from_string(string, start_chars="_cam", int_len=2) 84 | 85 | 86 | def get_sa_car_id(string): 87 | return int_from_string(string, start_chars="match", int_len=5) 88 | -------------------------------------------------------------------------------- /pelops/datasets/veri.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import datetime 3 | import os 4 | import xml.etree.ElementTree 5 | 6 | import pelops.datasets.chip as chip 7 | import pelops.utils as utils 8 | 9 | # ================================================================================ 10 | # Veri Dataset 11 | # ================================================================================ 12 | 13 | 14 | class VeriDataset(chip.ChipDataset): 15 | filenames = collections.namedtuple( 16 | "filenames", 17 | [ 18 | "name_query", 19 | "name_test", 20 | "name_train", 21 | "dir_query", 22 | "dir_test", 23 | "dir_train", 24 | "list_color", 25 | "list_type", 26 | "ground_truths", 27 | "junk_images", 28 | "label_train" 29 | ] 30 | ) 31 | filepaths = 
filenames( 32 | "name_query.txt", 33 | "name_test.txt", 34 | "name_train.txt", 35 | "image_query", 36 | "image_test", 37 | "image_train", 38 | "list_color.txt", 39 | "list_type.txt", 40 | "gt_image.txt", 41 | "jk_image.txt", 42 | "train_label.xml" 43 | ) 44 | 45 | def __init__(self, dataset_path, set_type=None): 46 | super().__init__(dataset_path, set_type) 47 | self.__set_filepaths() # set self.__filepaths 48 | self.__color_type = {} 49 | if self.set_type is utils.SetType.ALL or self.set_type is utils.SetType.TRAIN: 50 | self.__build_metadata_dict() 51 | self.__set_chips() 52 | 53 | def __build_metadata_dict(self): 54 | """Extract car type and color from the label file.""" 55 | try: 56 | root = xml.etree.ElementTree.parse(self.__filepaths.label_train).getroot() 57 | except ValueError as e: 58 | URL = "https://github.com/Lab41/pelops/issues/72" 59 | ERROR = ( 60 | str(e) + "\n\n" 61 | "The label file 'train_label.xml' comes malformed from the\n" 62 | "source. The first line needs to be changed to:\n" 63 | "''\n" 64 | "if it is not already.\n" 65 | "See: " + URL 66 | ) 67 | raise ValueError(ERROR) 68 | 69 | colors = { 70 | 1: "yellow", 2: "orange", 3: "green", 4: "gray", 5: "red", 71 | 6: "blue", 7: "white", 8: "golden", 9: "brown", 10: "black", 72 | } 73 | types = { 74 | 0: "unknown", 1: "sedan", 2: "suv", 3: "van", 4: "hatchback", 75 | 5: "mpv", 6: "pickup", 7: "bus", 8: "truck", 9: "estate", 76 | } 77 | 78 | # Version 1.0 of the VeRI data has a bug where several cars are labeled 79 | # as the illegal type 0: 80 | # 81 | # https://github.com/Lab41/pelops/issues/76 82 | # 83 | # These cars are actually SUVs (or, cross-overs) and hence should by 84 | # type 2. 85 | if root.attrib["Version"] == "1.0": 86 | types[0] = "suv" 87 | URL = "https://github.com/Lab41/pelops/issues/76" 88 | output = ( 89 | "VeRI Version 1.0 found! 
Patching `typeID=0` to `typeID=2`.\n" 90 | "See: " + URL 91 | ) 92 | print(output) 93 | 94 | self.__color_type = {} 95 | for child in root.iter("Item"): 96 | # Get the IDs from the XML node 97 | vehicle_id = child.attrib["vehicleID"] 98 | color = child.attrib["colorID"] 99 | body_type = child.attrib["typeID"] 100 | 101 | vehicle_id_int = int(vehicle_id) 102 | color_id = int(color) 103 | body_id = int(body_type) 104 | str_color = colors[color_id] 105 | str_body = types[body_id] 106 | 107 | self.__color_type[vehicle_id_int] = (str_color, str_body) 108 | 109 | def __set_filepaths(self): 110 | self.__filepaths = VeriDataset.filenames( 111 | os.path.join(self.dataset_path, VeriDataset.filepaths.name_query), 112 | os.path.join(self.dataset_path, VeriDataset.filepaths.name_test), 113 | os.path.join(self.dataset_path, VeriDataset.filepaths.name_train), 114 | os.path.join(self.dataset_path, VeriDataset.filepaths.dir_query), 115 | os.path.join(self.dataset_path, VeriDataset.filepaths.dir_test), 116 | os.path.join(self.dataset_path, VeriDataset.filepaths.dir_train), 117 | os.path.join(self.dataset_path, VeriDataset.filepaths.list_color), 118 | os.path.join(self.dataset_path, VeriDataset.filepaths.list_type), 119 | os.path.join(self.dataset_path, VeriDataset.filepaths.ground_truths), 120 | os.path.join(self.dataset_path, VeriDataset.filepaths.junk_images), 121 | os.path.join(self.dataset_path, VeriDataset.filepaths.label_train), 122 | ) 123 | 124 | def __set_chips(self): 125 | # TODO: ignore images labeled as query, so we do not have to keep tabs for identical chips 126 | # identify all the chips 127 | all_names_filepaths = { 128 | utils.SetType.ALL: [self.__filepaths.name_query, self.__filepaths.name_test, self.__filepaths.name_train], 129 | utils.SetType.QUERY: [self.__filepaths.name_query], 130 | utils.SetType.TEST: [self.__filepaths.name_test], 131 | utils.SetType.TRAIN: [self.__filepaths.name_train], 132 | }.get(self.set_type) 133 | # create chip objects based on the names listed in the files 134 | for name_filepath in all_names_filepaths: 135 | if VeriDataset.filepaths.name_query in name_filepath: 136 | img_dir = self.__filepaths.dir_query 137 | elif VeriDataset.filepaths.name_test in name_filepath: 138 | img_dir = self.__filepaths.dir_test 139 | else: # VeriDataset.filepaths.name_train in filepath 140 | img_dir = self.__filepaths.dir_train 141 | for name in open(name_filepath): 142 | current_chip = self.__create_chip(img_dir, name.strip()) 143 | self.chips[current_chip.filepath] = current_chip 144 | 145 | def __create_chip(self, img_dir, img_name): 146 | # information about the chip resides in the chip's name 147 | splitter = img_name.split("_") 148 | misc = {} 149 | 150 | filepath = os.path.join(img_dir, img_name) 151 | car_id = int(splitter[0]) 152 | cam_id = int(utils.get_numeric(splitter[1])) 153 | time = datetime.datetime.fromtimestamp(int(splitter[2])) 154 | misc["binary"] = int(os.path.splitext(splitter[3])[0]) 155 | 156 | color, vehicle_type = self.__color_type.get(car_id, (None, None)) 157 | misc["color"] = color 158 | misc["vehicle_type"] = vehicle_type 159 | 160 | return chip.Chip(filepath, car_id, cam_id, time, misc) 161 | -------------------------------------------------------------------------------- /pelops/etl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/etl/__init__.py 
-------------------------------------------------------------------------------- /pelops/etl/computeMatrixCMC.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | from collections import defaultdict 4 | 5 | from matplotlib import pyplot 6 | 7 | 8 | def makeTransDicts(reindexFile): 9 | reindex = open(reindexFile, 'r') 10 | file2num = dict() 11 | num2file = dict() 12 | index = 0 13 | for line in reindex: 14 | line = line.strip() 15 | file2num[line] = index 16 | num2file[index] = line 17 | index += 1 18 | return (file2num, num2file) 19 | 20 | 21 | def makeMatrix(matrixFilename, num2file, file2num, measure='cosine'): 22 | 23 | a = open(matrixFilename, 'r') 24 | lines = 0 25 | for line in a: 26 | lines += 1 27 | a.close() 28 | 29 | Matrix = [[0 for x in range(lines)] for y in range(lines)] 30 | matrixFile = open(matrixFilename, 'r') 31 | for line in matrixFile: 32 | 33 | line = line.strip() 34 | line = json.loads(line) 35 | x = file2num[line['x']] 36 | y = file2num[line['y']] 37 | Matrix[x][y] = line[measure] 38 | Matrix[y][x] = line[measure] 39 | 40 | for index in range(0, lines): 41 | Matrix[index][index] = 8675309 42 | return Matrix 43 | 44 | 45 | def getrank(car, s, maxval=-1): 46 | for sidx, work in enumerate(s): 47 | # sval = work[0] 48 | scar = work[1] 49 | if scar == car: 50 | return sidx 51 | return maxval 52 | 53 | 54 | def preCMC(Matrix, num2file, downto=50): 55 | retval = defaultdict(int) 56 | start = time.time() 57 | size = len(Matrix[0]) 58 | 59 | for oindex in range(size): 60 | if oindex % 1000 == 0: 61 | print('index:{0} time:{1}'.format(oindex, time.time() - start)) 62 | start = time.time() 63 | 64 | car = num2file[oindex].split('_')[0] 65 | 66 | current = list() 67 | 68 | for idx, val in enumerate(Matrix[oindex]): 69 | current.append((float(val), num2file[idx].split('_')[0])) 70 | 71 | s = sorted(current, key=lambda tup: tup[0])[:downto] 72 | maxSearch = downto + 1 73 | r = getrank(car, s, maxval=maxSearch) 74 | retval[r] += 1 75 | return retval 76 | 77 | 78 | def computeCMC(rawCounts, num): 79 | idx = sorted(rawCounts) 80 | sum = 0 81 | CMC = list() 82 | for index in range(0, len(idx)): 83 | sum += rawCounts[index] 84 | print (index, sum) 85 | CMC.append(sum / float(num)) 86 | return CMC 87 | 88 | 89 | testFilesName = '/local_data/dgrossman/VeRi/test_uniqfiles' 90 | matrixFilename = '/local_data/dgrossman/VeRi/matrixFile.test_uniqfile' 91 | file2num, num2file = makeTransDicts(testFilesName) 92 | Matrix = makeMatrix(matrixFilename, num2file, file2num) 93 | rawCounts = preCMC(Matrix, num2file) 94 | CMC = computeCMC(rawCounts, len(Matrix[0])) 95 | 96 | # pyplot.ylim(0,1) 97 | pyplot.plot(CMC[:-1]) 98 | pyplot.show() 99 | -------------------------------------------------------------------------------- /pelops/etl/json2h5.py: -------------------------------------------------------------------------------- 1 | '''transform the json files into h5py files 2 | 3 | Input: 4 | one json encoded dict / line 5 | dict should have the following keys: 6 | colorID - colorID of the vehicle 7 | vehicleID - vehicle ID 8 | resnet50 - feature vector of the vehicle 9 | imageName - name of the file in storage 10 | typeID - ?? 11 | cameraID - which camera took the image 12 | 13 | Output: 14 | h5py file with the following datasets 15 | colorID - int colorID of the vehicle 16 | vehicleID - int vehicle ID 17 | resnet50 - [float] feature vector of the vehicle 18 | imageName - str name of the file in storage 19 | typeID - int ?? 
20 | cameraID - which camera took the image 21 | 22 | Usage: 23 | json2h5.py [-hv] 24 | json2h5.py -i -o 25 | 26 | Arguments: 27 | INFILE - json infile name 28 | OUTFILE - h5py outfile name 29 | 30 | Options: 31 | -h, --help :show this message 32 | -v, --version :Version of the program 33 | -i, --input= :input file for the program 34 | -o, --output= :output file for the program 35 | 36 | ''' 37 | import docopt 38 | import h5py 39 | import json 40 | import numpy as np 41 | import sys 42 | 43 | 44 | def makeJsonList(fileName): 45 | retval = list() 46 | with open(fileName, 'r') as f: 47 | for line in f: 48 | line = line.strip() 49 | line = json.loads(line) 50 | retval.append(line) 51 | return retval 52 | 53 | 54 | def extractColumn(colName, jsonList, t): 55 | retval = list() 56 | for line in jsonList: 57 | if t == str: 58 | retval.append(str(line[colName]).encode('ascii', 'ignore')) 59 | if t == int: 60 | retval.append(int(line[colName])) 61 | if t == float: 62 | vector = list() 63 | for element in line[colName]: 64 | vector.append(float(element)) 65 | retval.append(vector) 66 | return retval 67 | 68 | 69 | def make5file(file5Name, names, jsonList): 70 | with h5py.File(file5Name, 'w') as f: 71 | for o, i, t, t2 in names: 72 | sys.stdout.write('converting column {0}'.format(o)) 73 | temp = extractColumn(o, jsonList, t) 74 | sys.stdout.write('...Done\n') 75 | sys.stdout.write('making dataset {0}'.format(i)) 76 | f.create_dataset(i, data=temp, dtype=t2) 77 | sys.stdout.write('...Done\n') 78 | 79 | 80 | def main(args): 81 | try: 82 | inFileName = args['--input'] 83 | outFileName = args['--output'] 84 | except docopt.DocoptExit as e: 85 | sys.exit('error: input invalid options: {0}'.format(e)) 86 | 87 | f = np.dtype('float') 88 | c = h5py.special_dtype(vlen=bytes) 89 | names = [('colorID', 'colorID', int, int), 90 | ('vehicleID', 'vehicleID', int, int), 91 | ('resnet50', 'feats', float, f), 92 | ('imageName', 'ids', str, c), 93 | ('typeID', 'typeID', int, int), 94 | ('cameraID', 'cameraID', str, c)] 95 | 96 | sys.stdout.write('Reading {0}'.format(inFileName)) 97 | jsonList = makeJsonList(inFileName) 98 | sys.stdout.write('...Done\n') 99 | 100 | make5file(outFileName, names, jsonList) 101 | 102 | if __name__ == '__main__': 103 | args = docopt.docopt(__doc__, version='json2h5.py 1.0') 104 | main(args) 105 | -------------------------------------------------------------------------------- /pelops/etl/makeDistMatrix.py: -------------------------------------------------------------------------------- 1 | """ make the data for comparing entity id 2 | 3 | Input: 4 | processsed json file containing the list of images to compare 5 | the file must contain the 'resnet50' feature vector 6 | 7 | Output: 8 | all pairs comparison between images using resnet50 9 | output file lines will have the form of json dict 10 | dict will contain the following keys 11 | x - image name of the first image in comparison 12 | y - image name of the second image in comparison 13 | cosine - cosine distance bewteen the images 14 | euclidean - euclidian distance between the images 15 | 16 | Usage: 17 | makeDistMatrix [-hv] 18 | makeDistMatrix -i [-w ] 19 | 20 | Arguments: 21 | INPUT_FILE :file of the json description of the VeRi files 22 | WORKERS :number of threads in the pool 23 | 24 | Options: 25 | -i, --inputFile= :file location of the input 26 | -w,--workers= :num of workers in threadpool [default: 10] 27 | """ 28 | import docopt 29 | import json 30 | import sys 31 | from multiprocessing import Pool 32 | import 
scipy.spatial.distance 33 | import itertools 34 | import numpy as np 35 | import time 36 | 37 | 38 | # read the list of things to compare 39 | def makeWork(vectorFileName): 40 | vfile = open(vectorFileName, 'r') 41 | retval = list() 42 | for line in vfile: 43 | line = line.strip() 44 | line = json.loads(line) 45 | retval.append(line) 46 | vfile.close() 47 | return retval 48 | 49 | 50 | # help by chopping work into chunks 51 | def grouper(n, iterable): 52 | it = iter(iterable) 53 | while True: 54 | chunk = tuple(itertools.islice(it, n)) 55 | if not chunk: 56 | return 57 | yield chunk 58 | 59 | 60 | # my distance measures 61 | def my_dist(workList): 62 | retval = list() 63 | 64 | for pair in workList: 65 | x = pair[0] 66 | y = pair[1] 67 | fx = np.asarray(x['resnet50']) 68 | fy = np.asarray(y['resnet50']) 69 | workItem = dict() 70 | dc = str(float(scipy.spatial.distance.cosine(fx, fy))) 71 | de = str(float(scipy.spatial.distance.euclidean(fx, fy))) 72 | workItem['x'] = x['imageName'] 73 | workItem['y'] = y['imageName'] 74 | workItem['cosine'] = dc 75 | workItem['euclidean'] = de 76 | retval.append(workItem) 77 | 78 | return (retval) 79 | 80 | 81 | # takes in a json file with vectors and creates all the pairwise 82 | # distance calculations, saves output to file 83 | def main(args, atOnceOuter=100000, atOnceInner=10000): 84 | try: 85 | pworkers = args['--workers'] 86 | inFileName = args['--inputFile'] 87 | except docopt.DocoptExit as e: 88 | sys.exit('ERROR: input invalid options {0}'.format(e)) 89 | 90 | inFileName = sys.argv[1] 91 | work = makeWork(inFileName) 92 | p = Pool(pworkers) 93 | 94 | outFileName = 'matrixFile.{0}'.format(inFileName) 95 | matrixFile = open(outFileName, 'w') 96 | 97 | total = 0 98 | for batch in grouper(atOnceOuter, itertools.combinations(work, 2)): 99 | start = time.time() 100 | batched = list() 101 | 102 | for workbatch in grouper(atOnceInner, batch): 103 | batched.append(workbatch) 104 | 105 | retval = p.map(my_dist, batched) 106 | end = time.time() 107 | start2 = time.time() 108 | for listLine in retval: 109 | for line in listLine: 110 | total = total + 1 111 | matrixFile.write(json.dumps(line)+'\n') 112 | end2 = time.time() 113 | 114 | fstr = 'proc elapsed:{0} sec proc:{1} total{2}' 115 | print(fstr.format(end-start, atOnceOuter, total)) 116 | print('IO elapsed:{0}\n'.format(end2-start2)) 117 | matrixFile.close() 118 | 119 | if __name__ == '__main__': 120 | args = docopt.docopt(__doc__,version='makeDistMatrix 1.0') 121 | main() 122 | -------------------------------------------------------------------------------- /pelops/etl/makeFeaturesResNet50.py: -------------------------------------------------------------------------------- 1 | """ Generate resnet50 features 2 | 3 | Input: 4 | infile shold be a list of json lines one json/line 5 | 6 | Output: 7 | appending of resnet50 features to each json line 8 | 9 | Usage: 10 | makeFeaturesResNet50 [-hv] 11 | makeFeaturesResNet50 -i -p 12 | 13 | Arguments: 14 | INPUT_FILENAME : location of the file to enrich with resnet features 15 | IMAGE_DIR : full path to where the images live 16 | 17 | Options: 18 | -h, --help : Show this help message. 19 | -v, --version : Show the version number. 
20 | -i, --inFile : input file to enrich with reset fetures 21 | -p, --path : Path to the directory holding the images 22 | 23 | 24 | """ 25 | 26 | import docopt 27 | import numpy as np 28 | from keras.applications.resnet50 import ResNet50 29 | from keras.preprocessing import image 30 | from keras.applications.imagenet_utils import preprocess_input 31 | from keras.models import Model 32 | import os 33 | import time 34 | import json 35 | import sys 36 | 37 | 38 | # return an image from a file, default resize to 224,224 39 | def load_image(img_path, resizex=224, resizey=224): 40 | data = image.load_img(img_path, target_size=(resizex, resizey)) 41 | x = image.img_to_array(data) 42 | x = np.expand_dims(x, axis=0) 43 | x = preprocess_input(x) 44 | return x 45 | 46 | 47 | # load the imagenet networks 48 | def get_models(): 49 | # include_top needs to be True for this to work 50 | base_model = ResNet50(weights='imagenet', include_top=True) 51 | model = Model(input=base_model.input, 52 | output=base_model.get_layer('flatten_1').output) 53 | return (model, base_model) 54 | 55 | 56 | # return feature vector for a given img, and model 57 | def image_features(img, model): 58 | features = model.predict(img) 59 | return features 60 | 61 | 62 | # read the files to process 63 | def getList(name): 64 | retval = list() 65 | f = open('name', 'r') 66 | for line in f: 67 | line = line.strip() 68 | line = json.loads(line) 69 | retval.append(line) 70 | f.close() 71 | return retval 72 | 73 | 74 | # perform the file by file processing 75 | def process(trainingList, prefix, model, outFilename, batchSize=1000): 76 | outFile = open(outFilename, 'w') 77 | start = time.time() 78 | for idx, line in enumerate(trainingList): 79 | tempd = dict() 80 | if idx % batchSize == 0: 81 | end = time.time() - start 82 | start = time.time() 83 | fstring = 'total {0} batch {1} images in {2} seconds' 84 | print (fstring.format(idx, batchSize, end)) 85 | path = os.path.join(prefix, line['imageName']) 86 | img = load_image(path) 87 | feature = image_features(img, model) 88 | tempd['resnet50'] = feature.tolist()[0] 89 | tempd.update(line) 90 | outFile.write(json.dumps(tempd)+'\n') 91 | outFile.close() 92 | 93 | 94 | # read json file append feature vector to each line dict 95 | def main(args): 96 | try: 97 | lineFileName = args['--inFile'] 98 | prefix = args['--path'] 99 | 100 | except docopt.DocoptExit as e: 101 | sys.exit('Error: input invalid options {0}'.format(e)) 102 | 103 | outFilename = '{0}.resnet50.json'.format(lineFileName) 104 | model, base_model = get_models() 105 | 106 | print('loading...') 107 | trainingList = getList(lineFileName) 108 | 109 | print('processing...') 110 | process(trainingList, prefix, model, outFilename) 111 | 112 | print('done.') 113 | 114 | 115 | if __name__ == '__main__': 116 | args = docopt.docopt(__doc__, version='1.0') 117 | main(args) 118 | -------------------------------------------------------------------------------- /pelops/etl/veriFileList2Json.py: -------------------------------------------------------------------------------- 1 | """ turn the list of files into a list of json dicts about the files 2 | 3 | Input: 4 | Take the VeRi datset that contains the following information: 5 | * 49358 images (1679 query images, 11580 test images, 37779 train images) 6 | * 776 vehicles 7 | * 20 cameras 8 | * covering 1.0 km^2 area in 24 hours 9 | 10 | convert the name_* files into json files for processing 11 | 12 | Output: 13 | json file with the following attributes in a dict per line: 14 | imageName 15 | 
vehicleID 16 | cameraID 17 | colorID 18 | typeID 19 | 20 | Usage: 21 | veriFileList2Json [-hv] 22 | veriFileList2Json -i 23 | 24 | Arguments: 25 | INFILE_NAME :file path to the VeRI name_ file 26 | 27 | Options: 28 | -h, --help :Show this message 29 | -v, --version :Version of the prog 30 | -i, --inputFile :location of the VeRi name_ file to process 31 | 32 | 33 | 34 | """ 35 | import docopt 36 | import json 37 | import sys 38 | 39 | 40 | # turn the list of files into json for working with 41 | def main(args): 42 | try: 43 | inFileName = args['--inputFile'] 44 | except docopt.DocoptExit as e: 45 | sys.exit('error: input invalid options: {0}'.format(e)) 46 | 47 | outFileName = '{0}.json'.format(inFileName) 48 | 49 | inFile = open(inFileName, 'r') 50 | outFile = open(outFileName, 'w') 51 | 52 | for line in inFile: 53 | d = dict() 54 | line = line.strip() 55 | attrs = line.split('_') 56 | d['imageName'] = line 57 | d['vehicleID'] = attrs[0] 58 | d['cameraID'] = attrs[1] 59 | d['colorID'] = str(-1) 60 | d['typeID'] = str(-1) 61 | outFile.write(json.dumps(d)+'\n') 62 | inFile.close() 63 | 64 | 65 | if __name__ == '__main__': 66 | args = docopt.docopt(__doc__,version='veriFileList2Json 1.0') 67 | main(args) 68 | -------------------------------------------------------------------------------- /pelops/experiment_api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/experiment_api/__init__.py -------------------------------------------------------------------------------- /pelops/experiment_api/run_metric.sh: -------------------------------------------------------------------------------- 1 | python3 metric.py -c -w VeriDataset -y 2 -r 5 "/path/to/veri/dataset" "/path/to/image/feature/json/file" 2 | -------------------------------------------------------------------------------- /pelops/features/feature_producer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | 4 | from pelops.datasets.chipper import Chipper 5 | from pelops.datasets.featuredataset import FeatureDataset 6 | 7 | 8 | class FeatureProducer(object): 9 | def __init__(self, chip_producer): 10 | self.chip_producer = chip_producer 11 | self.set_variables() 12 | 13 | def return_features(self): 14 | if isinstance(self.chip_producer, Chipper): 15 | chips = [] 16 | chip_keys = [] 17 | for chip_list in self.chip_producer: 18 | chips.extend(chip_list) 19 | for i, chip in enumerate(chip_list): 20 | chip_keys.append('{}_{}'.format(chip.frame_number, i)) 21 | 22 | else: 23 | chips = [] 24 | chip_keys = [] 25 | for chip_key, chip in self.chip_producer.chips.items(): 26 | chips.append(chip) 27 | chip_keys.append(chip_key) 28 | 29 | feats = np.zeros((len(chips), self.feat_size), dtype=np.float32) 30 | for i, chip in enumerate(chips): 31 | feats[i] = self.produce_features(chip) 32 | return chip_keys, chips, feats 33 | 34 | @staticmethod 35 | def get_image(chip): 36 | if hasattr(chip, 'img_data'): 37 | img = Image.fromarray(chip.img_data) 38 | return img.convert('RGB') 39 | else: 40 | return Image.open(chip.filepath) 41 | 42 | def produce_features(self, chip): 43 | """Takes a chip object and returns a feature vector of size 44 | self.feat_size. 
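Concrete producers such as HOGFeatureProducer, ResNet50FeatureProducer, and KerasModelFeatureProducer override this method.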
""" 45 | raise NotImplementedError("produce_features() not implemented") 46 | 47 | def save_features(self, output_filename): 48 | """ 49 | Calculate features and save as a "FeatureDataset" 50 | Args: 51 | filename: 52 | 53 | Returns: 54 | 55 | """ 56 | # TODO: See if this function should save the features in memory 57 | if isinstance(self.chip_producer, Chipper): 58 | raise NotImplementedError("Only ChipDatasets are supported at this time") 59 | chip_keys, chips, features = self.return_features() 60 | FeatureDataset.save(output_filename, chip_keys, chips, features) 61 | 62 | def set_variables(self): 63 | """Child classes should use this to set self.feat_size, and any other 64 | needed variables. """ 65 | self.feat_size = None # Set this in your inherited class 66 | raise NotImplementedError("set_variables() is not implemented") 67 | -------------------------------------------------------------------------------- /pelops/features/hog.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | from skimage import color 4 | from skimage.feature import hog 5 | 6 | from pelops.features.feature_producer import FeatureProducer 7 | 8 | 9 | class HOGFeatureProducer(FeatureProducer): 10 | 11 | def __init__(self, chip_producer, image_size=(224,224), cells=(16, 16), orientations=8, histogram_bins_per_channel=256): 12 | self.image_size = image_size 13 | self.cells = cells 14 | self.orientations = orientations 15 | self.histogram_bins_per_channel = histogram_bins_per_channel 16 | super().__init__(chip_producer) 17 | 18 | def produce_features(self, chip): 19 | """Takes a chip object and returns a feature vector of size 20 | self.feat_size. """ 21 | img = self.get_image(chip) 22 | img = img.resize(self.image_size, Image.BICUBIC) 23 | img_x, img_y = img.size 24 | 25 | # Calculate histogram of each channel 26 | channels = img.split() 27 | hist_features = np.full(shape=3 * self.histogram_bins_per_channel, fill_value=-1) 28 | 29 | # We expect RGB images. If something else is passed warn the user and 30 | # continue. 31 | if len(channels) < 3: 32 | print("Non-RBG image! Vector will be padded with -1!") 33 | if len(channels) > 3: 34 | print("Non-RBG image! 
Channels beyond the first three will be ignored!") 35 | channels = channel[:3] 36 | 37 | for i, channel in enumerate(channels): 38 | channel_array = np.array(channel) 39 | values, _ = np.histogram(channel_array.flat, bins=self.histogram_bins_per_channel) 40 | start = i * self.histogram_bins_per_channel 41 | end = (i+1) * self.histogram_bins_per_channel 42 | hist_features[start:end] = values 43 | 44 | # Calculate HOG features, which require a grayscale image 45 | img = color.rgb2gray(np.array(img)) 46 | features = hog( 47 | img, 48 | orientations=self.orientations, 49 | pixels_per_cell=(img_x / self.cells[0], img_y / self.cells[1]), 50 | cells_per_block=self.cells, # Normalize over the whole image 51 | ) 52 | 53 | return np.concatenate((features, hist_features)) 54 | 55 | def set_variables(self): 56 | hog_size = self.cells[0] * self.cells[1] * self.orientations 57 | hist_size = 3 * self.histogram_bins_per_channel 58 | self.feat_size = hog_size + hist_size 59 | -------------------------------------------------------------------------------- /pelops/features/keras_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.preprocessing import image 3 | from keras.applications.resnet50 import preprocess_input 4 | from keras.models import load_model, Model, model_from_json 5 | 6 | from PIL import Image as PIL_Image 7 | from pelops.features.feature_producer import FeatureProducer 8 | 9 | 10 | class KerasModelFeatureProducer(FeatureProducer): 11 | def __init__(self, chip_producer, model_filename, layer_name, weight_filename=None): 12 | global resnet_model 13 | super().__init__(chip_producer) 14 | 15 | if weight_filename is None: 16 | self.original_model = load_model(model_filename) 17 | else: 18 | self.original_model = self.load_model_workaround(model_filename,weight_filename) 19 | 20 | self.keras_model = Model(input=self.original_model.input, 21 | output=self.original_model.get_layer(layer_name).output) 22 | 23 | @staticmethod 24 | def load_model_workaround(model_filename,weight_filename): 25 | # load json and create model 26 | json_file = open(model_filename, 'r') 27 | loaded_model_json = json_file.read() 28 | json_file.close() 29 | 30 | loaded_model = model_from_json(loaded_model_json) 31 | 32 | # load weights into new model 33 | loaded_model.load_weights(weight_filename) 34 | return loaded_model 35 | 36 | @staticmethod 37 | def preprocess_image(img, x_dim=224, y_dim=224): 38 | img = img.resize((x_dim,y_dim), PIL_Image.BICUBIC) 39 | x = image.img_to_array(img) 40 | x = np.expand_dims(x, axis=0) 41 | x = preprocess_input(x) 42 | return x 43 | 44 | def produce_features(self, chip): 45 | pil_image = self.get_image(chip) 46 | preprocessed_image = self.preprocess_image(pil_image) 47 | image_features = self.keras_model.predict(preprocessed_image) 48 | return image_features 49 | 50 | def set_variables(self): 51 | self.feat_size = 2048 52 | -------------------------------------------------------------------------------- /pelops/features/resnet50.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.applications.resnet50 import ResNet50 3 | from keras.preprocessing import image 4 | from keras.applications.resnet50 import preprocess_input 5 | from keras.models import Model 6 | 7 | from PIL import Image as PIL_Image 8 | from pelops.features.feature_producer import FeatureProducer 9 | 10 | # Use global so we only load the resnet model once 11 | # TODO: find a better 
way to do this 12 | resnet_model = None 13 | 14 | 15 | class ResNet50FeatureProducer(FeatureProducer): 16 | def __init__(self, chip_producer): 17 | global resnet_model 18 | super().__init__(chip_producer) 19 | 20 | if resnet_model is None: 21 | # include_top needs to be True for this to work 22 | base_model = ResNet50(weights='imagenet', include_top=True) 23 | resnet_model = Model(input=base_model.input, 24 | output=base_model.get_layer('flatten_1').output) 25 | 26 | self.resnet_model = resnet_model 27 | 28 | @staticmethod 29 | def preprocess_image(img, x_dim=224, y_dim=224): 30 | if img.size != (x_dim, y_dim): 31 | img = img.resize((x_dim,y_dim), PIL_Image.BICUBIC) 32 | x = image.img_to_array(img) 33 | x = np.expand_dims(x, axis=0) 34 | x = preprocess_input(x) 35 | return x 36 | 37 | def produce_features(self, chip): 38 | pil_image = self.get_image(chip) 39 | preprocessed_image = self.preprocess_image(pil_image) 40 | image_features = self.resnet_model.predict(preprocessed_image) 41 | return image_features 42 | 43 | def set_variables(self): 44 | self.feat_size = 2048 45 | -------------------------------------------------------------------------------- /pelops/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/models/__init__.py -------------------------------------------------------------------------------- /pelops/models/makesvm.py: -------------------------------------------------------------------------------- 1 | """ work with SVM and chips """ 2 | import time 3 | 4 | import sklearn 5 | from scipy.stats import uniform as sp_rand 6 | from sklearn import svm 7 | from sklearn.externals import joblib 8 | from sklearn.model_selection import RandomizedSearchCV 9 | from tqdm import tnrange 10 | 11 | from pelops.analysis.camerautil import get_match_id, make_good_bad 12 | from pelops.analysis.comparecameras import make_work 13 | 14 | 15 | def train_svm(examples, fd_train, eg_train): 16 | """ 17 | train a support vector machine 18 | 19 | examples(int): number of examples to generate 20 | fd_train(featureDataset): where to join features to chips 21 | eg_train(experimentGenerator): makes experiments 22 | 23 | clf(SVM): scm classifier trainined on the input examples 24 | """ 25 | lessons_train = list() 26 | outcomes_train = list() 27 | for _ in tnrange(examples): 28 | cameras_train = eg_train.generate() 29 | match_id = get_match_id(cameras_train) 30 | goods, bads = make_good_bad(cameras_train, match_id) 31 | make_work(fd_train, lessons_train, outcomes_train, goods, 1) 32 | make_work(fd_train, lessons_train, outcomes_train, bads, 0) 33 | 34 | clf = svm.SVC() 35 | 36 | print('fitting') 37 | start = time.time() 38 | clf.fit(lessons_train, outcomes_train) 39 | end = time.time() 40 | print('fitting took {} seconds'.format(end - start)) 41 | return clf 42 | 43 | 44 | def search(examples, fd_train, eg_train, iterations): 45 | """ 46 | beginnnings of hyperparameter search for svm 47 | """ 48 | param_grid = {'C': sp_rand()} 49 | lessons_train = list() 50 | outcomes_train = list() 51 | for _ in tnrange(examples): 52 | cameras_train = eg_train.generate() 53 | match_id = get_match_id(cameras_train) 54 | goods, bads = make_good_bad(cameras_train, match_id) 55 | make_work(fd_train, lessons_train, outcomes_train, goods, 1) 56 | make_work(fd_train, lessons_train, outcomes_train, bads, 0) 57 | clf = svm.SVC() 58 | print('searching') 59 | start = time.time() 60 | rsearch = 
RandomizedSearchCV( 61 | estimator=clf, param_distributions=param_grid, n_iter=iterations) 62 | rsearch.fit(lessons_train, outcomes_train) 63 | end = time.time() 64 | print('searching took {} seconds'.format(end - start)) 65 | print(rsearch.best_score_) 66 | print(rsearch.best_estimator_.C) 67 | 68 | 69 | def save_model(model, filename): 70 | """ 71 | save a model to disk 72 | 73 | model(somemodel): trained model to save 74 | filename(str): location to safe the model 75 | """ 76 | joblib.dump(model, filename) 77 | 78 | 79 | def load_model(filename): 80 | """ 81 | load a model from disk. make sure that models only 82 | show up from version 0.18.1 of sklearn as other versions 83 | may not load correctly 84 | 85 | filename(str): name of file to load 86 | """ 87 | if sklearn.__version__ == '0.18.1': 88 | model = joblib.load(filename) 89 | return model 90 | else: 91 | print('upgrade sklearn to version 0.18.1') 92 | 93 | 94 | def test_svm(examples, clf_train, fd_test, eg_test): 95 | """ 96 | score the trained SVM against test features 97 | 98 | examples(int): number of examples to run 99 | clf_train(modle): model for evaluating testing data 100 | fd_test(featureDataset): testing dataset 101 | eg_test(experimentGenerator): generated experiments from testing dataset 102 | 103 | out(int): score from the model 104 | """ 105 | lessons_test = list() 106 | outcomes_test = list() 107 | 108 | for _ in tnrange(examples): 109 | cameras_test = eg_test.generate() 110 | match_id = get_match_id(cameras_test) 111 | goods, bads = make_good_bad(cameras_test, match_id) 112 | make_work(fd_test, lessons_test, outcomes_test, goods, 1) 113 | make_work(fd_test, lessons_test, outcomes_test, bads, 0) 114 | 115 | print('scoring') 116 | start = time.time() 117 | out = clf_train.score(lessons_test, outcomes_test) 118 | end = time.time() 119 | print('scoring took {} seconds'.format(end - start)) 120 | return out 121 | -------------------------------------------------------------------------------- /pelops/transform_img/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/transform_img/__init__.py -------------------------------------------------------------------------------- /pelops/transform_img/run.sh: -------------------------------------------------------------------------------- 1 | python3 transform.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | matplotlib 4 | logger 5 | python-resize-image 6 | h5py 7 | imageio 8 | scikit-image 9 | keras 10 | tensorflow 11 | tqdm 12 | pytest 13 | hdfs3 14 | opencv-python 15 | docopt 16 | scikit-learn -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from setuptools import find_packages 3 | from setuptools import setup 4 | 5 | setup( 6 | name="Pelops", 7 | version="0.1.1", 8 | description="Car re-identification via deep learning", 9 | url="https://www.python.org/sigs/distutils-sig/", 10 | author="Lab41", 11 | author_email="lab41@iqt.org", 12 | license="Apache Software License", 13 | packages=find_packages(), 14 | classifiers=[ 15 | "Development Status :: 3 - Alpha", 16 | "Environment :: Console", 17 | "Intended Audience :: Science/Research", 
18 | "License :: OSI Approved :: Apache Software License", 19 | "Natural Language :: English", 20 | "Operating System :: OS Independent", 21 | "Programming Language :: Python :: 3", 22 | "Programming Language :: Python :: 3.5", 23 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 24 | ], 25 | keywords=[ 26 | "computer vision", 27 | "deep learning", 28 | "resnet", 29 | "vehicle re-identification", 30 | ], 31 | ) 32 | -------------------------------------------------------------------------------- /testci/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script is meant to be called by the "install" step defined in 3 | # .travis.yml. See http://docs.travis-ci.com/ for more details. 4 | # The behavior of the script is controlled by environment variabled defined 5 | # in the .travis.yml in the top level folder of the project. 6 | # 7 | # This script is adapted from a similar script from the scikit-learn repository. 8 | # Taken from: https://github.com/aabadie/nilearn/blob/circle_ci_anaconda/continuous_integration/install.sh 9 | # 10 | # License: 3-clause BSD 11 | 12 | set -e 13 | 14 | # Fix the compilers to workaround avoid having the Python 3.4 build 15 | # lookup for g++44 unexpectedly. 16 | export CC=gcc 17 | export CXX=g++ 18 | 19 | create_new_venv() { 20 | # At the time of writing numpy 1.9.1 is included in the travis 21 | # virtualenv but we want to be in control of the numpy version 22 | # we are using for example through apt-get install 23 | deactivate 24 | virtualenv --system-site-packages testvenv 25 | source testvenv/bin/activate 26 | pip install nose 27 | } 28 | 29 | print_conda_requirements() { 30 | # Echo a conda requirement string for example 31 | # "pip nose python='2.7.3 scikit-learn=*". It has a hardcoded 32 | # list of possible packages to install and looks at _VERSION 33 | # environment variables to know whether to install a given package and 34 | # if yes which version to install. For example: 35 | # - for numpy, NUMPY_VERSION is used 36 | # - for scikit-learn, SCIKIT_LEARN_VERSION is used 37 | TO_INSTALL_ALWAYS="pip nose" 38 | REQUIREMENTS="$TO_INSTALL_ALWAYS" 39 | TO_INSTALL_MAYBE="python numpy scipy matplotlib scikit-learn flake8" 40 | for PACKAGE in $TO_INSTALL_MAYBE; do 41 | # Capitalize package name and add _VERSION 42 | PACKAGE_VERSION_VARNAME="${PACKAGE^^}_VERSION" 43 | # replace - by _, needed for scikit-learn for example 44 | PACKAGE_VERSION_VARNAME="${PACKAGE_VERSION_VARNAME//-/_}" 45 | # dereference $PACKAGE_VERSION_VARNAME to figure out the 46 | # version to install 47 | PACKAGE_VERSION="${!PACKAGE_VERSION_VARNAME}" 48 | if [ -n "$PACKAGE_VERSION" ]; then 49 | REQUIREMENTS="$REQUIREMENTS $PACKAGE=$PACKAGE_VERSION" 50 | fi 51 | done 52 | echo $REQUIREMENTS 53 | } 54 | 55 | create_new_conda_env() { 56 | # Skip Travis related code on circle ci. 
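# (CircleCI exports the CIRCLECI variable, so the deactivate below only runs on Travis)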
57 | if [ -z $CIRCLECI ]; then 58 | # Deactivate the travis-provided virtual environment and setup a 59 | # conda-based environment instead 60 | deactivate 61 | fi 62 | 63 | # Use the miniconda installer for faster download / install of conda 64 | # itself 65 | wget https://repo.continuum.io/miniconda/Miniconda3-4.2.11-Linux-x86_64.sh \ 66 | -O ~/miniconda.sh 67 | chmod +x ~/miniconda.sh && ~/miniconda.sh -b 68 | export PATH=$HOME/miniconda2/bin:$PATH 69 | echo $PATH 70 | conda update --quiet --yes conda 71 | 72 | # Configure the conda environment and put it in the path using the 73 | # provided versions 74 | REQUIREMENTS=$(print_conda_requirements) 75 | echo "conda requirements string: $REQUIREMENTS" 76 | conda create -n testenv --quiet --yes $REQUIREMENTS 77 | source activate testenv 78 | 79 | if [[ "$INSTALL_MKL" == "true" ]]; then 80 | # Make sure that MKL is used 81 | conda install --quiet --yes mkl 82 | elif [[ -z $CIRCLECI ]]; then 83 | # Travis doesn't use MKL but circle ci does for speeding up examples 84 | # generation in the html documentation. 85 | # Make sure that MKL is not used 86 | conda remove --yes --features mkl || echo "MKL not installed" 87 | fi 88 | } 89 | 90 | if [[ "$DISTRIB" == "conda" ]]; then 91 | create_new_conda_env 92 | pip install nose-timer 93 | # Note: nibabel is in setup.py install_requires so nibabel will 94 | # always be installed eventually. Defining NIBABEL_VERSION is only 95 | # useful if you happen to want a specific nibabel version rather 96 | # than the latest available one. 97 | if [ -n "$NIBABEL_VERSION" ]; then 98 | pip install nibabel=="$NIBABEL_VERSION" 99 | fi 100 | 101 | else 102 | echo "Unrecognized distribution ($DISTRIB); cannot setup CI environment." 103 | exit 1 104 | fi 105 | 106 | pip install psutil memory_profiler 107 | 108 | if [[ "$COVERAGE" == "true" ]]; then 109 | pip install coverage coveralls 110 | fi 111 | -------------------------------------------------------------------------------- /testci/small.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/testci/small.hdf5 -------------------------------------------------------------------------------- /testci/small.json: -------------------------------------------------------------------------------- 1 | {"class_name": "Sequential", "keras_version": "1.2.0", "config": [{"class_name": "Dense", "config": {"bias": true, "trainable": true, "W_regularizer": null, "input_dtype": "float32", "output_dim": 12, "name": "dense_8", "activation": "relu", "batch_input_shape": [null, 8], "init": "uniform", "activity_regularizer": null, "input_dim": 8, "b_constraint": null, "b_regularizer": null, "W_constraint": null}}]} -------------------------------------------------------------------------------- /testci/test_chip.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pelops.datasets.chip import ChipDataset, Chip 4 | 5 | 6 | @pytest.fixture 7 | def chips(): 8 | CHIPS = ( 9 | # filepath, car_id, cam_id, time, misc 10 | ("car1_cam1.png", 1, 1, 100, None), 11 | ("car1_cam2.png", 1, 2, 105, None), 12 | ("car1_cam3.png", 1, 3, 110, None), 13 | ("car2_cam1.png", 2, 1, 100, None), 14 | ("car2_cam2.png", 2, 1, 102, None), 15 | ("car2_cam3.png", 2, 1, 104, None), 16 | ) 17 | 18 | chips = {} 19 | for filepath, car_id, cam_id, time, misc in CHIPS: 20 | chip = Chip(filepath, car_id, cam_id, time, misc) 21 | chips[filepath] 
= chip 22 | 23 | return chips 24 | 25 | 26 | @pytest.fixture 27 | def chip_dataset(chips): 28 | """ Set up a instance of ChipDataset(). """ 29 | # Setup the class 30 | instantiated_class = ChipDataset(dataset_path="Test") 31 | 32 | # Monkey Patch in a fake chips dictionary 33 | instantiated_class.chips = chips 34 | 35 | return instantiated_class 36 | 37 | 38 | def test_chips_len(chip_dataset, chips): 39 | """ Test that ChipDataset.chips is the correct length """ 40 | assert len(chips) == len(chip_dataset) 41 | 42 | 43 | def get_all_function_tester(in_chips, in_chipbase, index, test_function): 44 | """ Check that a chip getting function gets all the correct chips. 45 | 46 | This function tests a chip getting function, such as 47 | `get_all_chips_by_carid()` by creating a list of every correct chip from 48 | the true list of chips, and comparing it to the list returned by the 49 | function. 50 | 51 | Args: 52 | in_chips: The output of chips() 53 | in_chipbase: The output of chipbase() 54 | index: The location of the id in the chips object to use to compare. 55 | 0 is the filepath (aka chip_id), 1 is the car_id, 2 is the cam_id. 56 | test_function: The function to test, it should return a list of chips 57 | selected by some id value. 58 | 59 | Returns: 60 | None 61 | """ 62 | seen_ids = [] 63 | for tup in in_chips.values(): 64 | test_id = tup[index] 65 | # Generate all the chips by hand, and compare 66 | if test_id in seen_ids: 67 | continue 68 | seen_ids.append(test_id) 69 | chips_list = [] 70 | for _, val in in_chipbase.chips.items(): 71 | if val[index] == test_id: 72 | chips_list.append(val) 73 | 74 | chips_list.sort() 75 | test_chips = sorted(test_function(test_id)) 76 | assert chips_list == test_chips 77 | 78 | 79 | def test_get_all_chips_by_car_id(chip_dataset, chips): 80 | """ Test ChipDataset.get_all_chips_by_carid() """ 81 | CAR_ID_INDEX = 1 82 | get_all_function_tester(chips, chip_dataset, CAR_ID_INDEX, 83 | chip_dataset.get_all_chips_by_car_id) 84 | 85 | 86 | def test_get_all_chips_by_cam_id(chip_dataset, chips): 87 | """ Test ChipDataset.get_all_chips_by_camid() """ 88 | CAM_ID_INDEX = 2 89 | get_all_function_tester(chips, chip_dataset, CAM_ID_INDEX, 90 | chip_dataset.get_all_chips_by_cam_id) 91 | 92 | 93 | def test_get_distinct_cams_by_car_id(chip_dataset): 94 | """ Test ChipDataset.get_distinct_cams_by_car_id() and get_distinct_cams_per_car() """ 95 | CAR_ID = 1 96 | TEST_CAMS = [1, 2, 3] 97 | for test_cam, cam in zip(TEST_CAMS, sorted(chip_dataset.get_distinct_cams_by_car_id(CAR_ID))): 98 | assert test_cam == cam 99 | 100 | 101 | def test_get_all_cam_ids(chip_dataset): 102 | """ Test ChipDataset.get_all_cam_ids() """ 103 | TEST_CAMS = [1, 2, 3] 104 | for test_cam, cam in zip(TEST_CAMS, sorted(chip_dataset.get_all_cam_ids())): 105 | assert test_cam == cam 106 | 107 | 108 | def test_get_all_car_ids(chip_dataset): 109 | TEST_CARS = [1, 2] 110 | for test_car, car in zip (TEST_CARS, sorted(chip_dataset.get_all_car_ids())): 111 | assert test_car == car 112 | 113 | 114 | def test_chipdataset_iter(chip_dataset, chips): 115 | """ Test iteration over ChipDataset() """ 116 | for chip in chip_dataset: 117 | assert chip in chips.values() 118 | -------------------------------------------------------------------------------- /testci/test_chipper.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import datetime as dt 4 | 5 | # OpenCV is *VERY* hard to install in CircleCI, so if we don't have it, skip these tests 6 | cv2 = 
pytest.importorskip("cv2") # Skip all tests if not found 7 | from pelops.datasets.chipper import FrameProducer 8 | 9 | 10 | @pytest.fixture 11 | def frame_time_fp(tmpdir): 12 | # Define a FrameProducer with just enough information to run __get_frame_time() 13 | ifp = FrameProducer( 14 | file_list = [], 15 | ) 16 | ifp.vid_metadata = {"fps": 30} 17 | 18 | return ifp 19 | 20 | 21 | @pytest.fixture 22 | def frame_time_fp_data(tmpdir): 23 | # Data to test __get_frame_time() 24 | DATA = ( 25 | # (filename, frame number), (answer) 26 | (("/foo/bar/baz_20000101T000000-00000-006000.mp4", 0), dt.datetime(2000, 1, 1)), 27 | (("/foo/bar/baz_20000101T000000-00600-012000.mp4", 0), dt.datetime(2000, 1, 1, 0, 10)), 28 | (("/foo/bar/baz_20000101T000000-00000-006000.mp4", 1), dt.datetime(2000, 1, 1, 0, 0, 0, 33333)), 29 | (("/foo/bar/baz_20000101T000000-00600-012000.mp4", 10), dt.datetime(2000, 1, 1, 0, 10, 0, 333333)), 30 | ) 31 | return DATA 32 | 33 | 34 | def test_get_frame_time(frame_time_fp, frame_time_fp_data): 35 | for input, answer in frame_time_fp_data: 36 | output = frame_time_fp._FrameProducer__get_frame_time(input[0], input[1]) 37 | assert output == answer 38 | -------------------------------------------------------------------------------- /testci/test_dgcars.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import os.path 4 | import json 5 | 6 | import pelops.utils as utils 7 | from pelops.datasets.dgcars import DGCarsDataset 8 | from pelops.datasets.chip import Chip 9 | from pelops.utils import SetType 10 | 11 | 12 | @pytest.fixture 13 | def dgcars(tmpdir): 14 | # Define some test and training data, all will be the sum 15 | TRAIN = [ 16 | {"url": "http://example.com/img.jpg", "hash": "2a8cedfa145b4345aed3fd9e82796c3e", "resnet50": "minivan", "model": "ZX2", "filename": "black/Ford/2a8cedfa145b4345aed3fd9e82796c3e.jpg", "make": "Ford", "color": "black"}, 17 | {"url": "http://example.com/img.jpg", "hash": "8241daf452ace679162c69386f26ddc7", "resnet50": "sports_car", "model": "Mazda6 Sport", "filename": "red/Mazda/8241daf452ace679162c69386f26ddc7.jpg", "make": "Mazda", "color": "red"}, 18 | {"url": "http://example.com/img.jpg", "hash": "e8dc3fb78206b14fe3568c1b28e5e5a1", "resnet50": "cab", "model": "XJ Series", "filename": "yellow/Jaguar/e8dc3fb78206b14fe3568c1b28e5e5a1.jpg", "make": "Jaguar", "color": "yellow"}, 19 | ] 20 | TEST = [ 21 | {"url": "http://example.com/img.jpg", "hash": "8881e7b561393f1d778a70dd449433e9", "resnet50": "racer", "model": "IS F", "filename": "yellow/Lexus/8881e7b561393f1d778a70dd449433e9.jpg", "make": "Lexus", "color": "yellow"}, 22 | {"url": "http://example.com/img.jpg", "hash": "38e857d5235afda4315676c0b7756832", "resnet50": "pickup", "model": "Mark VII", "filename": "silver/Lincoln/38e857d5235afda4315676c0b7756832.jpg", "make": "Lincoln", "color": "silver"}, 23 | {"url": "http://example.com/img.jpg", "hash": "6eb2b407cc398e70604bfd336bb2efad", "resnet50": "pickup", "model": "Lightning", "filename": "orange/Ford/6eb2b407cc398e70604bfd336bb2efad.jpg", "make": "Ford", "color": "orange"}, 24 | {"url": "http://example.com/img.jpg", "hash": "eb3811772ec012545c8952d88906d355", "resnet50": "racer", "model": "Rockette", "filename": "green/Fairthorpe/eb3811772ec012545c8952d88906d355.jpg", "make": "Fairthorpe", "color": "green"}, 25 | {"url": "http://example.com/img.jpg", "hash": "8dbbc1d930c7f2e4558efcc596728945", "resnet50": "minivan", "model": "S70", "filename": "white/Volvo/8dbbc1d930c7f2e4558efcc596728945.jpg", 
"make": "Volvo", "color": "white"}, 26 | {"url": "http://example.com/img.jpg", "hash": "ed45784812d1281bcb61f217f4422ab5", "resnet50": "convertible", "model": "A8", "filename": "green/Audi/ed45784812d1281bcb61f217f4422ab5.jpg", "make": "Audi", "color": "green"}, 27 | {"url": "http://example.com/img.jpg", "hash": "763ca4abbbb9b042b21f19fd80986179", "resnet50": "pickup", "model": "W126", "filename": "green/Mercedes-Benz/763ca4abbbb9b042b21f19fd80986179.jpg", "make": "Mercedes-Benz", "color": "green"}, 28 | ] 29 | 30 | WRITE_LIST = ( 31 | # filename, data list, settype 32 | ("allFiles", TRAIN + TEST, SetType.ALL), 33 | ("training", TRAIN, SetType.TRAIN), 34 | ("testing", TEST, SetType.TEST), 35 | ) 36 | 37 | output_chips = { 38 | SetType.ALL: [], 39 | SetType.TRAIN: [], 40 | SetType.TEST: [], 41 | } 42 | for filename, data_list, settype in WRITE_LIST: 43 | fn = tmpdir.join(filename) 44 | with open(fn.strpath, "w") as f: 45 | for d in data_list: 46 | # Write the data list files 47 | line = json.dumps(d) 48 | f.write(line + "\n") 49 | 50 | # Make a chip 51 | fp = os.path.join(tmpdir.strpath, d["filename"]) 52 | chip = Chip(fp, None, None, None, d) 53 | output_chips[settype].append(chip) 54 | 55 | # Instantiate a DGCarsDataset() class 56 | output_classes = { 57 | SetType.ALL: DGCarsDataset(tmpdir.strpath, SetType.ALL), 58 | SetType.TRAIN: DGCarsDataset(tmpdir.strpath, SetType.TRAIN), 59 | SetType.TEST: DGCarsDataset(tmpdir.strpath, SetType.TEST), 60 | } 61 | 62 | return (output_classes, output_chips) 63 | 64 | 65 | def test_dgcars_chips_len(dgcars): 66 | classes = dgcars[0] 67 | answer_chips = dgcars[1] 68 | # check that self.chips has been created, is not empty, and has the right 69 | # number of entries 70 | for key, cls in classes.items(): 71 | ans = answer_chips[key] 72 | assert len(cls.chips) == len(ans) 73 | 74 | def test_dgcars_chips_vals(dgcars): 75 | classes = dgcars[0] 76 | answer_chips = dgcars[1] 77 | 78 | for key, cls in classes.items(): 79 | ans = answer_chips[key] 80 | for chip in cls: 81 | # The chip must match one of our hand built chips 82 | assert chip in ans 83 | # Various values are None 84 | assert chip.car_id is None 85 | assert chip.cam_id is None 86 | assert chip.time is None 87 | # Misc and filepath should exist 88 | assert chip.filepath 89 | assert chip.misc 90 | # Misc is a dictionary like object 91 | assert hasattr(chip.misc, "get") 92 | 93 | 94 | def test_get_all_chips_by_car_id(dgcars): 95 | classes = dgcars[0] 96 | answer_chips = dgcars[1] 97 | 98 | for key, cls in classes.items(): 99 | ans = answer_chips[key] 100 | 101 | # All car_id values are None in DG Cars 102 | all_chips = sorted(cls.get_all_chips_by_car_id(None)) 103 | assert all_chips == sorted(ans) 104 | 105 | 106 | def test_get_all_chips_by_cam_id(dgcars): 107 | classes = dgcars[0] 108 | answer_chips = dgcars[1] 109 | 110 | for key, cls in classes.items(): 111 | ans = answer_chips[key] 112 | 113 | # All cam_id values are None in DG Cars 114 | all_chips = sorted(cls.get_all_chips_by_cam_id(None)) 115 | assert all_chips == sorted(ans) 116 | 117 | 118 | def test_get_distinct_cams_by_car_id(dgcars): 119 | classes = dgcars[0] 120 | answer_chips = dgcars[1] 121 | 122 | for key, cls in classes.items(): 123 | ans = answer_chips[key] 124 | 125 | # All car_id values are None in DG Cars 126 | assert cls.get_distinct_cams_by_car_id(None) == {None} 127 | 128 | 129 | def test_get_all_cam_ids(dgcars): 130 | classes = dgcars[0] 131 | answer_chips = dgcars[1] 132 | 133 | for key, cls in classes.items(): 134 | ans = 
answer_chips[key] 135 | 136 | # All cam_id values are None in DG Cars 137 | assert cls.get_all_cam_ids() == [None] 138 | 139 | 140 | def test_get_all_car_ids(dgcars): 141 | classes = dgcars[0] 142 | answer_chips = dgcars[1] 143 | 144 | for key, cls in classes.items(): 145 | ans = answer_chips[key] 146 | 147 | # All car_id values are None in DG Cars 148 | assert cls.get_all_car_ids() == [None] 149 | 150 | 151 | def test_dgcars_iter(dgcars): 152 | classes = dgcars[0] 153 | answer_chips = dgcars[1] 154 | 155 | for key, cls in classes.items(): 156 | ans = answer_chips[key] 157 | 158 | # Ensure that we can iterate and get all of the items 159 | for chip in cls: 160 | assert chip in ans 161 | 162 | # Ensure list can access the iterator, and that there are no extra 163 | # chips 164 | cls_chips = list(cls) 165 | for chip in ans: 166 | assert chip in cls_chips 167 | -------------------------------------------------------------------------------- /testci/test_experiment_utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pelops.utils as utils 4 | 5 | 6 | def test_SetType(): 7 | vals = utils.SetType.__members__ 8 | assert 'ALL' in vals 9 | assert 'QUERY' in vals 10 | assert 'TEST' in vals 11 | assert 'TRAIN' in vals 12 | 13 | 14 | def test_get_index_of_tuple(): 15 | TEST_LIST = [ 16 | (0, 'Who', 'John'), 17 | (1, 'What', 'Pizza'), 18 | (2, 'Where', 'Little Caesar'), 19 | (3, 'When', 'Noon'), 20 | (4, 'How', 'Eat'), 21 | (5, None, None), 22 | ] 23 | 24 | # Test that we can find ints, strings, and Nones 25 | assert 1 == utils.get_index_of_tuple(TEST_LIST, 0, 0) 26 | assert 2 == utils.get_index_of_tuple(TEST_LIST, 1, 'What') 27 | assert 6 == utils.get_index_of_tuple(TEST_LIST, 1, None) 28 | 29 | # Test that we report the last position if we don't find an answer 30 | assert len(TEST_LIST) == utils.get_index_of_tuple( 31 | TEST_LIST, 0, 'NOT THERE') 32 | 33 | def test_get_index_of_pairs(): 34 | TEST_LIST = [ 35 | (0, 0, 'Mozart'), 36 | (1, 'Twinkle', 'Twinkle'), 37 | (2, 'Where', 'Little Caesar'), 38 | (3, 'When', 'Noon'), 39 | (4, 'How', 'Eat'), 40 | (5, None, None), 41 | ] 42 | 43 | # Test that we can find ints, strings, and Nones 44 | assert 1 == utils.get_index_of_pairs(TEST_LIST, 0, 1, 0) 45 | assert 2 == utils.get_index_of_pairs(TEST_LIST, 1, 2, 'Twinkle') 46 | assert 6 == utils.get_index_of_pairs(TEST_LIST, 1, 2, None) 47 | 48 | # Test that we report the last position if we don't find an answer 49 | assert len(TEST_LIST) == utils.get_index_of_pairs( 50 | TEST_LIST, 0, 1, 'NOT THERE') 51 | 52 | 53 | def test_get_basename(): 54 | TEST_FILEPATHS = ( 55 | ("/path/to/file/hello.py", "hello.py"), 56 | ("hello.py", "hello.py") 57 | ) 58 | 59 | for test_input, answer in TEST_FILEPATHS: 60 | assert answer == utils.get_basename(test_input) 61 | 62 | 63 | def test_get_numeric(): 64 | TEST_STRINGS = ( 65 | ('c002.jpg', '002'), 66 | ('_012_', '012'), 67 | ) 68 | 69 | for test_input, answer in TEST_STRINGS: 70 | assert answer == utils.get_numeric(test_input) 71 | 72 | 73 | def test_get_timestamp(): 74 | assert "2012-09-16 12:03:04" == str(utils.get_timestamp(datetime.datetime(2012, 9, 16, 12, 3, 4))) 75 | assert 1 == utils.get_timestamp(1) 76 | assert "Saturday" == utils.get_timestamp("Saturday") 77 | 78 | 79 | def test_should_drop(): 80 | # Always drop 81 | assert utils.should_drop(1.) is True 82 | # Never drop 83 | assert utils.should_drop(0.)
is False 84 | -------------------------------------------------------------------------------- /testci/test_featuredataset.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import pytest 3 | 4 | import numpy as np 5 | from pelops.datasets.chip import ChipDataset, Chip 6 | from pelops.datasets.featuredataset import FeatureDataset 7 | 8 | FEAT_LENGTH = 2048 9 | 10 | @pytest.fixture 11 | def chips(): 12 | CHIPS = ( 13 | # filepath, car_id, cam_id, time, misc 14 | ("car1_cam1.png", 1, 1, datetime.datetime(2016, 10,1, 0, 1, 2, microsecond=100), {}), 15 | ("car1_cam2.png", 1, 2, datetime.datetime(2016, 10,1, 0, 1, 2, microsecond=105), {}), 16 | ("car1_cam3.png", 1, 3, datetime.datetime(2016, 10,1, 0, 1, 2, microsecond=110), {}), 17 | ("car2_cam1.png", 2, 1, datetime.datetime(2016, 10,1, 0, 1, 2, microsecond=100), {}), 18 | ("car2_cam2.png", 2, 1, datetime.datetime(2016, 10,1, 0, 1, 2, microsecond=102), {}), 19 | ("car2_cam3.png", 2, 1, datetime.datetime(2016, 10,1, 0, 1, 2, microsecond=104), {}), 20 | ) 21 | 22 | chips = {} 23 | for filepath, car_id, cam_id, time, misc in CHIPS: 24 | chip = Chip(filepath, car_id, cam_id, time, misc) 25 | chips[filepath] = chip 26 | 27 | return chips 28 | 29 | @pytest.fixture 30 | def feature_dataset(chips, tmpdir): 31 | OUTPUT_FNAME = tmpdir.join("test_feature_dataset.hdf5").strpath 32 | feat_data = np.random.random((len(chips), FEAT_LENGTH)) 33 | FeatureDataset.save(OUTPUT_FNAME, list(chips.keys()), list(chips.values()), feat_data) 34 | return FeatureDataset(OUTPUT_FNAME) 35 | 36 | def test_get_feats(chips, feature_dataset): 37 | chip_key = next(iter(chips)) 38 | chip = chips[chip_key] 39 | assert len(feature_dataset.get_feats_for_chip(chip)) == FEAT_LENGTH 40 | 41 | def test_load_save(chips, feature_dataset): 42 | chip_key = next(iter(chips)) 43 | assert feature_dataset.chips[chip_key] == chips[chip_key] 44 | -------------------------------------------------------------------------------- /testci/test_featureproducer.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import datetime 3 | import pytest 4 | import numpy as np 5 | from PIL import Image 6 | 7 | from pelops.features.feature_producer import FeatureProducer 8 | 9 | 10 | @pytest.fixture 11 | def img_data(): 12 | DATA = [[[ 0, 0, 0], 13 | [255, 255, 255], 14 | [ 0, 0, 0]], 15 | [[255, 255, 255], 16 | [ 0, 0, 0], 17 | [255, 255, 255]], 18 | [[ 0, 0, 0], 19 | [255, 255, 255], 20 | [ 0, 0, 0]]] 21 | return np.array(DATA, dtype=np.uint8) 22 | 23 | 24 | @pytest.fixture 25 | def chip_producer(img_data): 26 | Chip = collections.namedtuple("Chip", ["filepath", "car_id", "cam_id", "time", "img_data", "misc"]) 27 | ChipProducer = collections.namedtuple("ChipProducer", ["chips"]) 28 | CHIPS = ( 29 | # filepath, car_id, cam_id, time, img_data, misc 30 | ("car1_cam1.png", 1, 1, datetime.datetime(2016, 10, 1, 0, 1, 2, microsecond=100), img_data, {}), 31 | ) 32 | 33 | chip_producer = ChipProducer({}) 34 | for filepath, car_id, cam_id, time, img_data, misc in CHIPS: 35 | print(img_data.shape) 36 | chip = Chip(filepath, car_id, cam_id, time, img_data, misc) 37 | chip_producer.chips[filepath] = chip 38 | 39 | return chip_producer 40 | 41 | 42 | @pytest.fixture 43 | def monkey_feature_producer(chip_producer): 44 | # Monkey patch the __init__() function so that it will succeed 45 | def new_init(self, chip_producer): 46 | self.chip_producer = chip_producer 47 | self.feat_size = 1 48 | 49 | 
FeatureProducer.__init__ = new_init 50 | 51 | return FeatureProducer(chip_producer) 52 | 53 | 54 | def test_set_variables_raises(): 55 | with pytest.raises(NotImplementedError): 56 | fp = FeatureProducer(None) 57 | 58 | 59 | def test_produce_features_raises(monkey_feature_producer): 60 | with pytest.raises(NotImplementedError): 61 | monkey_feature_producer.produce_features(None) 62 | 63 | 64 | def test_get_image_img_data(monkey_feature_producer, chip_producer, img_data): 65 | for key, chip in chip_producer.chips.items(): 66 | image = monkey_feature_producer.get_image(chip) 67 | image_array = np.array(image) 68 | assert np.array_equal(img_data, np.array(image)) 69 | 70 | 71 | def test_return_features_raises(monkey_feature_producer): 72 | with pytest.raises(NotImplementedError): 73 | monkey_feature_producer.return_features() 74 | -------------------------------------------------------------------------------- /testci/test_hog_feature.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | from skimage import color 3 | from skimage.feature import hog 4 | import collections 5 | import datetime 6 | import numpy as np 7 | import pytest 8 | from itertools import product 9 | 10 | from pelops.features.hog import HOGFeatureProducer 11 | 12 | def hog_features(img): 13 | img = color.rgb2gray(np.array(img)) 14 | features = hog(img, orientations=8, pixels_per_cell=(14, 14), cells_per_block=(16, 16)) 15 | return features 16 | 17 | 18 | def hist_features(img): 19 | MAX_CHANNELS = 3 20 | BINS = 256 21 | 22 | channels = img.split() 23 | 24 | # Remove alpha channels 25 | if len(channels) > MAX_CHANNELS: 26 | channels = channels[:MAX_CHANNELS] 27 | 28 | # Calculate features 29 | hist_features = np.zeros(MAX_CHANNELS * BINS) 30 | for i, channel in enumerate(channels): 31 | channel_array = np.array(channel) 32 | values, _ = np.histogram(channel_array.flat, bins=BINS) 33 | start = i * BINS 34 | end = (i+1) * BINS 35 | hist_features[start:end] = values 36 | 37 | return hist_features 38 | 39 | 40 | @pytest.fixture(scope="module") 41 | def img_data(): 42 | data = { 43 | "DATA_1":{}, 44 | "DATA_3":{}, 45 | "DATA_4":{}, 46 | } 47 | 48 | # Raw data 49 | data["DATA_1"]["array"] = np.array([ 50 | [[ 0, 0, 0], 51 | [255, 255, 255], 52 | [ 0, 0, 0]], 53 | ], dtype=np.uint8) 54 | 55 | data["DATA_3"]["array"] = np.array([ 56 | [[ 0, 0, 0], 57 | [255, 255, 255], 58 | [ 0, 0, 0]], 59 | [[255, 255, 255], 60 | [ 0, 0, 0], 61 | [255, 255, 255]], 62 | [[ 0, 0, 0], 63 | [255, 255, 255], 64 | [ 0, 0, 0]], 65 | ], dtype=np.uint8) 66 | 67 | data["DATA_4"]["array"] = np.array([ 68 | [[ 0, 0, 0], 69 | [255, 255, 255], 70 | [ 0, 0, 0]], 71 | [[255, 255, 255], 72 | [ 0, 0, 0], 73 | [255, 255, 255]], 74 | [[ 0, 0, 0], 75 | [255, 255, 255], 76 | [ 0, 0, 0]], 77 | [[ 0, 0, 0], 78 | [ 0, 0, 0], 79 | [ 0, 0, 0]], 80 | ], dtype=np.uint8) 81 | 82 | # PIL images 83 | for data_id in data: 84 | arr = data[data_id]["array"] 85 | img = Image.fromarray(arr) 86 | img = img.convert("RGB") 87 | img = img.resize((224, 224), Image.BICUBIC) 88 | data[data_id]["image"] = img 89 | 90 | # Calculate HOG features 91 | for data_id in data: 92 | img = data[data_id]["image"] 93 | hog = hog_features(img) 94 | data[data_id]["hog_features"] = hog 95 | 96 | # Calculate Histogram features 97 | for data_id in data: 98 | img = data[data_id]["image"] 99 | hist = hist_features(img) 100 | data[data_id]["hist_features"] = hist 101 | 102 | return data 103 | 104 | 105 | @pytest.fixture 106 | def chip_producer(img_data):
107 | Chip = collections.namedtuple("Chip", ["filepath", "car_id", "cam_id", "time", "img_data", "misc"]) 108 | CHIPS = [] 109 | for i, data_id in enumerate(img_data): 110 | data = img_data[data_id] 111 | arr = data["array"] 112 | # We use the data_id as the filepath since we do not actually open the 113 | # file and it only needs to be unique 114 | # 115 | # filepath, car_id, cam_id, time, img_data, misc 116 | chip = (data_id, i, 1, datetime.datetime(2016, 10, 1, 0, 1, 2, microsecond=100+i), arr, {}) 117 | CHIPS.append(chip) 118 | 119 | chip_producer = {"chips": {}} 120 | for filepath, car_id, cam_id, time, data, misc in CHIPS: 121 | chip = Chip(filepath, car_id, cam_id, time, data, misc) 122 | chip_producer["chips"][filepath] = chip 123 | 124 | return chip_producer 125 | 126 | 127 | @pytest.fixture 128 | def feature_producer(chip_producer): 129 | hog = HOGFeatureProducer(chip_producer) 130 | 131 | return hog 132 | 133 | 134 | def test_features(feature_producer, chip_producer, img_data): 135 | fp = feature_producer 136 | 137 | for _, chip in chip_producer["chips"].items(): 138 | data_id = chip.filepath 139 | data = img_data[data_id] 140 | hog_features = data["hog_features"] 141 | hist_features = data["hist_features"] 142 | hog_len = len(hog_features) 143 | hist_len = len(hist_features) 144 | 145 | features = feature_producer.produce_features(chip) 146 | assert len(features) == hog_len + hist_len 147 | 148 | total_features = np.concatenate((hog_features, hist_features)) 149 | assert np.array_equal(features, total_features) 150 | 151 | 152 | def test_inputs(chip_producer): 153 | pix_sizes = (32, 64, 128, 256, 512) 154 | cell_counts = (1, 2, 4, 16) 155 | orientation_counts = (2, 4, 8, 16) 156 | histogram_bins = (32, 64, 128, 256) 157 | for pix, cell, orientation, histogram_bin in product(pix_sizes, cell_counts, orientation_counts, histogram_bins): 158 | hog = HOGFeatureProducer( 159 | chip_producer, 160 | image_size=(pix, pix), 161 | cells=(cell, cell), 162 | orientations=orientation, 163 | histogram_bins_per_channel=histogram_bin, 164 | ) 165 | for _, chip in chip_producer["chips"].items(): 166 | features = hog.produce_features(chip) 167 | assert len(features) == ((cell**2) * orientation) + (3 * histogram_bin) 168 | -------------------------------------------------------------------------------- /testci/test_keras_load_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from pelops.features.keras_model import KerasModelFeatureProducer 6 | 7 | 8 | def test_load_model_workaround(): 9 | # @TODO get some environment variable set when in CI environment 10 | # test to see, modify path... 
11 | if os.getenv('CIRCLECI', None) is not None: 12 | model_filename = '/home/ubuntu/pelops/testci/small.json' 13 | weight_filename = '/home/ubuntu/pelops/testci/small.hdf5' 14 | if os.getenv('INDOCKERCONTAINER', None) is not None: 15 | model_filename = '/pelops_root/testci/small.json' 16 | weight_filename = '/pelops_root/testci/small.hdf5' 17 | 18 | model = KerasModelFeatureProducer.load_model_workaround( 19 | model_filename, weight_filename) 20 | assert model.layers[0].name == 'dense_8' 21 | -------------------------------------------------------------------------------- /testci/test_keras_model_feature.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import collections 3 | import datetime 4 | import numpy as np 5 | import pytest 6 | 7 | from pelops.features.keras_model import KerasModelFeatureProducer 8 | 9 | 10 | @pytest.fixture 11 | def img_data(): 12 | DATA = [[[ 0, 0, 0], 13 | [255, 255, 255], 14 | [ 0, 0, 0]], 15 | [[255, 255, 255], 16 | [ 0, 0, 0], 17 | [255, 255, 255]], 18 | [[ 0, 0, 0], 19 | [255, 255, 255], 20 | [ 0, 0, 0]]] 21 | return np.array(DATA, dtype=np.uint8) 22 | 23 | 24 | def test_preprocess_image(img_data): 25 | img = Image.fromarray(img_data) 26 | img_resized = KerasModelFeatureProducer.preprocess_image(img, 224, 224) 27 | assert img_resized.shape == (1, 224, 224, 3) 28 | -------------------------------------------------------------------------------- /testci/test_resnet50_feature.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import collections 3 | import datetime 4 | import numpy as np 5 | import pytest 6 | 7 | from pelops.features.resnet50 import ResNet50FeatureProducer 8 | 9 | 10 | @pytest.fixture 11 | def img_data(): 12 | DATA = [[[ 0, 0, 0], 13 | [255, 255, 255], 14 | [ 0, 0, 0]], 15 | [[255, 255, 255], 16 | [ 0, 0, 0], 17 | [255, 255, 255]], 18 | [[ 0, 0, 0], 19 | [255, 255, 255], 20 | [ 0, 0, 0]]] 21 | return np.array(DATA, dtype=np.uint8) 22 | 23 | 24 | @pytest.fixture 25 | def chip_producer(img_data): 26 | Chip = collections.namedtuple("Chip", ["filepath", "car_id", "cam_id", "time", "img_data", "misc"]) 27 | CHIPS = ( 28 | # filepath, car_id, cam_id, time, img_data, misc 29 | ("car1_cam1.png", 1, 1, datetime.datetime(2016, 10, 1, 0, 1, 2, microsecond=100), img_data, {}), 30 | ) 31 | 32 | chip_producer = {"chips": {}} 33 | for filepath, car_id, cam_id, time, img_data, misc in CHIPS: 34 | chip = Chip(filepath, car_id, cam_id, time, img_data, misc) 35 | chip_producer["chips"][filepath] = chip 36 | 37 | return chip_producer 38 | 39 | 40 | @pytest.fixture 41 | def feature_producer(chip_producer): 42 | res = ResNet50FeatureProducer(chip_producer) 43 | return res 44 | 45 | 46 | def test_features(feature_producer, chip_producer): 47 | for _, chip in chip_producer["chips"].items(): 48 | features = feature_producer.produce_features(chip) 49 | assert features.shape == (1, 2048) 50 | assert np.sum(features) != 0 51 | 52 | 53 | def test_preprocess_image(feature_producer, img_data): 54 | img = Image.fromarray(img_data) 55 | img_resized = feature_producer.preprocess_image(img, 224, 224) 56 | assert img_resized.shape == (1, 224, 224, 3) 57 | -------------------------------------------------------------------------------- /testci/test_slice.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import datetime 3 | import io 4 | 5 | import pytest 6 | 7 | import pelops.datasets.slice as slice 8 
| 9 | 10 | @pytest.fixture 11 | def slice_env(tmpdir): 12 | """Setup mock STR SLiCE dataset""" 13 | work_dir = tmpdir.mkdir('pelops_testing') 14 | truth = [ 15 | ['% obSetIdx', ' chipIdx', ' targetID'], 16 | ['1', ' 1', '0'], 17 | ['1', ' 2', '1'], 18 | ['1', ' 3', '0'], 19 | ['2', ' 1', '1'], 20 | ['100', ' 1', '2'], 21 | ] 22 | 23 | truth_file = work_dir.join('truth.txt') 24 | with io.StringIO(newline='') as truth_hdl: 25 | csv.writer(truth_hdl).writerows(truth) 26 | truth_hdl.seek(0) 27 | truth_file.write(truth_hdl.read()) 28 | 29 | for obset, chipid in {(row[0], row[1].strip()) for row in truth[1:]}: 30 | obset_dir = work_dir.join('ObSet00{}_1492560663_TestDir'.format(obset)) 31 | obset_dir.ensure(dir=True) 32 | img_dir = obset_dir.join('images') 33 | img_dir.ensure(dir=True) 34 | img_file = img_dir.join('ObSet001-00{}.png'.format(chipid)) 35 | img_file.ensure(dir=False) 36 | 37 | yield work_dir.strpath 38 | 39 | 40 | def test_slice_chip_load(slice_env): 41 | """Test that SLiCE chips load without error""" 42 | slice_dataset = slice.SliceDataset(slice_env) 43 | assert len(slice_dataset.chips) == 5 44 | 45 | 46 | def test_slice_chip_tgt_car_id(slice_env): 47 | """Test that SLiCE chips for target vehicles are processed properly.""" 48 | slice_dataset = slice.SliceDataset(slice_env) 49 | target_ids = [chip.car_id for chip in slice_dataset.chips.values() if chip.car_id.startswith('tgt-')] 50 | assert 'tgt-000000001' in target_ids 51 | assert len(target_ids) == 3 52 | assert len(set(target_ids)) == 2 53 | 54 | 55 | def test_slice_chip_unk_car_id(slice_env): 56 | """Test that SLiCE chips for non-target vehicles are processed properly.""" 57 | slice_dataset = slice.SliceDataset(slice_env) 58 | unk_ids = [chip.car_id for chip in slice_dataset.chips.values() if chip.car_id.startswith('unk-')] 59 | assert 'unk-000000001' in unk_ids 60 | assert len(unk_ids) == 2 61 | 62 | 63 | def test_slice_chip_dtg(slice_env): 64 | """Test that date/times encoded in filenames are processed properly.""" 65 | slice_dataset = slice.SliceDataset(slice_env) 66 | dtgs = {datetime.datetime.fromtimestamp(float(chip.time)).isoformat() for chip in slice_dataset.chips.values()} 67 | assert len(dtgs) == 1 68 | 69 | 70 | def test_slice_index_chip(): 71 | TRUTH = ( 72 | # STR like chip 73 | ( 74 | "ObSet009_1473015765_IH37_Jones/images/ObSet009-014.png", 75 | ( 76 | (9, 14), 77 | { 78 | 'file': "ObSet009_1473015765_IH37_Jones/images/ObSet009-014.png", 79 | 'meta': { 80 | 'obSetName': "IH37_Jones", 81 | 'epoch': "1473015765", 82 | }, 83 | }, 84 | ), 85 | ), 86 | # STR like chip 87 | ( 88 | "/root/data/stuff/ObSet009_1473015765_IH37_Jones/images/ObSet009-014.png", 89 | ( 90 | (9, 14), 91 | { 92 | 'file': "/root/data/stuff/ObSet009_1473015765_IH37_Jones/images/ObSet009-014.png", 93 | 'meta': { 94 | 'obSetName': "IH37_Jones", 95 | 'epoch': "1473015765", 96 | }, 97 | }, 98 | ), 99 | ), 100 | # SLICE like chip 101 | ( 102 | "ObSet101_1473082429_day5_camera3/images/ObSet101-001-0-20160905_185543.375_1.jpg", 103 | ( 104 | (101, 1), 105 | { 106 | 'file': "ObSet101_1473082429_day5_camera3/images/ObSet101-001-0-20160905_185543.375_1.jpg", 107 | 'meta': { 108 | 'obSetName': "day5_camera3", 109 | 'epoch': "1473101743", 110 | }, 111 | }, 112 | ), 113 | ), 114 | # SLICE like chip 115 | ( 116 | "/test/test/data/ObSet101_1473082429_day5_camera3/images/ObSet101-001-0-20160905_185543.375_1.jpg", 117 | ( 118 | (101, 1), 119 | { 120 | 'file': "/test/test/data/ObSet101_1473082429_day5_camera3/images/ObSet101-001-0-20160905_185543.375_1.jpg", 121 | 
'meta': { 122 | 'obSetName': "day5_camera3", 123 | 'epoch': "1473101743", 124 | }, 125 | }, 126 | ), 127 | ), 128 | # Special cases 129 | ("/test/test/truth.txt", None), 130 | ("/test/masks/image_mask.png", None), 131 | ) 132 | 133 | for file_path, answer in TRUTH: 134 | assert answer == slice.SliceDataset.index_chip(file_path) 135 | -------------------------------------------------------------------------------- /testci/test_str.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | 4 | from pelops.datasets.str import get_sa_cam_id 5 | from pelops.datasets.str import get_sa_car_id 6 | from pelops.datasets.str import int_from_string 7 | from pelops.datasets.str import StrDataset 8 | 9 | 10 | @pytest.fixture 11 | def str_sa(tmpdir): 12 | """ Set up some test files and an instance of StrDataset(). """ 13 | # Write a file to read back 14 | FILE_NAMES = ( 15 | # filepath, car_id, cam_id, time, misc 16 | ("match00001_cam02.png", 1, 2, None, None), 17 | ("match00001_cam01_mask.png", None, None, None, None), 18 | ("match00010_cam01.png", 10, 1, None, None), 19 | ("match00011_cam02_mask.png", None, None, None, None) 20 | ) 21 | # The contents of the files do not matter, the name is enough 22 | internal_dir = tmpdir.mkdir("crossCameraMatches") 23 | for name, _, _, _, _ in FILE_NAMES: 24 | out_file = internal_dir.join(name) 25 | out_file.write("TEST") 26 | 27 | # Setup the class 28 | instantiated_class = StrDataset(os.path.dirname(out_file.dirname)) 29 | 30 | # Rename filepath 31 | FILE_NAMES = ( 32 | (os.path.join(out_file.dirname, "match00001_cam02.png"), 1, 2, None, None), 33 | (os.path.join(out_file.dirname, "match00001_cam01_mask.png"), None, None, None, None), 34 | (os.path.join(out_file.dirname, "match00010_cam01.png"), 10, 1, None, None), 35 | (os.path.join(out_file.dirname, "match00011_cam02_mask.png"), None, None, None, None) 36 | ) 37 | 38 | # Filter out the files that were not read 39 | RET_FILE_NAMES = tuple(t for t in FILE_NAMES if t[1] is not None) 40 | return (instantiated_class, RET_FILE_NAMES) 41 | 42 | 43 | def test_str_sa_chips_len(str_sa): 44 | """ Test that StrDataset.chips is the correct length """ 45 | instantiated_class = str_sa[0] 46 | FILE_NAMES = str_sa[1] 47 | # check that self.chips has been created, is not empty, and has the right 48 | # number of entries 49 | assert len(FILE_NAMES) 50 | assert len(FILE_NAMES) == len(instantiated_class.chips) 51 | 52 | 53 | def test_str_sa_chips_vals(str_sa): 54 | """ Test that StrDataset chips have the correct values. 
""" 55 | instantiated_class = str_sa[0] 56 | FILE_NAMES = str_sa[1] 57 | 58 | # Check that the correct chips exist 59 | for filepath, car_id, cam_id, time, misc in FILE_NAMES: 60 | chip = instantiated_class.chips[filepath] 61 | assert car_id == chip.car_id 62 | assert cam_id == chip.cam_id 63 | # No time data 64 | assert chip.time is None 65 | # No misc data 66 | assert chip.misc is None 67 | # Filepath should be filled 68 | assert chip.filepath 69 | 70 | 71 | def test_get_all_chips_by_car_id(str_sa): 72 | """ Test StrDataset.get_all_chips_by_car_id() """ 73 | instantiated_class = str_sa[0] 74 | FILE_NAMES = str_sa[1] 75 | 76 | seen_ids = [] 77 | for filepath, car_id, cam_id, time, misc in FILE_NAMES: 78 | # Generate all the chips by hand, and compare 79 | if car_id in seen_ids: 80 | continue 81 | seen_ids.append(car_id) 82 | chips = [] 83 | for key, val in instantiated_class.chips.items(): 84 | if val.car_id == car_id: 85 | chips.append(val) 86 | 87 | chips.sort() 88 | test_chips = sorted(instantiated_class.get_all_chips_by_car_id(car_id)) 89 | assert chips == test_chips 90 | 91 | 92 | def test_get_all_chips_by_cam_id(str_sa): 93 | """ Test StrDataset.get_all_chips_by_cam_id() """ 94 | instantiated_class = str_sa[0] 95 | FILE_NAMES = str_sa[1] 96 | 97 | seen_ids = [] 98 | for filepath, car_id, cam_id, time, misc in FILE_NAMES: 99 | # Generate all the chips by hand, and compare 100 | if cam_id in seen_ids: 101 | continue 102 | seen_ids.append(cam_id) 103 | chips = [] 104 | for key, val in instantiated_class.chips.items(): 105 | if val.cam_id == cam_id: 106 | chips.append(val) 107 | 108 | chips.sort() 109 | test_chips = sorted(instantiated_class.get_all_chips_by_cam_id(cam_id)) 110 | assert chips == test_chips 111 | 112 | 113 | def test_get_distinct_cams_by_car_id(str_sa): 114 | """ Test StrDataset.get_distinct_cams_by_car_id() and get_distinct_cams_per_car """ 115 | instantiated_class = str_sa[0] 116 | CAR_ID = 1 117 | TEST_CAMS = [2] 118 | for test_cam, cam in zip(TEST_CAMS, sorted(instantiated_class.get_distinct_cams_by_car_id(CAR_ID))): 119 | assert test_cam == cam 120 | 121 | def test_get_all_cam_ids(str_sa): 122 | """ Test StrDataset.get_distinct_cams_by_car_id() """ 123 | instantiated_class = str_sa[0] 124 | TEST_CAMS = [1, 2] 125 | for test_cam, cam in zip(TEST_CAMS, sorted(instantiated_class.get_all_cam_ids())): 126 | assert test_cam == cam 127 | 128 | def test_get_all_car_ids(str_sa): 129 | """ Test StrDataset.get_distinct_cams_by_car_id() """ 130 | instantiated_class = str_sa[0] 131 | TEST_CARS = [1, 10] 132 | for test_car, car in zip (TEST_CARS, sorted(instantiated_class.get_all_car_ids())): 133 | assert test_car == car 134 | 135 | 136 | def test_str_sa_iter(str_sa): 137 | """ Test StrDataset.__iter__() """ 138 | instantiated_class = str_sa[0] 139 | FILE_NAMES = str_sa[1] 140 | chip_ids = tuple(i for i, _, _, _, _ in FILE_NAMES) 141 | 142 | for chip in instantiated_class: 143 | assert chip.filepath in chip_ids 144 | 145 | 146 | def test_int_from_string(): 147 | """ Test int_from_string() """ 148 | TEST_STRINGS = ( 149 | # String, Args, Answer 150 | ("test_010_test", ("test_", 3), 10), 151 | ("test_010_test", ("FAIL_", 3), None), 152 | ("test_010", ("test_", 3), 10), 153 | ("test_11_test", ("test_", 2), 11), 154 | ("010_test", ("", 3), 10), 155 | ("/foo/bar/bass/test_/test_010_test", ("test_", 3), 10), 156 | ) 157 | 158 | for test_string, args, answer in TEST_STRINGS: 159 | assert answer == int_from_string(test_string, args[0], args[1]) 160 | 161 | 162 | def test_get_sa_cam_id(): 
163 | """ Test get_sa_cam_id() """ 164 | TEST_STRINGS = ( 165 | # String, Answer 166 | ("match00001_cam02.png", 2), 167 | ("match00001_cam01_mask.png", 1), 168 | ("match00010_cam01.png", 1), 169 | ("match00011_cam02_mask.png", 2), 170 | ) 171 | 172 | for test_string, answer in TEST_STRINGS: 173 | assert answer == get_sa_cam_id(test_string) 174 | 175 | 176 | def test_get_sa_car_id(): 177 | """ Test get_sa_car_id() """ 178 | TEST_STRINGS = ( 179 | # String, Answer 180 | ("match00001_cam02.png", 1), 181 | ("match00001_cam01_mask.png", 1), 182 | ("match00010_cam01.png", 10), 183 | ("match00011_cam02_mask.png", 11), 184 | ) 185 | 186 | for test_string, answer in TEST_STRINGS: 187 | assert answer == get_sa_car_id(test_string) 188 | --------------------------------------------------------------------------------