├── .coveragerc ├── .gitignore ├── .pre-commit-config.yaml ├── AUTHORS ├── CHANGELOG.md ├── LICENSE ├── README.md ├── VERSION ├── circle.yml ├── docker ├── Dockerfile.base ├── Dockerfile.csv2json ├── Dockerfile.images2vecs ├── Dockerfile.notebook ├── Dockerfile.rankDirectories ├── Dockerfile.test ├── Dockerfile.vectorSiamese └── pelops_start.sh ├── docs └── chips_to_features.md ├── etl ├── compareDirectory2Directory.py ├── convertCsvToJson.py ├── makeFeaturesResNet50.py ├── makeFeaturesTopSiamese.py └── makeFeaturesYOURMODEL.py ├── maintainers.md ├── makefile ├── misc ├── pelops.png └── pelops.svg ├── pelops ├── __init__.py ├── analysis │ ├── CMC_Confidence.ipynb │ ├── CameraVsCamera.ipynb │ ├── MakeChips.ipynb │ ├── SVMBinaryCarMatch.ipynb │ ├── ScoreChips.ipynb │ ├── __init__.py │ ├── analysis.py │ ├── camerautil.py │ ├── colormakemodel_dataset_maker.ipynb │ ├── comparecameras.py │ ├── isFileImage.ipynb │ ├── labelImageCars.ipynb │ ├── makeCMCplots.ipynb │ ├── makeFeatureFiles-TEST.ipynb │ ├── makeFeatureFiles.ipynb │ ├── makeFeaturesResNet50.ipynb │ ├── makeSiameseCMC.ipynb │ ├── makeVeri.py │ ├── recomputeCorpus.ipynb │ ├── saveExtractFeatsFromChips.ipynb │ ├── siamese.ipynb │ ├── siameseModelIterator-15.py │ ├── siameseModelIterator.ipynb │ ├── splitDataset.ipynb │ ├── test_analysis.py │ └── unsorted │ │ ├── __init__.py │ │ ├── makeH5pyFile.ipynb │ │ └── recompute │ │ ├── __init__.py │ │ ├── compute.py │ │ └── extract_feats_from_chips.py ├── const.py ├── datasets │ ├── __init__.py │ ├── chip.py │ ├── chipper.py │ ├── compcar.py │ ├── dgcars.py │ ├── featuredataset.py │ ├── slice.py │ ├── str.py │ └── veri.py ├── etl │ ├── __init__.py │ ├── computeMatrixCMC.py │ ├── json2h5.py │ ├── makeDistMatrix.py │ ├── makeFeaturesResNet50.py │ └── veriFileList2Json.py ├── experiment_api │ ├── __init__.py │ ├── experiment.py │ ├── metric.py │ └── run_metric.sh ├── features │ ├── feature_producer.py │ ├── hog.py │ ├── keras_model.py │ └── resnet50.py ├── models │ ├── __init__.py │ └── makesvm.py ├── training │ ├── CNN Retrainer.ipynb │ ├── Debug CNN Retrainer.ipynb │ ├── cnn_retrainer.py │ └── utils.py ├── transform_img │ ├── __init__.py │ ├── run.sh │ └── transform.py └── utils.py ├── requirements.txt ├── setup.py └── testci ├── install.sh ├── small.hdf5 ├── small.json ├── test_chip.py ├── test_chipper.py ├── test_compcar.py ├── test_dgcars.py ├── test_experiment_utils.py ├── test_featuredataset.py ├── test_featureproducer.py ├── test_hog_feature.py ├── test_keras_load_model.py ├── test_keras_model_feature.py ├── test_resnet50_feature.py ├── test_slice.py ├── test_str.py ├── test_training_utils.py └── test_veri.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | 4 | [report] 5 | # Regexes for lines to exclude from consideration 6 | exclude_lines = 7 | # Have to re-enable the standard pragma 8 | pragma: no cover 9 | 10 | # Don't complain about missing debug-only code: 11 | def __repr__ 12 | if self\.debug 13 | 14 | # Don't complain if tests don't hit defensive assertion code: 15 | raise AssertionError 16 | raise NotImplementedError 17 | 18 | # Don't complain if non-runnable code isn't run: 19 | if 0: 20 | if __name__ == .__main__.: 21 | 22 | ignore_errors = True 23 | 24 | [html] 25 | directory = coverage_html_report -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / 
DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | venv/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | 56 | # Sphinx documentation 57 | docs/_build/ 58 | 59 | # PyBuilder 60 | target/ 61 | 62 | #PyCharm 63 | .idea 64 | 65 | #Ipython Notebook 66 | .ipynb_checkpoints 67 | 68 | # swap files 69 | *.swp 70 | 71 | # OSX crap 72 | .DS_Store 73 | 74 | # pickled models 75 | **/*.pickle 76 | 77 | #other crap 78 | **/.ropeproject 79 | checkscript.sh 80 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | - repo: https://github.com/pre-commit/pre-commit 2 | sha: v0.9.4 3 | hooks: 4 | - id: validate_config 5 | - repo: git@github.com:pre-commit/pre-commit-hooks 6 | sha: v0.6.1 7 | hooks: 8 | - id: autopep8-wrapper 9 | - id: check-case-conflict 10 | - id: check-json 11 | - id: check-merge-conflict 12 | - id: check-symlinks 13 | - id: check-yaml 14 | - id: end-of-file-fixer 15 | - id: pretty-format-json 16 | args: 17 | - --autofix 18 | - id: trailing-whitespace 19 | - repo: git@github.com:asottile/reorder_python_imports 20 | sha: v0.3.0 21 | hooks: 22 | - id: reorder-python-imports 23 | - repo: git@github.com:Lab41/verboten_words.git 24 | sha: v1.0.0 25 | hooks: 26 | - id: verboten-words 27 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | # This file lists all individuals having contributed content to the repository. 2 | # If you're submitting a patch, please add your name here in alphabetical order as part of the patch. 3 | # 4 | # For a list of active project maintainers, see the MAINTAINERS file. 5 | # 6 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 0.1.0-dev (current, unreleased) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pelops 2 | 3 | [![CircleCI](https://circleci.com/gh/Lab41/pelops.svg?style=svg)](https://circleci.com/gh/Lab41/pelops)[![codecov](https://codecov.io/gh/Lab41/pelops/branch/master/graph/badge.svg)](https://codecov.io/gh/Lab41/pelops) 4 | 5 | 6 | Pelops Logo 7 | 8 | Pelops is a project by [Lab41](http://www.lab41.org/) that uses deep learning 9 | based methods to automatically identify cars by using their large scale 10 | features—color, shape, light configuration, etc. 
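At its core, the library pairs a `FeatureDataset` (features extracted from image chips) with an `ExperimentGenerator` (multi-camera re-identification trials) and scores the results as CMC curves. The following is a minimal sketch of that flow, adapted from `pelops/analysis/CMC_Confidence.ipynb`; the HDF5 path is a placeholder and the numeric settings simply mirror the notebook's values.

```python
from pelops.datasets.featuredataset import FeatureDataset
from pelops.experiment_api.experiment import ExperimentGenerator
from pelops.analysis import analysis

# placeholder path to a feature file produced by the etl scripts
features = FeatureDataset('/path/to/features.hdf5')

# 2 cameras, 10 chips per camera, 0 chips dropped, fixed random seed
generator = ExperimentGenerator(features, 2, 10, 0, 1024)

# build 100 CMC curves with 100 experiments each, then summarize them
curves = analysis.repeat_pre_cmc(features, generator, NUMCMC=100, EXPPERCMC=100)
stats, gdata = analysis.make_cmc_stats(curves, 10)
```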
11 | 12 | ## Install Instructions 13 | 14 | Pelops provides several Docker containers that assist in running the project. 15 | You can build them by checking out the code and running make: 16 | 17 | ```bash 18 | git clone https://github.com/Lab41/pelops.git 19 | cd pelops 20 | make 21 | ``` 22 | 23 | Then: 24 | 25 | ```bash 26 | make notebook 27 | ``` 28 | 29 | This will run a container containing Pelops and a notebook server. 30 | 31 | Alternatively, you can install Pelops using `pip`: 32 | 33 | ```bash 34 | git clone https://github.com/Lab41/pelops.git 35 | pip install ./pelops 36 | ``` 37 | 38 | There are several dependencies that will need to be installed. The 39 | [`requirements.txt`](requirements.txt) should include most of them, but other 40 | programs such as [keras](https://keras.io/) and 41 | [Tensorflow](https://www.tensorflow.org/) are also required. For this reason 42 | it is suggested to use the notebook container to run Pelops. 43 | 44 | ## Documentation 45 | 46 | - [Turning Chips into features](docs/chips_to_features.md) 47 | 48 | ## Tests 49 | 50 | Tests are currently written in [pytest](https://docs.pytest.org/en/latest/). The tests are automatically run when submitting pull requests. 51 | 52 | You can run the tests in a container by calling: 53 | 54 | ```bash 55 | make test 56 | ``` 57 | 58 | This will build a docker container, mount your local version of the code, and 59 | run the tests. 60 | 61 | ## Contributing to Pelops 62 | 63 | Want to contribute? Awesome! 64 | 65 | Please make sure you have [`pre-commit`](http://pre-commit.com/) installed so 66 | that your code is checked for various issues. 67 | 68 | After that, send us a pull request! We're happy to review them! 69 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | v0.1.0-dev 2 | -------------------------------------------------------------------------------- /circle.yml: -------------------------------------------------------------------------------- 1 | # Controls the build environment 2 | machine: 3 | python: 4 | version: 3.5.2 5 | environment: 6 | PATH: /home/ubuntu/miniconda3/bin:$PATH 7 | 8 | dependencies: 9 | override: 10 | # Move to the pelops directory before performing the installation. 11 | - cd ~/pelops 12 | - source testci/install.sh: 13 | environment: 14 | DISTRIB: "conda" 15 | PYTHON_VERSION: "3.5" 16 | NUMPY_VERSION: "*" 17 | SCIPY_VERSION: "*" 18 | SCIKIT_LEARN_VERSION: "*" 19 | MATPLOTLIB_VERSION: "*" 20 | - conda install -y opencv hdfs3 21 | - conda install -y pytest pytest-cov pillow h5py scipy scikit-image 22 | - /home/ubuntu/miniconda3/bin/pip install imageio 23 | - /home/ubuntu/miniconda3/bin/pip install tensorflow==0.12.* git+git://github.com/fchollet/keras.git@2ad3544b017fe9c0d7a25ef0640baa52281372b5 24 | 25 | # Set up the commands to run as a test (override), as well as the commands to 26 | # run before (pre) and after (post). 27 | test: 28 | pre: 29 | - mkdir -p $CIRCLE_TEST_REPORTS/junit/ 30 | override: 31 | # Test installation via pip 32 | - cd /home/ubuntu/pelops && /home/ubuntu/miniconda3/bin/pip install .
33 | # Test importing installed package 34 | - /home/ubuntu/miniconda3/bin/python3 -c "import pelops; import pelops.datasets" 35 | # Run pytest tests 36 | - cd /home/ubuntu/pelops && /home/ubuntu/miniconda3/bin/python3 -m pytest -v --cov --cov-report=term-missing:skip-covered --junitxml=$CIRCLE_TEST_REPORTS/junit/junit_output.xml 37 | post: 38 | - bash <(curl -s https://codecov.io/bash) -t 08234947-61d0-48ea-b0f0-1c82d3f2dfd7 39 | -------------------------------------------------------------------------------- /docker/Dockerfile.base: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu14.04 2 | 3 | MAINTAINER Lab41 4 | 5 | RUN apt-get update && \ 6 | apt-get install -y \ 7 | bzip2 \ 8 | ca-certificates \ 9 | git \ 10 | libglib2.0-0 \ 11 | libsm6 \ 12 | libxext6 \ 13 | libxrender1 \ 14 | wget 15 | 16 | #Configure environment 17 | ENV CONDA_DIR=/opt/conda \ 18 | # 4.2.12 is the last version with Python3.5, which we need 19 | MINICONDA_SCRIPT=Miniconda3-4.2.12-Linux-x86_64.sh \ 20 | MINICONDA_SHA=c59b3dd3cad550ac7596e0d599b91e75d88826db132e4146030ef471bb434e9a 21 | 22 | # Install conda 23 | RUN cd /tmp && \ 24 | mkdir -p $CONDA_DIR && \ 25 | wget --quiet https://repo.continuum.io/miniconda/${MINICONDA_SCRIPT} && \ 26 | echo "${MINICONDA_SHA} ${MINICONDA_SCRIPT}" | sha256sum -c - && \ 27 | /bin/bash ${MINICONDA_SCRIPT} -f -b -p $CONDA_DIR && \ 28 | rm ${MINICONDA_SCRIPT} 29 | 30 | RUN $CONDA_DIR/bin/conda install --quiet --yes \ 31 | 'conda-build=2.1.*' \ 32 | 'cython=0.24*' \ 33 | 'h5py=2.6*' \ 34 | 'hdfs3=0.1.*' \ 35 | 'libhdfs3=2.2.*' \ 36 | 'numpy=1.11*' \ 37 | 'pillow=3.4*' \ 38 | 'pytest=3.0.*' \ 39 | 'python=3.5.*' \ 40 | 'scikit-image=0.12*' \ 41 | 'scikit-learn=0.18*' \ 42 | && $CONDA_DIR/bin/conda clean -tipsy 43 | 44 | RUN $CONDA_DIR/bin/conda update pip --quiet --yes 45 | 46 | # Install Python packages 47 | ENV TENSORFLOW_VERSION=0.12.* \ 48 | KERAS_VERSION=2ad3544b017fe9c0d7a25ef0640baa52281372b5 49 | RUN $CONDA_DIR/bin/pip install git+git://github.com/fchollet/keras.git@${KERAS_VERSION} \ 50 | tensorflow==${TENSORFLOW_VERSION} \ 51 | imageio 52 | 53 | ENV INDOCKERCONTAINER 1 54 | 55 | ADD . /pelops_root 56 | WORKDIR /pelops_root 57 | ENV PYTHONPATH=/pelops_root/pelops:$PYTHONPATH \ 58 | PATH=/usr/local/cuda/bin:/usr/local/nvidia/bin:$CONDA_DIR/bin:$PATH 59 | 60 | # install dependencies of plugins for pelops 61 | RUN for file in $(find . -name "requirements.txt"); \ 62 | do \ 63 | $CONDA_DIR/bin/pip install -r $file; \ 64 | done 65 | -------------------------------------------------------------------------------- /docker/Dockerfile.csv2json: -------------------------------------------------------------------------------- 1 | FROM continuumio/anaconda3:4.3.1 2 | 3 | MAINTAINER Lab41 4 | 5 | RUN mkdir -p /pelops_root 6 | WORKDIR /pelops_root 7 | COPY . . 
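# The CMD below runs `python3 -m etl.convertCsvToJson`, which reads its inputs from
# the `pelops_csv_*`, `pelops_csv_mode`, and `pelops_json` environment variables
# described in etl/convertCsvToJson.py; see docs/chips_to_features.md for a worked
# `docker run` example.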
8 | RUN pip install --no-cache-dir -r requirements.txt 9 | 10 | CMD python3 -m etl.convertCsvToJson -------------------------------------------------------------------------------- /docker/Dockerfile.images2vecs: -------------------------------------------------------------------------------- 1 | FROM l41-pelops-base 2 | 3 | MAINTAINER Lab41 4 | 5 | RUN mkdir INPUT_DIR 6 | RUN mkdir OUTPUT_DIR 7 | RUN mkdir MODEL_DIR 8 | 9 | CMD ["python", "/pelops_root/etl/makeFeaturesYOURMODEL.py","./INPUT_DIR","./OUTPUT_DIR"] 10 | -------------------------------------------------------------------------------- /docker/Dockerfile.notebook: -------------------------------------------------------------------------------- 1 | FROM l41-pelops-base 2 | 3 | MAINTAINER Lab41 4 | 5 | # The startup script installs Pelops with pip from this directory 6 | RUN mkdir /pelops 7 | WORKDIR /pelops 8 | 9 | # Run a notebook 10 | EXPOSE 8888 11 | 12 | # Install Jupyter notebook 13 | RUN conda install --quiet --yes \ 14 | 'notebook=4.1*' \ 15 | && conda clean -tipsy 16 | 17 | ADD pelops_start.sh / 18 | 19 | CMD ["/pelops_start.sh"] 20 | -------------------------------------------------------------------------------- /docker/Dockerfile.rankDirectories: -------------------------------------------------------------------------------- 1 | FROM l41-pelops-base 2 | 3 | MAINTAINER Lab41 4 | 5 | 6 | RUN mkdir INPUT_DIR1 7 | RUN mkdir INPUT_DIR2 8 | RUN mkdir MODEL_DIR 9 | RUN mkdir OUTPUT_DIR 10 | 11 | CMD ["python", "/pelops_root/etl/compareDirectory2Directory.py","./INPUT_DIR1","./INPUT_DIR2","./OUTPUT_DIR"] 12 | -------------------------------------------------------------------------------- /docker/Dockerfile.test: -------------------------------------------------------------------------------- 1 | FROM l41-pelops-base 2 | 3 | MAINTAINER Lab41 4 | 5 | # Run the tests 6 | CMD ["python", "-m","pytest","-v","-s"] 7 | -------------------------------------------------------------------------------- /docker/Dockerfile.vectorSiamese: -------------------------------------------------------------------------------- 1 | FROM l41-pelops-base 2 | 3 | MAINTAINER Lab41 4 | 5 | RUN mkdir INPUT_DIR1 6 | RUN mkdir INPUT_DIR2 7 | RUN mkdir MODEL_DIR 8 | RUN mkdir OUTPUT_DIR 9 | 10 | CMD ["python", "/pelops_root/etl/makeFeaturesTopSiamese.py","./INPUT_DIR1","./INPUT_DIR2","./OUTPUT_DIR"] 11 | -------------------------------------------------------------------------------- /docker/pelops_start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2014, IPython: interactive computing in Python 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # * Redistributions of source code must retain the above copyright notice, this 10 | # list of conditions and the following disclaimer. 11 | # 12 | # * Redistributions in binary form must reproduce the above copyright notice, 13 | # this list of conditions and the following disclaimer in the documentation 14 | # and/or other materials provided with the distribution. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | # Strict mode 28 | set -euo pipefail 29 | 30 | # Clone the repo 31 | git clone https://github.com/lab41/pelops /pelops 32 | pip install /pelops 33 | 34 | # Launch the notebook 35 | jupyter notebook --no-browser --port 8888 --ip=* --NotebookApp.token= 36 | -------------------------------------------------------------------------------- /docs/chips_to_features.md: -------------------------------------------------------------------------------- 1 | # Turning Chips to Features 2 | 3 | 1. build the docker containers using make: 4 | 5 | ```bash 6 | make 7 | ``` 8 | 9 | 2. map folders with images and and output directory, and run: 10 | 11 | ```bash 12 | CHIPDIR1=/folder/with/chips && \ 13 | OUTPUTDIR=/folder/for/output && \ 14 | docker run -v ${CHIPDIR1}:/pelops_root/INPUT_DIR -v ${OUTPUTDIR}:/pelops_root/OUTPUT_DIR l41-pelops-i2v 15 | ``` 16 | 17 | Note: Docker creates output files owned by root. Grant write privileges to OUTPUT_DIR for the current user and add `-u $(id -u $USER)` to the docker run command above to create output files owned by the current user. 18 | 19 | 3. Advanced, bring your own model: 20 | 21 | ```bash 22 | CHIPDIR1=/folder/with/chips && \ 23 | OUTPUTDIR=/folder/for/output && \ 24 | MODELDIR=/folder/with/models && \ 25 | MODELFILE=name_of_model_file && \ 26 | WEIGHTFILE=name_of_weight_file && \ 27 | LAYERNAME=layername && \ 28 | docker run -v ${CHIPDIR1}:/pelops_root/INPUT_DIR -v ${OUTPUTDIR}:/pelops_root/OUTPUT_DIR -v ${MODELDIR}:/pelops_root/MODEL_DIR -e MODEL="/pelops_root/MODEL_DIR/${MODELFILE}" -e WEIGHTS="/pelops_root/MODEL_DIR/${WEIGHTFILE}" -e LAYER="${LAYERNAME}" l41-pelops-i2v 29 | ``` 30 | 31 | Run the Siamese model as follows: 32 | 33 | ```bash 34 | CHIPDIR1=/folder/with/chips && \ 35 | CHIPDIR2=/folder/with/other/chips && \ 36 | OUTPUTDIR=/folder/for/output && \ 37 | MODELDIR=/folder/with/models && \ 38 | MODELFILE=name_of_model_file.json && \ 39 | WEIGHTFILE=name_of_weight_file.hdf5 && \ 40 | VECTORFILE=name_of_VECTOR_file.json && \ 41 | docker run -v ${CHIPDIR1}:/pelops_root/INPUT_DIR1 -v ${CHIPDIR2}:/pelops_root/INPUT_DIR2 -v ${OUTPUTDIR}:/pelops_root/OUTPUT_DIR -v ${MODELDIR}:/pelops_root/MODEL_DIR -e WEIGHTS="/pelops_root/MODEL_DIR/${WEIGHTFILE}" -e MODEL="/pelops_root/MODEL_DIR/${MODELFILE}" -e VECTORS="/pelops_root/INPUT_DIR1/${VECTORFILE}" l41-pelops-siamese 42 | ``` 43 | 44 | Run the Ranker to compare two directories as follows: 45 | 46 | ```bash 47 | CHIPDIR1=/folder/with/chips && \ 48 | CHIPDIR2=/folder/with/other/chips && \ 49 | OUTPUTDIR=/folder/for/output && \ 50 | MODELDIR=/folder/with/models && \ 51 | MODELFILE=name_of_model_file.json && \ 52 | WEIGHTFILE=name_of_weight_file.hdf5 && \ 53 | LAYERNAME=layername && \ 54 | docker run -v ${CHIPDIR1}:/pelops_root/INPUT_DIR1 -v ${CHIPDIR2}:/pelops_root/INPUT_DIR2 -v ${OUTPUTDIR}:/pelops_root/OUTPUT_DIR -v ${MODELDIR}:/pelops_root/MODEL_DIR -e WEIGHTS="/pelops_root/MODEL_DIR/${WEIGHTFILE}" -e MODEL="/pelops_root/MODEL_DIR/${MODELFILE}" -e 
LAYER="${LAYERNAME}" l41-pelops-ranker 55 | ``` 56 | 57 | Note: Docker creates output files owned by root. Grant write privileges to OUTPUT_DIR for the current user and add `-u $(id -u $USER)` to the docker run commands above to create output files owned by the current user. 58 | 59 | Run the CSV to JSON docker conversion operations as follows: 60 | 61 | ```bash 62 | CSV1=/path/to/file1.csv && \ 63 | CSV2=/path/to/file2.csv && \ 64 | MODE=product && \ 65 | JSON=/path/to/output.json && \ 66 | docker run -e pelops_csv_1="${CSV1}" -e pelops_csv_2="${CSV2}" -e pelops_csv_mode=${MODE} -e pelops_json="${JSON}" l41-pelops-c2j 67 | ``` 68 | -------------------------------------------------------------------------------- /etl/compareDirectory2Directory.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import json 3 | import os 4 | import sys 5 | import time 6 | 7 | import numpy as np 8 | import scipy.spatial.distance 9 | from keras.applications.resnet50 import preprocess_input 10 | from keras.models import Model 11 | from keras.models import model_from_json 12 | from keras.preprocessing import image 13 | 14 | 15 | def load_image(img_path): 16 | data = image.load_img(img_path, target_size=(224, 224)) 17 | x = image.img_to_array(data) 18 | x = np.expand_dims(x, axis=0) 19 | x = preprocess_input(x) 20 | return x 21 | 22 | 23 | def load_model_workaround(model_file, weight_file): 24 | # load json and create model 25 | json_file = open(model_file, 'r') 26 | loaded_model_json = json_file.read() 27 | json_file.close() 28 | loaded_model = model_from_json(loaded_model_json) 29 | # load weights into new model 30 | loaded_model.load_weights(weight_file) 31 | return loaded_model 32 | 33 | 34 | def get_models(model=None, weights=None): 35 | model = load_model_workaround(model, weights) 36 | return model 37 | 38 | 39 | def image_features(left, right, model): 40 | predictions = model.predict([left, right]) 41 | return predictions 42 | 43 | 44 | def find_images(topdir): 45 | retval = [] 46 | exten = ['jpg', 'bmp', 'png'] 47 | images = 'images' 48 | 49 | for dirpath, dirnames, files in os.walk(topdir): 50 | for name in files: 51 | if name.lower().split('.')[-1] in exten: 52 | if dirpath.lower().find(images): 53 | retval.append(os.path.join(dirpath, name)) 54 | return retval 55 | 56 | 57 | def write_data(vector_file, limage_file, rimage_file, feature): 58 | list_feature = feature.flatten().tolist() 59 | str_feature = ','.join(str(j) for j in list_feature) 60 | outdata = '{0},{1},{2}\n'.format(limage_file, rimage_file, str_feature) 61 | vector_file.write(outdata) 62 | vector_file.flush() 63 | 64 | 65 | def main(argv=None): 66 | if argv is None: 67 | argv = sys.argv 68 | image_dir_l = argv[1] 69 | image_dir_r = argv[2] 70 | vector_dir = argv[3] 71 | 72 | model_file = os.environ.get('MODEL', None) 73 | weights_file = os.environ.get('WEIGHTS', None) 74 | layer = os.environ.get('LAYER', None) 75 | 76 | vector_file_name = os.path.join( 77 | vector_dir, 'vectorOutputFile_{0}.csv'.format(time.time())) 78 | 79 | vector_file = open(vector_file_name, 'w') 80 | 81 | images_left = find_images(image_dir_l) 82 | images_right = find_images(image_dir_r) 83 | 84 | model = get_models(model_file, weights_file) 85 | 86 | for limage_file in images_left: 87 | for rimage_file in images_right: 88 | 89 | l_img = load_image(limage_file) 90 | r_img = load_image(rimage_file) 91 | 92 | feature = image_features(l_img, r_img, model) 93 | 94 | write_data(vector_file, limage_file, 
rimage_file, feature) 95 | 96 | vector_file.close() 97 | 98 | if __name__ == "__main__": 99 | sys.exit(main()) 100 | -------------------------------------------------------------------------------- /etl/convertCsvToJson.py: -------------------------------------------------------------------------------- 1 | """ 2 | Conversion script for image2vecs feature vector csvs to siamese json 3 | 4 | Environment Variables: 5 | - pelops_csv_*: one or more file paths to csvs for conversion 6 | - pelops_csv_mode: 7 | - 'product': Combine using the cartesian product of the records from 2x csvs [default] 8 | - 'combo': Combine using pair-wise combinations of records for each csv (1 or more) 9 | - pelops_json: Path to output json file 10 | """ 11 | 12 | import os 13 | import sys 14 | import traceback 15 | from pelops.utils import prep_for_siamese 16 | 17 | if __name__ == '__main__': 18 | csv_files = [v for k, v in os.environ.items() if k.startswith('pelops_csv') and os.path.isfile(v)] 19 | 20 | if len(csv_files) == 0: 21 | print("No CSV files were provided for conversion") 22 | sys.exit(-1) 23 | print("Converting {} csv files:\n\t - {}".format(len(csv_files), '\n\t - '.join(csv_files))) 24 | 25 | mode = os.getenv('pelops_csv_mode', 'product') 26 | print("Mode: {}".format(mode)) 27 | 28 | out_json = os.getenv('pelops_json', None) 29 | if out_json is None: 30 | print("Output json file path was not specified") 31 | print("Json: {}".format(out_json)) 32 | 33 | try: 34 | prep_for_siamese(*csv_files, json_file=out_json, full_combos=(mode != 'product')) 35 | print("Conversion success") 36 | except: 37 | print("Conversion error occurred:\n{}".format(traceback.format_exc())) 38 | -------------------------------------------------------------------------------- /etl/makeFeaturesResNet50.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import json 3 | import os 4 | import sys 5 | import time 6 | 7 | import numpy as np 8 | import scipy.spatial.distance 9 | from keras.applications.resnet50 import ResNet50, preprocess_input 10 | from keras.models import Model 11 | from keras.preprocessing import image 12 | 13 | 14 | def load_image(img_path): 15 | data = image.load_img(img_path, target_size=(224, 224)) 16 | x = image.img_to_array(data) 17 | x = np.expand_dims(x, axis=0) 18 | x = preprocess_input(x) 19 | return x 20 | 21 | 22 | def get_models(): 23 | # include_top needs to be True for this to work 24 | base_model = ResNet50(weights='imagenet', include_top=True) 25 | model = Model(input=base_model.input, 26 | output=base_model.get_layer('flatten_1').output) 27 | return (model, base_model) 28 | 29 | 30 | def image_features(img, model): 31 | features = np.zeros((1, 2048), dtype=np.float16) 32 | predictions = model.predict(img) 33 | return predictions 34 | 35 | 36 | def find_images(topdir): 37 | retval = [] 38 | exten = ['jpg', 'bmp', 'png'] 39 | images = 'images' 40 | 41 | for dirpath, dirnames, files in os.walk(topdir): 42 | for name in files: 43 | if name.lower().split('.')[-1] in exten: 44 | if dirpath.lower().find(images): 45 | retval.append(os.path.join(dirpath, name)) 46 | return retval 47 | 48 | 49 | def write_data(vector_file, image_file, feature): 50 | list_feature = feature.flatten().tolist() 51 | str_feature = ','.join(str(j) for j in list_feature) 52 | outdata = '{0},{1}\n'.format(image_file, str_feature) 53 | vector_file.write(outdata) 54 | vector_file.flush() 55 | 56 | 57 | def main(argv=None): 58 | if argv is None: 59 | argv = sys.argv 60 | image_dir 
= argv[1] 61 | vector_dir = argv[2] 62 | vector_file_name = os.path.join( 63 | vector_dir, 'vectorOutputFile_{0}.csv'.format(time.time())) 64 | vector_file = open(vector_file_name, 'w') 65 | 66 | images = find_images(image_dir) 67 | 68 | model, base_model = get_models() 69 | 70 | for image_file in images: 71 | img = load_image(image_file) 72 | feature = image_features(img, model) 73 | write_data(vector_file, image_file, feature) 74 | print('processed {0}'.format(image_file)) 75 | 76 | vector_file.close() 77 | 78 | if __name__ == "__main__": 79 | sys.exit(main()) 80 | -------------------------------------------------------------------------------- /etl/makeFeaturesTopSiamese.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | import time 5 | 6 | import keras.backend.tensorflow_backend as KTF 7 | import numpy as np 8 | import tensorflow as tf 9 | from keras import backend as K 10 | from keras.applications.resnet50 import preprocess_input 11 | from keras.applications.resnet50 import ResNet50 12 | from keras.callbacks import EarlyStopping 13 | from keras.callbacks import ModelCheckpoint 14 | from keras.callbacks import ReduceLROnPlateau 15 | from keras.callbacks import TensorBoard 16 | from keras.layers import Dense 17 | from keras.layers import GlobalAveragePooling2D 18 | from keras.layers import Input 19 | from keras.layers import Lambda 20 | from keras.layers import merge 21 | from keras.layers.normalization import BatchNormalization 22 | from keras.models import load_model 23 | from keras.models import Model 24 | from keras.models import model_from_json 25 | from keras.optimizers import RMSprop 26 | from keras.preprocessing import image 27 | from keras.utils.np_utils import to_categorical 28 | 29 | 30 | def just_the_top(num_training_classes, model_file, weights_file): 31 | 32 | def load_model_workaround(model_file, weight_file): 33 | # load json and create model 34 | json_file = open(model_file, 'r') 35 | loaded_model_json = json_file.read() 36 | json_file.close() 37 | loaded_model = model_from_json(loaded_model_json) 38 | # load weights into new model 39 | loaded_model.load_weights(weight_file) 40 | return loaded_model 41 | 42 | def s_distance(vects): 43 | """ 44 | return the abs difference between vectors 45 | """ 46 | x, y = vects 47 | s = K.abs(x - y) 48 | return s 49 | 50 | def s_shape(shapes): 51 | """ 52 | return the sape of the vector being used 53 | """ 54 | shape = list(shapes) 55 | outshape = (shape[0]) 56 | return tuple(outshape) 57 | 58 | original_model = load_model_workaround(model_file, weights_file) 59 | d1 = original_model.get_layer('dense_1') 60 | d1_len = d1_len = d1.get_output_shape_for(d1.get_input_shape_at(0))[1] 61 | d2 = original_model.get_layer('dense_2') 62 | b1 = original_model.get_layer('batchnormalization_1') 63 | 64 | input_left = Input(shape=(1, 1, 2048)) 65 | input_right = Input(shape=(1, 1, 2048)) 66 | 67 | # use a distance measure for making the join 68 | siamese_join = Lambda(s_distance, 69 | output_shape=s_shape)([input_left, input_right]) 70 | my_layer = GlobalAveragePooling2D()(siamese_join) 71 | my_d1 = Dense(d1_len, activation='relu')(my_layer) 72 | bn = BatchNormalization()(my_d1) 73 | predictions = Dense(num_training_classes, activation='sigmoid')(bn) 74 | model = Model([input_left, input_right], output=predictions) 75 | 76 | print(model.summary()) 77 | model.get_layer('dense_1').set_weights(d1.get_weights()) 78 | 
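    # the remaining trained layers are copied over in the same way; these layer names
    # ('dense_2', 'batchnormalization_1') must match the names Keras assigned when
    # the original siamese model was built and saved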
model.get_layer('dense_2').set_weights(d2.get_weights()) 79 | model.get_layer('batchnormalization_1').set_weights(b1.get_weights()) 80 | 81 | return model 82 | 83 | 84 | def write_data(vector_file, index, feature): 85 | list_feature = feature.flatten().tolist() 86 | str_feature = ','.join(str(j) for j in list_feature) 87 | outdata = '{0}|{1}\n'.format(index, str_feature) 88 | vector_file.write(outdata) 89 | vector_file.flush() 90 | 91 | 92 | def make_top(): 93 | a = np.ones((1, 1, 1, 2048)) 94 | top = just_the_top(3, 95 | '/pelops_root/MODEL_DIR/VeRi-siamese-weekend.model.json', 96 | '/pelops_root/MODEL_DIR/VeRi-siamese-weekend.weights.hdf5') 97 | print('*********** test **********') 98 | print(top.predict([a, a])[0]) 99 | # Out[8]: array([[ 0.98460394, 0.99653435, 0.99870515]], dtype=float32) 100 | print('*********** test **********') 101 | return top 102 | 103 | 104 | def main(argv=None): 105 | 106 | #model = make_top() 107 | # test() 108 | 109 | if argv is None: 110 | argv = sys.argv 111 | image_dir_l = argv[1] 112 | image_dir_r = argv[2] 113 | output_dir = argv[3] 114 | 115 | input_file_name = os.environ.get('VECTORS', None) 116 | model_file = os.environ.get('MODEL', None) 117 | weights_file = os.environ.get('WEIGHTS', None) 118 | 119 | vector_file_name = os.path.join( 120 | output_dir, 'vectorOutputFile_{0}.csv'.format(time.time())) 121 | 122 | vector_o_file = open(vector_file_name, 'w') 123 | vector_i_file = open(input_file_name, 'r') 124 | 125 | print(3, model_file, weights_file) 126 | model = just_the_top(3, model_file, weights_file) 127 | 128 | for index, line in enumerate(vector_i_file): 129 | line = line.strip() 130 | j_line = json.loads(line) 131 | left = j_line['left'] 132 | right = j_line['right'] 133 | np_l = np.array(left) 134 | np_r = np.array(right) 135 | np_l = np_l.reshape(1, 1, 1, 2048) 136 | np_r = np_r.reshape(1, 1, 1, 2048) 137 | data = [np_l, np_r] 138 | feature = model.predict(data) 139 | feature = feature[0] 140 | write_data(vector_o_file, index, feature) 141 | 142 | vector_o_file.close() 143 | 144 | if __name__ == "__main__": 145 | sys.exit(main()) 146 | -------------------------------------------------------------------------------- /etl/makeFeaturesYOURMODEL.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import os 3 | import sys 4 | import time 5 | 6 | import numpy as np 7 | from keras.applications.resnet50 import preprocess_input 8 | from keras.applications.resnet50 import ResNet50 9 | from keras.models import Model, model_from_json 10 | from keras.preprocessing import image 11 | 12 | DEFAULT_LAYER_NAME = 'flatten_1' 13 | 14 | 15 | def load_image(img_path): 16 | data = image.load_img(img_path, target_size=(224, 224)) 17 | x = image.img_to_array(data) 18 | x = np.expand_dims(x, axis=0) 19 | x = preprocess_input(x) 20 | return x 21 | 22 | 23 | def save_model_workaround(model, layer, model_output_file, weights_output_file, layer_output_file): 24 | print('saving model to {}'.format(model_output_file)) 25 | print('saving weights to {}'.format(weights_output_file)) 26 | print('saving layer to {}'.format(layer_output_file)) 27 | # serialize model to JSON 28 | model_json = model.to_json() 29 | with open(model_output_file, 'w') as json_file: 30 | json_file.write(model_json) 31 | # serialize weights to HDF5 32 | model.save_weights(weights_output_file) 33 | # Write layer name to text 34 | with open(layer_output_file, 'w') as lyr_out: 35 | lyr_out.write(layer) 36 | 37 | 38 | def 
load_model_workaround(model_file, weight_file): 39 | # load json and create model 40 | json_file = open(model_file, 'r') 41 | loaded_model_json = json_file.read() 42 | json_file.close() 43 | loaded_model = model_from_json(loaded_model_json) 44 | # load weights into new model 45 | loaded_model.load_weights(weight_file) 46 | return loaded_model 47 | 48 | 49 | def get_models(model=None, weights=None, layer=None): 50 | # include_top needs to be True for this to work 51 | if model is None or weights is None or layer is None: 52 | print('MODEL NOT FULLY SPECIFIED, USING RESNET FEATURES') 53 | base_model = ResNet50(weights='imagenet', include_top=True) 54 | model = Model(input=base_model.input, 55 | output=base_model.get_layer(DEFAULT_LAYER_NAME).output) 56 | else: 57 | base_model = load_model_workaround(model, weights) 58 | base_layer_names = {lyr.name for lyr in base_model.layers} 59 | base_is_siamese = all([(name in base_layer_names) for name in ['dense_1', 'dense_2', 'lambda_1']]) 60 | 61 | if base_is_siamese: 62 | print('Input model is siamese, extracting resnet.') 63 | fresh_resnet = ResNet50(weights='imagenet', include_top=True) 64 | fresh_resnet.set_weights(base_model.get_layer('resnet50').get_weights()) 65 | model = Model(input=fresh_resnet.input, 66 | output=fresh_resnet.get_layer(DEFAULT_LAYER_NAME).output) 67 | else: 68 | model = Model(input=base_model.input, 69 | output=base_model.get_layer(layer).output) 70 | return model 71 | 72 | 73 | def image_features(img, model): 74 | predictions = model.predict(img) 75 | return predictions 76 | 77 | 78 | def find_images(topdir): 79 | retval = [] 80 | exten = ['jpg', 'bmp', 'png'] 81 | images = 'images' 82 | 83 | for dirpath, dirnames, files in os.walk(topdir): 84 | for name in files: 85 | if name.lower().split('.')[-1] in exten: 86 | if dirpath.lower().find(images): 87 | retval.append(os.path.join(dirpath, name)) 88 | return retval 89 | 90 | 91 | def write_data(vector_file, image_file, feature): 92 | list_feature = feature.flatten().tolist() 93 | str_feature = ','.join(str(j) for j in list_feature) 94 | outdata = '{0},{1}\n'.format(image_file, str_feature) 95 | vector_file.write(outdata) 96 | vector_file.flush() 97 | 98 | 99 | def main(argv=None): 100 | if argv is None: 101 | argv = sys.argv 102 | image_dir = argv[1] 103 | vector_dir = argv[2] 104 | 105 | model_file = os.environ.get('MODEL', None) 106 | weights_file = os.environ.get('WEIGHTS', None) 107 | layer_name = os.environ.get('LAYER', None) 108 | 109 | vector_file_name = os.path.join( 110 | vector_dir, 'vectorOutputFile_{0}.csv'.format(time.time())) 111 | vector_file = open(vector_file_name, 'w') 112 | 113 | images = find_images(image_dir) 114 | 115 | model = get_models(model_file, weights_file, layer_name) 116 | 117 | # Export model, weights, and layer if not originally supplied by the environment 118 | if all(map(lambda v: v is None, [model_file, weights_file, layer_name])): 119 | date_time = time.strftime('%Y%m%d_%H%M%S') 120 | make_out_file = lambda n: os.path.join(vector_dir, date_time + '.' 
+ n) 121 | save_model_workaround(model, DEFAULT_LAYER_NAME, make_out_file('model'), 122 | make_out_file('weights'), make_out_file('layer')) 123 | 124 | for image_file in images: 125 | img = load_image(image_file) 126 | feature = image_features(img, model) 127 | write_data(vector_file, image_file, feature) 128 | print('processed {0}'.format(image_file)) 129 | 130 | vector_file.close() 131 | 132 | if __name__ == "__main__": 133 | sys.exit(main()) 134 | -------------------------------------------------------------------------------- /maintainers.md: -------------------------------------------------------------------------------- 1 | Listing of the project Maintainers -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | all: base image2vecs siamese ranker build-notebook csv2json 2 | 3 | # Base requirements for all containers 4 | base: 5 | docker build -t l41-pelops-base -f docker/Dockerfile.base . 6 | 7 | # Jupyter notebook server 8 | build-notebook: base 9 | docker build -t l41-pelops-notebook -f docker/Dockerfile.notebook ./docker/ 10 | 11 | notebook: build-notebook 12 | docker run -p 8888:8888 -it l41-pelops-notebook 13 | 14 | # Tests 15 | test: base 16 | docker build -t l41-pelops-tests -f docker/Dockerfile.test . 17 | docker run l41-pelops-tests 18 | 19 | # Image processing 20 | image2vecs: base 21 | docker build -t l41-pelops-i2v -f docker/Dockerfile.images2vecs . 22 | 23 | siamese: base 24 | docker build -t l41-pelops-siamese -f docker/Dockerfile.vectorSiamese . 25 | 26 | ranker: base 27 | docker build -t l41-pelops-ranker -f docker/Dockerfile.rankDirectories . 28 | 29 | # Conversion utility 30 | csv2json: 31 | docker build -t l41-pelops-c2j -f docker/Dockerfile.csv2json . 
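# Most images build on the shared `base` image; `make notebook` starts the Jupyter
# container, `make test` builds and runs the pytest image, and image2vecs/siamese/
# ranker package the feature-extraction tools. csv2json is standalone and builds
# from the Anaconda base image instead.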
32 | -------------------------------------------------------------------------------- /misc/pelops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/misc/pelops.png -------------------------------------------------------------------------------- /pelops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/__init__.py -------------------------------------------------------------------------------- /pelops/analysis/CMC_Confidence.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#set some constants\n", 10 | "DATASETFILE = '/path/to/dataFile'\n", 11 | "ITEMSPERCAMERA = 10\n", 12 | "YRANDOM=1024\n", 13 | "CAMERAS=2\n", 14 | "DROPPED=0\n", 15 | "CMC=100\n", 16 | "EXPERIMENTS=100" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "from pelops.datasets.featuredataset import FeatureDataset\n", 26 | "from pelops.experiment_api.experiment import ExperimentGenerator\n", 27 | "from pelops.analysis import analysis\n", 28 | "\n", 29 | "\n", 30 | "#do the math\n", 31 | "featureData = FeatureDataset(DATASETFILE)\n", 32 | "expGen = ExperimentGenerator(featureData, CAMERAS, ITEMSPERCAMERA, DROPPED, YRANDOM)\n", 33 | "experimentHolder = analysis.repeat_pre_cmc(featureData,expGen,NUMCMC=CMC,EXPPERCMC=EXPERIMENTS)\n", 34 | "stats,gdata = analysis.make_cmc_stats(experimentHolder,ITEMSPERCAMERA)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "%matplotlib inline\n", 44 | "import matplotlib.pyplot as plt\n", 45 | "\n", 46 | "#make the plots\n", 47 | "fig = plt.figure()\n", 48 | "ax = plt.subplot(111)\n", 49 | "\n", 50 | "ax.plot(gdata.transpose())\n", 51 | "plt.title('{} CMC curves with {} experiments per curve'.format(CMC,EXPERIMENTS))\n", 52 | "ax.legend(('-stddev','avg','+stddev'),bbox_to_anchor=(1, -0.05),\n", 53 | " fancybox=True, shadow=True, ncol=5)" 54 | ] 55 | } 56 | ], 57 | "metadata": { 58 | "anaconda-cloud": {}, 59 | "kernelspec": { 60 | "display_name": "Python 3", 61 | "language": "python", 62 | "name": "python3" 63 | }, 64 | "language_info": { 65 | "codemirror_mode": { 66 | "name": "ipython", 67 | "version": 3.0 68 | }, 69 | "file_extension": ".py", 70 | "mimetype": "text/x-python", 71 | "name": "python", 72 | "nbconvert_exporter": "python", 73 | "pygments_lexer": "ipython3", 74 | "version": "3.5.2" 75 | } 76 | }, 77 | "nbformat": 4, 78 | "nbformat_minor": 0 79 | } -------------------------------------------------------------------------------- /pelops/analysis/CameraVsCamera.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "cd '~/work/pelops'" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "%matplotlib inline\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "#import time\n", 21 | "import numpy as np\n", 22 | "from 
pelops.datasets.featuredataset import FeatureDataset\n", 23 | "from pelops.experiment_api.experiment import ExperimentGenerator\n", 24 | "from pelops.datasets import chip\n", 25 | "from pelops.models.makesvm import train_svm\n", 26 | "from pelops.analysis.comparecameras import mad_matrix" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "ITEMSPERCAMERA = 2\n", 36 | "NUMCAMERAS = 2\n", 37 | "TRAIN_RANDOM=1024\n", 38 | "DROPPED=0\n", 39 | "TRAIN_FEATURES = '/Users/dgrossman/image_NEW_TRAIN'\n", 40 | "EXAMPLES = 1000 \n", 41 | "\n", 42 | "fd_train = FeatureDataset(TRAIN_FEATURES)\n", 43 | "eg_train = ExperimentGenerator(fd_train, NUMCAMERAS, ITEMSPERCAMERA, DROPPED, TRAIN_RANDOM)\n" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "clf_train = train_svm(EXAMPLES,fd_train,eg_train)\n" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "g_train,b_train = mad_matrix(EXAMPLES,clf_train,fd_train,eg_train,'cam')" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "ITEMSPERCAMERA = 2\n", 71 | "NUMCAMERAS = 2\n", 72 | "TEST_RANDOM=1024\n", 73 | "DROPPED=0\n", 74 | "TEST_FEATURES = '/Users/dgrossman/image_NEW_TEST'\n", 75 | "EXAMPLES = 1000\n", 76 | "\n", 77 | "fd_test = FeatureDataset(TEST_FEATURES)\n", 78 | "eg_test = ExperimentGenerator(fd_test, NUMCAMERAS, ITEMSPERCAMERA, DROPPED, TEST_RANDOM)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "g_test,b_test = mad_matrix(EXAMPLES,clf_train,fd_test,eg_test,'cam')" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "allset = set()\n", 97 | "for key in g_test.keys():\n", 98 | " l,r = key.split('|')\n", 99 | " allset.add(l)\n", 100 | " allset.add(r)\n", 101 | "for key in b_test.keys():\n", 102 | " l,r = key.split('|')\n", 103 | " allset.add(l)\n", 104 | " allset.add(r)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "len(allset)\n", 114 | "s = (len(allset),len(allset))\n", 115 | "names = [x for x in allset]\n", 116 | "name2index = dict()\n", 117 | "index2name = dict()\n", 118 | "for index,item in enumerate(names):\n", 119 | " name2index[item] = index\n", 120 | " index2name[index] = item" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "matrix = np.zeros(s)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "for carpcar in g_test.keys():\n", 139 | " n = g_test[carpcar]\n", 140 | " d = n\n", 141 | " if carpcar in b_test:\n", 142 | " d += b_test[carpcar]\n", 143 | " l,r = carpcar.split('|')\n", 144 | " matrix[name2index[l]][name2index[r]] = n / float(d)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "counts = list()\n", 154 | "spoo = list()\n", 155 | "\n", 156 | "for i in index2name:\n", 157 | " counts.append(i)\n", 
158 | " spoo.append(index2name[i])" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "import numpy as np\n", 168 | "import matplotlib.pyplot as plt\n", 169 | "plt.imshow(matrix,cmap='hot')\n", 170 | "plt.colorbar()\n", 171 | "plt.xticks(counts, spoo)\n", 172 | "plt.yticks(counts,spoo)" 173 | ] 174 | } 175 | ], 176 | "metadata": { 177 | "anaconda-cloud": {}, 178 | "celltoolbar": "Raw Cell Format", 179 | "kernelspec": { 180 | "display_name": "Python [conda root]", 181 | "language": "python", 182 | "name": "conda-root-py" 183 | }, 184 | "language_info": { 185 | "codemirror_mode": { 186 | "name": "ipython", 187 | "version": 3 188 | }, 189 | "file_extension": ".py", 190 | "mimetype": "text/x-python", 191 | "name": "python", 192 | "nbconvert_exporter": "python", 193 | "pygments_lexer": "ipython3", 194 | "version": "3.5.2" 195 | } 196 | }, 197 | "nbformat": 4, 198 | "nbformat_minor": 1 199 | } 200 | -------------------------------------------------------------------------------- /pelops/analysis/SVMBinaryCarMatch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "cd '~/work/pelops'" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "%matplotlib inline\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "#import time\n", 21 | "#import numpy as np\n", 22 | "from pelops.datasets.featuredataset import FeatureDataset\n", 23 | "from pelops.experiment_api.experiment import ExperimentGenerator\n", 24 | "from pelops.datasets import chip\n", 25 | "from pelops.models.makesvm import train_svm, test_svm\n", 26 | "from pelops.analysis.comparecameras import mad_matrix" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "ITEMSPERCAMERA = 2\n", 36 | "NUMCAMERAS = 2\n", 37 | "TRAIN_RANDOM=1024\n", 38 | "DROPPED=0\n", 39 | "TRAIN_FEATURES = '/Users/dgrossman/image_NEW_TINY_TRAIN'\n", 40 | "\n", 41 | "fd_train = FeatureDataset(TRAIN_FEATURES)\n", 42 | "eg_train = ExperimentGenerator(fd_train, NUMCAMERAS, ITEMSPERCAMERA, DROPPED, TRAIN_RANDOM)\n" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "ITEMSPERCAMERA = 2\n", 52 | "NUMCAMERAS = 2\n", 53 | "TEST_RANDOM=1024\n", 54 | "DROPPED=0\n", 55 | "TEST_FEATURES = '/Users/dgrossman/image_NEW_TINY_TEST'\n", 56 | "\n", 57 | "fd_test = FeatureDataset(TEST_FEATURES)\n", 58 | "eg_test = ExperimentGenerator(fd_test, NUMCAMERAS, ITEMSPERCAMERA, DROPPED, TEST_RANDOM)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "testpoints = [50,100,200,400,800,1600]" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "results = list()\n", 77 | "for testpoint in testpoints:\n", 78 | " clf_train = train_svm(testpoint,fd_train,eg_train)\n", 79 | " result = test_svm(testpoint,clf_train,fd_test,eg_test)\n", 80 | " print ('items: {}, score {}'.format(testpoint,result))\n", 81 | " results.append((testpoint,result))" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | 
"execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "for i,s in results:\n", 91 | " print('items:{}, score:{}'.format(i,s))" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [] 100 | } 101 | ], 102 | "metadata": { 103 | "anaconda-cloud": {}, 104 | "kernelspec": { 105 | "display_name": "Python [conda root]", 106 | "language": "python", 107 | "name": "conda-root-py" 108 | }, 109 | "language_info": { 110 | "codemirror_mode": { 111 | "name": "ipython", 112 | "version": 3 113 | }, 114 | "file_extension": ".py", 115 | "mimetype": "text/x-python", 116 | "name": "python", 117 | "nbconvert_exporter": "python", 118 | "pygments_lexer": "ipython3", 119 | "version": "3.5.2" 120 | } 121 | }, 122 | "nbformat": 4, 123 | "nbformat_minor": 1 124 | } 125 | -------------------------------------------------------------------------------- /pelops/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/analysis/__init__.py -------------------------------------------------------------------------------- /pelops/analysis/analysis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import defaultdict 3 | from scipy.spatial.distance import cosine, euclidean 4 | 5 | 6 | # compute cosine distance 7 | # 0 -> things are closer 8 | # 1 -> things are farther 9 | def comp_cosine(cam1_feat, cam2_feat): 10 | retval = 1 - cosine(cam1_feat, cam2_feat) 11 | return (retval) 12 | 13 | 14 | # compute Euclidean distance 15 | # 0 -> things are closer 16 | # + -> things are farther 17 | def comp_euclid(cam1_feat, cam2_feat): 18 | retval = abs(euclidean(cam1_feat, cam2_feat)) 19 | return (retval) 20 | 21 | 22 | # do the comparisons between chips 23 | # cam1 - listing of chips seen at cam1 24 | # cam2 - listing of chips seen at cam2 25 | # comparison - function to compare 2 vectors should return small things 26 | # when comparison is close, large otherwise 27 | # verbose - return more info if true 28 | def is_correct_match(featureData, 29 | cam1, 30 | cam2, 31 | comparison=comp_cosine, verbose=False): 32 | similarities = [] 33 | for cam1_chip in cam1: 34 | cam1_feat = featureData.get_feats_for_chip(cam1_chip) 35 | for cam2_chip in cam2: 36 | cam2_feat = featureData.get_feats_for_chip(cam2_chip) 37 | similarity = comparison(cam1_feat, cam2_feat) 38 | similarities.append((similarity, cam1_chip, cam2_chip)) 39 | similarities.sort(reverse=True) 40 | for i, (similarity, chip1, chip2) in enumerate(similarities): 41 | # return best_match 42 | if chip1.car_id == chip2.car_id: 43 | if verbose: 44 | return i, similarities 45 | else: 46 | return i 47 | raise ValueError("Huh?") 48 | 49 | 50 | # do EXPPERCMC, determine 51 | # featureData - big table to look up data 52 | # experimentGen - function to create experiments 53 | # EXPPERCMC - number of experiments to run for a single CMC 54 | # comparison - function to compare 2 feature vectors 55 | def pre_cmc(featureData, experimentGen, 56 | EXPPERCMC=1000, comparison=comp_cosine): 57 | 58 | num_downs = defaultdict(int) 59 | for i in range(EXPPERCMC): 60 | a = experimentGen.generate() 61 | num_down = is_correct_match(featureData, a[0], a[1], 62 | comparison=comparison) 63 | num_downs[num_down] += 1 64 | 65 | keys = sorted(num_downs) 66 | vals = [num_downs[key] for key 
in keys] 67 | return((keys, np.array(vals)/EXPPERCMC)) 68 | 69 | 70 | # Generate unprocessed CMC curves 71 | # the data needs to be summed to make the correct 72 | # CMC curve 73 | # featureData - FeatureDataset of chips 74 | # experimentGen - ExperimentGenerator 75 | # NUMCMC - number of CMC to build 76 | # EXPPERCMC - number of experiments run per CMC 77 | # comparison - function that compares two feature vectors returning 78 | # distance measure, 0 -> close big -> far 79 | def repeat_pre_cmc(featureData, experimentGen, NUMCMC=100, 80 | EXPPERCMC=1000, comparison=comp_cosine): 81 | experimentHolder = [] 82 | for experiment in range(NUMCMC): 83 | experimentHolder.append(pre_cmc(featureData, experimentGen, 84 | EXPPERCMC=EXPPERCMC, 85 | comparison=comparison)) 86 | return experimentHolder 87 | 88 | 89 | # finalize creation of the CMC curves 90 | # generate statistics on the CMC curves 91 | # return all 92 | # experimentHolder - array of CMC curves 93 | # itemsPerCamera - number of items on a camera 94 | def make_cmc_stats(experimentHolder, itemsPerCamera): 95 | comparisons = itemsPerCamera*itemsPerCamera 96 | stats = np.zeros((len(experimentHolder), comparisons)) 97 | 98 | for index, (keys, vals) in enumerate(experimentHolder): 99 | for keyIndex in range(len(keys)): 100 | stats[index, keys[keyIndex]] = vals[keyIndex] 101 | 102 | for index in range(len(stats[:, ])): 103 | total_sum = 0.0 104 | offsetlen = len(stats[0]) 105 | for sample in range(offsetlen): 106 | total_sum += stats[index, sample] 107 | stats[index, sample] = total_sum 108 | 109 | gdata = np.zeros((3, comparisons)) 110 | 111 | for i in range(comparisons): 112 | gdata[1, i] = np.average(stats[:, i]) 113 | for i in range(comparisons): 114 | stddev = np.std(stats[:, i]) 115 | gdata[0, i] = gdata[1, i] - stddev 116 | gdata[2, i] = gdata[1, i] + stddev 117 | 118 | return (stats, gdata) 119 | -------------------------------------------------------------------------------- /pelops/analysis/camerautil.py: -------------------------------------------------------------------------------- 1 | """ utilities when working with cameras""" 2 | 3 | from collections import defaultdict 4 | 5 | 6 | def nameit_cam(first, second): 7 | """ 8 | concatenate chip names together in a seperable way 9 | first(chip) - first item 10 | second(chip) - second item 11 | """ 12 | return '{}|{}'.format(first.cam_id, second.cam_id) 13 | 14 | 15 | def nameit_car(first, second): 16 | """ 17 | concatenate chip.car names together in a seperable way 18 | first(chip) - first item 19 | second(chip) - second imte 20 | """ 21 | return '{}|{}'.format(first.car_id, second.car_id) 22 | 23 | 24 | def get_match_id(cameras): 25 | """ 26 | find the car of interest from a set of cameras 27 | 28 | cameras(list(list(chips)))): list of the cameras with cars in each camera 29 | """ 30 | chosendict = defaultdict(int) 31 | for camera in cameras: 32 | for car in camera: 33 | chosendict[car.car_id] += 1 34 | mymax = -1 35 | myid = None 36 | for k in chosendict.keys(): 37 | if chosendict[k] > mymax: 38 | mymax = chosendict[k] 39 | myid = k 40 | return myid 41 | 42 | 43 | def make_good_bad(cameras, car_id): 44 | """ 45 | make a list of cars of interest, and a list of other 46 | 47 | cameras(list(list(chips))): list of the cameras with the cars in each cameras 48 | car_id(): the id of the car of interest 49 | """ 50 | goodlist = list() 51 | bad_list = list() 52 | for camera in cameras: 53 | for car in camera: 54 | if car.car_id == car_id: 55 | goodlist.append(car) 56 | else: 57 | 
bad_list.append(car) 58 | return (goodlist, bad_list) 59 | 60 | 61 | def glue(vec_a, vec_b): 62 | """ 63 | concatenate two smaller vectors to a larger vector 64 | vec_a : first vector 65 | vec_b : second vector 66 | """ 67 | retval = list() 68 | retval.extend(vec_a) 69 | retval.extend(vec_b) 70 | return retval 71 | -------------------------------------------------------------------------------- /pelops/analysis/comparecameras.py: -------------------------------------------------------------------------------- 1 | """ camera comparison """ 2 | 3 | import itertools 4 | from collections import defaultdict 5 | 6 | import numpy as np 7 | from tqdm import tnrange 8 | 9 | from pelops.analysis.camerautil import (get_match_id, glue, make_good_bad, 10 | nameit_cam, nameit_car) 11 | 12 | 13 | def eval_good_bad(first, second, clf, featuredataset, goodmatches, badmatches, attribute_name): 14 | """ 15 | label examples of good and bad comparisons 16 | 17 | take two chips, concantenate their feature vectors 18 | and create a balanced dataset of matches and differences 19 | 20 | first(Chip): image to evaluate 21 | second(Chip): image to evaluate 22 | clr(classifier): classifier used to evaluate chips 23 | fd(featureDataset): maps chips to features 24 | goodmatches(defaultdictionary(int)): counts of good matches 25 | badmatches(defaultdictionary(int)): counts of bad matches 26 | attribute_name(str): which attribute to pull names from 27 | """ 28 | 29 | namefunc = None 30 | if attribute_name == 'car': 31 | namefunc = nameit_car 32 | else: 33 | namefunc = nameit_cam 34 | 35 | bigvec1 = glue(featuredataset.get_feats_for_chip(first), 36 | featuredataset.get_feats_for_chip(second)) 37 | 38 | bigvec1np = np.array(bigvec1) 39 | #bigvec1np.reshape(1, -1) 40 | 41 | bigvec2 = glue(featuredataset.get_feats_for_chip(second), 42 | featuredataset.get_feats_for_chip(first)) 43 | 44 | bigvec2np = np.array(bigvec2) 45 | # bigvec2np.reshape(1, -1)) 46 | 47 | decision = clf.predict(bigvec1np.reshape(1, -1)) 48 | name = namefunc(first, second) 49 | 50 | tally_decision(decision, goodmatches, name, badmatches) 51 | 52 | decision = clf.predict(bigvec2np.reshape(1, -1)) 53 | name = namefunc(second, first) 54 | 55 | tally_decision(decision, goodmatches, name, badmatches) 56 | 57 | 58 | def tally_decision(decision, goodpic, name, badpic): 59 | """ 60 | count the number of matches for a name 61 | 62 | decision(int): whether the classifier said they matched 63 | goodpic(defaultdict(int)): list of good matches 64 | badpic(defaultdict(int)): list of bad matches 65 | name(str): concatenation of names of first and second pics 66 | """ 67 | if decision == 1: 68 | goodpic[name] += 1 69 | else: 70 | badpic[name] += 1 71 | 72 | 73 | def mad_matrix(examples, clf, featuredataset, examplegenerator, attribute_name='car'): 74 | """ 75 | run examples experiments to see how cars are declaired 76 | the same or different by the clf classifier.abs 77 | 78 | examples(int): number of trials 79 | clf(classifier): classifier to make same/different distinciton 80 | fd(featureDataset) : allows joining of chip to features 81 | eg(experimentGenerator): makes expermients for testing 82 | """ 83 | 84 | ddg = defaultdict(int) 85 | ddb = defaultdict(int) 86 | 87 | for _ in tnrange(examples): 88 | cameras_test = examplegenerator.generate() 89 | match_id = get_match_id(cameras_test) 90 | goods, bads = make_good_bad(cameras_test, match_id) 91 | good0 = goods[0] 92 | good1 = goods[1] 93 | bad0 = bads[0] 94 | bad1 = bads[1] 95 | 96 | eval_good_bad(good0, good1, clf, 
featuredataset, 97 | ddg, ddb, attribute_name) 98 | eval_good_bad(bad0, bad1, clf, featuredataset, 99 | ddb, ddg, attribute_name) 100 | 101 | return(ddg, ddb) 102 | 103 | 104 | def make_work(fd_train, lessons, outcomes, items, label): 105 | """ 106 | makes a listing of work from chips for classification 107 | 108 | fd_train(featureDataset): training features 109 | lessons(list): feature vectors 110 | outcomes(list): expected outcome for the comparison 111 | items(list(chips)): list of chips for comparison 112 | label(int): expected label for the comparison 113 | """ 114 | workitems = itertools.permutations(items, 2) 115 | for workitem in workitems: 116 | item = glue(fd_train.get_feats_for_chip( 117 | workitem[0]), fd_train.get_feats_for_chip(workitem[1])) 118 | 119 | lessons.append(item) 120 | outcomes.append(label) 121 | 122 | item = glue(fd_train.get_feats_for_chip( 123 | workitem[1]), fd_train.get_feats_for_chip(workitem[0])) 124 | 125 | lessons.append(item) 126 | outcomes.append(label) 127 | -------------------------------------------------------------------------------- /pelops/analysis/isFileImage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "cd 'deep-learning-models/'" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "from resnet50 import ResNet50\n", 19 | "from keras.preprocessing import image\n", 20 | "from imagenet_utils import preprocess_input, decode_predictions\n", 21 | "import numpy as np\n", 22 | "import json\n", 23 | "import time\n", 24 | "from multiprocessing import Pool\n", 25 | "import functools" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "\n", 35 | "root = '/local_data/dgrossman/dgCars'\n", 36 | "allFiles = 'allImages'\n", 37 | "\n" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "def readTasking(filename):\n", 47 | "\n", 48 | " af = open(allFiles,'r')\n", 49 | " data = list()\n", 50 | " fileProblems = list()\n", 51 | "\n", 52 | " for jline in af:\n", 53 | " jline = jline.strip()\n", 54 | " line = json.loads(jline)\n", 55 | " data.append(line)\n", 56 | " af.close()\n", 57 | " return data" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "def procLine2(l, r):\n", 67 | " img_path = '{0}/{1}'.format(r,l['filename'])\n", 68 | " try:\n", 69 | " img = image.load_img(img_path, target_size=(224, 224))\n", 70 | " return (1,l['filename'])\n", 71 | " except:\n", 72 | " return (0,l['filename'])\n", 73 | " \n", 74 | "procLine = functools.partial(procLine2, r=root )" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "time.sleep(60*60*6) # sleep 6 hours then try to do the images" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "p = Pool(32)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "data = readTasking(allFiles)" 102 | ] 103 | }, 104 | { 105 | "cell_type": 
"code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "a = p.map(procLine,data)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "def goodBad(data):\n", 120 | " good= 0\n", 121 | " bad = 0\n", 122 | " for item in data:\n", 123 | " if item[0]==1:\n", 124 | " good = good + 1\n", 125 | " else:\n", 126 | " bad = bad + 1\n", 127 | " print('good',good,' bad',bad)\n", 128 | " return (good,bad)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "out = goodBad(a)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "bad = list()\n", 147 | "for item in a:\n", 148 | " if item[0] == 0:\n", 149 | " bad.append(item[1])\n" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "bad" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "newdata = list()\n", 168 | "for d in data:\n", 169 | " if d['filename'] not in bad:\n", 170 | " newdata.append(d)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "length(newdata)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "len(newdata)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "import json\n", 198 | "out = open('allImages','w')\n", 199 | "for d in newdata:\n", 200 | " out.write(json.dumps(d)+'\\n');\n", 201 | "out.close()" 202 | ] 203 | } 204 | ], 205 | "metadata": { 206 | "anaconda-cloud": {}, 207 | "celltoolbar": "Raw Cell Format", 208 | "kernelspec": { 209 | "display_name": "Python 3", 210 | "language": "python", 211 | "name": "python3" 212 | }, 213 | "language_info": { 214 | "codemirror_mode": { 215 | "name": "ipython", 216 | "version": 3 217 | }, 218 | "file_extension": ".py", 219 | "mimetype": "text/x-python", 220 | "name": "python", 221 | "nbconvert_exporter": "python", 222 | "pygments_lexer": "ipython3", 223 | "version": "3.5.2" 224 | }, 225 | "nbpresent": { 226 | "slides": {}, 227 | "themes": { 228 | "default": "197aed3e-040e-45b3-b365-855332b06482", 229 | "theme": {} 230 | } 231 | } 232 | }, 233 | "nbformat": 4, 234 | "nbformat_minor": 1 235 | } 236 | -------------------------------------------------------------------------------- /pelops/analysis/labelImageCars.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import sys,os,os.path\n", 10 | "import tensorflow as tf\n", 11 | "os.environ['THEANO_FLAGS']='mode=FAST_RUN,device=gpu,floatX=float32'\n", 12 | "from keras import backend as K\n", 13 | "sess = tf.Session()\n", 14 | "K.set_session(sess)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "cd 'deep-learning-models/'" 24 | ] 25 | }, 26 | { 27 | 
"cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import json\n", 33 | "import time\n", 34 | "allFiles = 'allImages'\n", 35 | "root = '/local_data/dgrossman/dgCars/'\n", 36 | "af = open(allFiles,'r')\n", 37 | "data = list()\n", 38 | "for d in af:\n", 39 | " d = d.strip()\n", 40 | " data.append(json.loads(d))" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "from resnet50 import ResNet50\n", 50 | "from keras.preprocessing import image\n", 51 | "from imagenet_utils import preprocess_input, decode_predictions\n", 52 | "import numpy as np" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "s = time.time()\n", 62 | "model = ResNet50(weights='imagenet')\n", 63 | "print ('loadResNet50',time.time() - s)\n", 64 | "\n" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "good = list()\n", 74 | "bad = list()\n", 75 | "file = list()" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "interesting = set()\n", 85 | "\n", 86 | "for x in ['police_van','moving_van','car','pickup','suv','truck','crossover','van','minivan','sports_car','cab','racer','convertible','car_wheel','jeep','ambulance']:\n", 87 | " interesting.add(x)\n", 88 | "\n", 89 | "count = 0\n", 90 | "im = 0\n", 91 | "#with tf.device('/gpu:0'):\n", 92 | "# print ('time:',time.time())\n", 93 | "s = time.time()\n", 94 | "print ('time:',time.time() - s)\n", 95 | "if True:\n", 96 | " for d in data: \n", 97 | " img_path = '{0}/{1}'.format(root,d['filename'])\n", 98 | " flag = True\n", 99 | " try:\n", 100 | " img = image.load_img(img_path, target_size=(224, 224))\n", 101 | "\n", 102 | " except:\n", 103 | " #print('FILE :',d['filename'])\n", 104 | " file.append(d)\n", 105 | " flag = False\n", 106 | "\n", 107 | " if flag:\n", 108 | " x = image.img_to_array(img)\n", 109 | " x = np.expand_dims(x, axis=0)\n", 110 | " x = preprocess_input(x)\n", 111 | " preds = model.predict(x)\n", 112 | " predictions = decode_predictions(preds)[0][:4]\n", 113 | " #out = ''\n", 114 | " found = False\n", 115 | " for prediction in predictions:\n", 116 | " i,t,score = prediction\n", 117 | " #out = d['filename'], prediction\n", 118 | " if t in interesting:\n", 119 | " #out = 'GOOD'+' ' + d['filename']+' '+t\n", 120 | " good.append((d,t))\n", 121 | " found = True\n", 122 | " break\n", 123 | " if not found:\n", 124 | " bad.append((d,predictions[0][1]))\n", 125 | " #out = 'BAD'+ ' ' + d['filename']+ ' ' + predictions[0][1]\n", 126 | " #print (out)\n", 127 | " #print ('Predicted',decode_predictions(preds)[0][:4],' sec:',time.time() - s )\n", 128 | "\n", 129 | " atOnce = 10000\n", 130 | " if count == atOnce:\n", 131 | " count = 0\n", 132 | " im = im + 1\n", 133 | " z = time.time() - s\n", 134 | " print('processed:',im * atOnce,'Images','good',len(good),'bad',len(bad),'file',len(file),z)\n", 135 | " s = time.time()\n", 136 | " count = count + 1" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "print('processed:',len(good) + len(bad) + len(file),'Images','good',len(good),'bad',len(bad),'file',len(file))" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | 
"execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "from collections import defaultdict\n", 155 | "q = defaultdict(int)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "for b in good:\n", 165 | " q[b[1]] = q[b[1]]+1" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "for qq in q:\n", 175 | " print (qq,q[qq])\n" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "gfile = open('foundCars','w')\n", 185 | " for g in good:\n", 186 | " gfile.write(g+'\\n')\n", 187 | "gfile.close()" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "gfile = open('foundCars','w')\n", 197 | "for g in good:\n", 198 | " dat, classification = g\n", 199 | " dat['resnet50'] = classification\n", 200 | " gfile.write(json.dumps(dat)+'\\n')\n", 201 | "gfile.close()\n" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "good[0]" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "7+2" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [] 228 | } 229 | ], 230 | "metadata": { 231 | "anaconda-cloud": {}, 232 | "kernelspec": { 233 | "display_name": "Python 3", 234 | "language": "python", 235 | "name": "python3" 236 | }, 237 | "language_info": { 238 | "codemirror_mode": { 239 | "name": "ipython", 240 | "version": 3 241 | }, 242 | "file_extension": ".py", 243 | "mimetype": "text/x-python", 244 | "name": "python", 245 | "nbconvert_exporter": "python", 246 | "pygments_lexer": "ipython3", 247 | "version": "3.5.2" 248 | } 249 | }, 250 | "nbformat": 4, 251 | "nbformat_minor": 1 252 | } 253 | -------------------------------------------------------------------------------- /pelops/analysis/makeCMCplots.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#set some constants\n", 10 | "ITEMSPERCAMERA = 10\n", 11 | "YRANDOM=1024\n", 12 | "CAMERAS=2\n", 13 | "DROPPED=0\n", 14 | "CMC=100\n", 15 | "EXPERIMENTS=400" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "DATASETFILES = [('/local_data/dgrossman/image_body_type',4),\n", 25 | " ('/local_data/dgrossman/image_color_type',10),\n", 26 | " ('/local_data/dgrossman/image_color_body_type',40),\n", 27 | " ('/local_data/dgrossman/image_make_model_type',1057),\n", 28 | " ('/local_data/dgrossman/resnet50','-1')]\n", 29 | "DATASETFILES = [('/local_data/dgrossman/compcars_color',10),\n", 30 | " ('/local_data/dgrossman/compcars_make_model',284),\n", 31 | " ('/local_data/dgrossman/image_color_type',10),\n", 32 | " ('/local_data/dgrossman/resnet50','-1')]\n", 33 | "DATASETFILES = [('/local_data/dgrossman/resnet50','-1')]" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 
| "outputs": [], 41 | "source": [ 42 | "from pelops.datasets.featuredataset import FeatureDataset\n", 43 | "from pelops.experiment_api.experiment import ExperimentGenerator\n", 44 | "from pelops.analysis import analysis\n", 45 | "\n", 46 | "alldata = list()\n", 47 | "for datasetfile,num in DATASETFILES:\n", 48 | " #do the math\n", 49 | " print(datasetfile)\n", 50 | " featureData = FeatureDataset(datasetfile)\n", 51 | " expGen = ExperimentGenerator(featureData, CAMERAS, ITEMSPERCAMERA, DROPPED, YRANDOM)\n", 52 | " experimentHolder = analysis.repeat_pre_cmc(featureData,expGen,NUMCMC=CMC,EXPPERCMC=EXPERIMENTS)\n", 53 | " stats,gdata = analysis.make_cmc_stats(experimentHolder,ITEMSPERCAMERA)\n", 54 | " alldata.append(gdata)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "import numpy as np\n", 64 | "stats = np.zeros((100,len(DATASETFILES)))\n" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "for x in range(len(DATASETFILES)):\n", 74 | " for y in range(100):\n", 75 | " stats[y][x] = alldata[x][1][y]" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "%matplotlib inline\n", 85 | "import matplotlib.pyplot as plt\n", 86 | "\n", 87 | "#make the plots\n", 88 | "fig = plt.figure()\n", 89 | "ax = plt.subplot(111)\n", 90 | "\n", 91 | "ax.plot(stats)\n", 92 | "#plt.title('color: 10\\ncolor mixed with structure: 40 \\n structure only: 4, 1057\\n CMC curves with {} experiments per curve'.format(CMC,EXPERIMENTS))\n", 93 | "plt.title('compcars color:10\\ncompcars make model:284\\ndgcars color:10\\nuntrained resnet\\nCMC curves with {} experiments per curve'.format(CMC,EXPERIMENTS))\n", 94 | "plt.grid(True)\n", 95 | "#ax.legend(('4','10','40','1057','untrained'),bbox_to_anchor=(1, -0.05),fancybox=True, shadow=True, ncol=5)\n", 96 | "ax.legend(('10cc','284cc','10dg','untrained'),bbox_to_anchor=(1, -0.05),fancybox=True, shadow=True, ncol=5)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "%matplotlib inline\n", 106 | "import matplotlib.pyplot as plt\n", 107 | "\n", 108 | "#make the plots\n", 109 | "fig = plt.figure()\n", 110 | "ax = plt.subplot(111)\n", 111 | "\n", 112 | "ax.plot(stats)\n", 113 | "plt.title('[color out performs structure]\\n CMC curves with {} experiments per curve'.format(CMC,EXPERIMENTS))\n", 114 | "plt.grid(True)\n", 115 | "ax.legend(('4','10','40','1057 classes'),bbox_to_anchor=(1, -0.05),\n", 116 | " fancybox=True, shadow=True, ncol=5)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [] 125 | } 126 | ], 127 | "metadata": { 128 | "anaconda-cloud": {}, 129 | "kernelspec": { 130 | "display_name": "Python 3", 131 | "language": "python", 132 | "name": "python3" 133 | }, 134 | "language_info": { 135 | "codemirror_mode": { 136 | "name": "ipython", 137 | "version": 3 138 | }, 139 | "file_extension": ".py", 140 | "mimetype": "text/x-python", 141 | "name": "python", 142 | "nbconvert_exporter": "python", 143 | "pygments_lexer": "ipython3", 144 | "version": "3.5.2" 145 | } 146 | }, 147 | "nbformat": 4, 148 | "nbformat_minor": 2 149 | } 150 | -------------------------------------------------------------------------------- 
/pelops/analysis/makeFeatureFiles-TEST.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pelops.datasets.veri import VeriDataset\n", 10 | "from pelops.analysis.unsorted.recompute.extract_feats_from_chips import extract_feats_from_chips\n", 11 | "import pelops.utils as utils\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-body_type.model.json'\n", 21 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-body_type.weights.hdf5'\n", 22 | "layer = 'avg_pool'" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n", 32 | " set_type=utils.SetType.TEST.value)\n", 33 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_body_type_TEST',\n", 34 | " model_output_file,\n", 35 | " weights_output_file,\n", 36 | " layer)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color.model.json'\n", 46 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color.weights.hdf5'\n", 47 | "layer = 'avg_pool'" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n", 57 | " set_type=utils.SetType.TEST.value)\n", 58 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_color_type_TEST',\n", 59 | " model_output_file,\n", 60 | " weights_output_file,\n", 61 | " layer)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color_body_type.model2.json'\n", 71 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color_body_type.weights2.hdf5'\n", 72 | "layer = 'avg_pool'" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n", 82 | " set_type=utils.SetType.TEST.value)\n", 83 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_color_body_type_TEST',\n", 84 | " model_output_file,\n", 85 | " weights_output_file,\n", 86 | " layer)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-make_model.model.json'\n", 96 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-make_model.weights.hdf5'\n", 97 | "layer = 'avg_pool'" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n", 107 | " set_type=utils.SetType.TEST.value)\n", 108 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_make_model_type_TEST',\n", 109 | " 
model_output_file,\n", 110 | " weights_output_file,\n", 111 | " layer)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet.model.json'\n", 121 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet.weights.hdf5'\n", 122 | "layer = 'avg_pool'" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "veri = VeriDataset('/local_data/dgrossman/VeRi',set_type=utils.SetType.TEST.value)\n", 132 | "extract_feats_from_chips(veri, '/local_data/dgrossman/resnet50_TEST',\n", 133 | " model_output_file,\n", 134 | " weights_output_file,\n", 135 | " layer)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "model_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-make_model.model.json'\n", 145 | "weights_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-make_model.weights.hdf5'\n", 146 | "layer = 'avg_pool'" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "veri = VeriDataset('/local_data/dgrossman/VeRi',set_type=utils.SetType.TEST.value)\n", 156 | "extract_feats_from_chips(veri, '/local_data/dgrossman/compcars_make_model_TEST',\n", 157 | " model_output_file,\n", 158 | " weights_output_file,\n", 159 | " layer)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "model_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-colors.model.json'\n", 169 | "weights_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-colors.weights.hdf5'\n", 170 | "layer = 'avg_pool'" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "veri = VeriDataset('/local_data/dgrossman/VeRi',set_type=utils.SetType.TEST.value)\n", 180 | "extract_feats_from_chips(veri, '/local_data/dgrossman/compcars_color_TEST',\n", 181 | " model_output_file,\n", 182 | " weights_output_file,\n", 183 | " layer)" 184 | ] 185 | } 186 | ], 187 | "metadata": { 188 | "anaconda-cloud": {}, 189 | "kernelspec": { 190 | "display_name": "Python 3", 191 | "language": "python", 192 | "name": "python3" 193 | }, 194 | "language_info": { 195 | "codemirror_mode": { 196 | "name": "ipython", 197 | "version": 3 198 | }, 199 | "file_extension": ".py", 200 | "mimetype": "text/x-python", 201 | "name": "python", 202 | "nbconvert_exporter": "python", 203 | "pygments_lexer": "ipython3", 204 | "version": "3.5.2" 205 | } 206 | }, 207 | "nbformat": 4, 208 | "nbformat_minor": 2 209 | } 210 | -------------------------------------------------------------------------------- /pelops/analysis/makeFeatureFiles.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pelops.datasets.veri import VeriDataset\n", 10 | "from pelops.analysis.unsorted.recompute.extract_feats_from_chips import extract_feats_from_chips\n", 11 | "import pelops.utils as utils\n" 12 | ] 13 | }, 14 | { 
15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-body_type.model.json'\n", 21 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-body_type.weights.hdf5'\n", 22 | "layer = 'avg_pool'" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n", 32 | " set_type=utils.SetType.TRAIN.value)\n", 33 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_body_type',\n", 34 | " model_output_file,\n", 35 | " weights_output_file,\n", 36 | " layer)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color.model.json'\n", 46 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color.weights.hdf5'\n", 47 | "layer = 'avg_pool'" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n", 57 | " set_type=utils.SetType.TRAIN.value)\n", 58 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_color_type',\n", 59 | " model_output_file,\n", 60 | " weights_output_file,\n", 61 | " layer)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color_body_type.model2.json'\n", 71 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color_body_type.weights2.hdf5'\n", 72 | "layer = 'avg_pool'" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n", 82 | " set_type=utils.SetType.TRAIN.value)\n", 83 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_color_body_type',\n", 84 | " model_output_file,\n", 85 | " weights_output_file,\n", 86 | " layer)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-make_model.model.json'\n", 96 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-make_model.weights.hdf5'\n", 97 | "layer = 'avg_pool'" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n", 107 | " set_type=utils.SetType.TRAIN.value)\n", 108 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_make_model_type',\n", 109 | " model_output_file,\n", 110 | " weights_output_file,\n", 111 | " layer)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet.model.json'\n", 121 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet.weights.hdf5'\n", 122 | "layer = 'avg_pool'" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | 
"execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "veri = VeriDataset('/local_data/dgrossman/VeRi',set_type=utils.SetType.TRAIN.value)\n", 132 | "extract_feats_from_chips(veri, '/local_data/dgrossman/resnet50',\n", 133 | " model_output_file,\n", 134 | " weights_output_file,\n", 135 | " layer)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "model_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-make_model.model.json'\n", 145 | "weights_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-make_model.weights.hdf5'\n", 146 | "layer = 'avg_pool'" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "veri = VeriDataset('/local_data/dgrossman/VeRi',set_type=utils.SetType.TRAIN.value)\n", 156 | "extract_feats_from_chips(veri, '/local_data/dgrossman/compcars_make_model',\n", 157 | " model_output_file,\n", 158 | " weights_output_file,\n", 159 | " layer)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "model_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-colors.model.json'\n", 169 | "weights_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-colors.weights.hdf5'\n", 170 | "layer = 'avg_pool'" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "veri = VeriDataset('/local_data/dgrossman/VeRi',set_type=utils.SetType.TRAIN.value)\n", 180 | "extract_feats_from_chips(veri, '/local_data/dgrossman/compcars_color',\n", 181 | " model_output_file,\n", 182 | " weights_output_file,\n", 183 | " layer)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "1+1\n" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [] 201 | } 202 | ], 203 | "metadata": { 204 | "anaconda-cloud": {}, 205 | "kernelspec": { 206 | "display_name": "Python 3", 207 | "language": "python", 208 | "name": "python3" 209 | }, 210 | "language_info": { 211 | "codemirror_mode": { 212 | "name": "ipython", 213 | "version": 3 214 | }, 215 | "file_extension": ".py", 216 | "mimetype": "text/x-python", 217 | "name": "python", 218 | "nbconvert_exporter": "python", 219 | "pygments_lexer": "ipython3", 220 | "version": "3.5.2" 221 | } 222 | }, 223 | "nbformat": 4, 224 | "nbformat_minor": 2 225 | } 226 | -------------------------------------------------------------------------------- /pelops/analysis/makeVeri.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pelops.datasets.veri import VeriDataset 3 | from pelops.etl.extract_feats_from_chips import extract_feats_from_chips 4 | 5 | # make the stuff that we run on 6 | if __name__ == '__main__': 7 | # path to the veri dataset 8 | v_file_name = sys.argv[0] 9 | 10 | # filename of where to place the output 11 | out_file_name = sys.argv[1] 12 | 13 | veri = VeriDataset(v_file_name) 14 | extract_feats_from_chips(veri, out_file_name) 15 | -------------------------------------------------------------------------------- /pelops/analysis/recomputeCorpus.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pelops.analysis.unsorted.recompute.compute import do_training" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "tbld = '/local_data/dgrossman/tensorboard_logs'\n", 19 | "mcfs = '/local_data/dgrossman/model_save_dir/dg_carsweights.{epoch:02d}-{val_loss:.2f}-{val_acc:.2f}.hdf5'\n", 20 | "batch_size=32\n" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "training_basepath = '/local_data/dgrossman/keras/make_model/train'\n", 30 | "validation_basepath = '/local_data/dgrossman/keras/make_model/validate'\n", 31 | "\n", 32 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-make_model.model.json'\n", 33 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-make_model.weights.hdf5'\n", 34 | "\n", 35 | "do_training(training_basepath,validation_basepath, model_output_file, weights_output_file, tbld, mcfs, 32)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "training_basepath = '/local_data/dgrossman/keras/color/train'\n", 45 | "validation_basepath = '/local_data/dgrossman/keras/color/validate'\n", 46 | "\n", 47 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color.model.json'\n", 48 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color.weights.hdf5'\n", 49 | "\n", 50 | "do_training(training_basepath,validation_basepath, model_output_file, weights_output_file, tbld, mcfs, 32)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "training_basepath = '/local_data/dgrossman/keras/color_body_type/train'\n", 60 | "validation_basepath = '/local_data/dgrossman/keras/color_body_type/validate'\n", 61 | "\n", 62 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color_body_type.model2.json'\n", 63 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color_body_type.weights2.hdf5'\n", 64 | "\n", 65 | "do_training(training_basepath,validation_basepath, model_output_file, weights_output_file, tbld, mcfs, 32)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "training_basepath = '/local_data/dgrossman/keras/body_type/train'\n", 75 | "validation_basepath = '/local_data/dgrossman/keras/body_type/validate'\n", 76 | "\n", 77 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-body_type.model.json'\n", 78 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-body_type.weights.hdf5'\n", 79 | "\n", 80 | "do_training(training_basepath,validation_basepath, model_output_file, weights_output_file, tbld, mcfs, 32)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "training_basepath = '/local_data/teams/pelops/compcars_keras/make_model/train'\n", 90 | "validation_basepath = '/local_data/teams/pelops/compcars_keras/make_model/test'\n", 91 | "\n", 92 | 
"model_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-make_model.model.json'\n", 93 | "weights_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-make_model.weights.hdf5'\n", 94 | "\n", 95 | "do_training(training_basepath,validation_basepath, model_output_file, weights_output_file, tbld, mcfs, 32)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "training_basepath = '/local_data/dgrossman/compcars/colors/train'\n", 105 | "validation_basepath = '/local_data/dgrossman/compcars/colors/test'\n", 106 | "\n", 107 | "model_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-colors.model.json'\n", 108 | "weights_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-colors.weights.hdf5'\n", 109 | "\n", 110 | "do_training(training_basepath,validation_basepath, model_output_file, weights_output_file, tbld, mcfs, 32)" 111 | ] 112 | } 113 | ], 114 | "metadata": { 115 | "kernelspec": { 116 | "display_name": "Python 3", 117 | "language": "python", 118 | "name": "python3" 119 | }, 120 | "language_info": { 121 | "codemirror_mode": { 122 | "name": "ipython", 123 | "version": 3 124 | }, 125 | "file_extension": ".py", 126 | "mimetype": "text/x-python", 127 | "name": "python", 128 | "nbconvert_exporter": "python", 129 | "pygments_lexer": "ipython3", 130 | "version": "3.5.2" 131 | } 132 | }, 133 | "nbformat": 4, 134 | "nbformat_minor": 2 135 | } 136 | -------------------------------------------------------------------------------- /pelops/analysis/saveExtractFeatsFromChips.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pelops.datasets.veri import VeriDataset\n", 10 | "from pelops.analysis.unsorted.recompute.extract_feats_from_chips import extract_feats_from_chips\n", 11 | "import pelops.utils as utils\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "veri = VeriDataset('/local_data/dgrossman/newVeRi',set_type=utils.SetType.TRAIN.value)\n", 21 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_NEW_TRAIN')" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "veri = VeriDataset('/local_data/dgrossman/newVeRi',set_type=utils.SetType.TEST.value)\n", 31 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_NEW_TEST')" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "veri = VeriDataset('/local_data/dgrossman/tinyVeRi',set_type=utils.SetType.TRAIN.value)\n", 48 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_NEW_TINY_TRAIN')" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "veri = VeriDataset('/local_data/dgrossman/tinyVeRi',set_type=utils.SetType.TEST.value)\n", 58 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_NEW_TINY_TEST')" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 
65 | "outputs": [], 66 | "source": [] 67 | } 68 | ], 69 | "metadata": { 70 | "anaconda-cloud": {}, 71 | "kernelspec": { 72 | "display_name": "Python 3", 73 | "language": "python", 74 | "name": "python3" 75 | }, 76 | "language_info": { 77 | "codemirror_mode": { 78 | "name": "ipython", 79 | "version": 3 80 | }, 81 | "file_extension": ".py", 82 | "mimetype": "text/x-python", 83 | "name": "python", 84 | "nbconvert_exporter": "python", 85 | "pygments_lexer": "ipython3", 86 | "version": "3.5.2" 87 | } 88 | }, 89 | "nbformat": 4, 90 | "nbformat_minor": 2 91 | } 92 | -------------------------------------------------------------------------------- /pelops/analysis/splitDataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "from random import shuffle\n", 11 | "import glob\n", 12 | "import shutil\n", 13 | "import tqdm" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "def make_dir(path):\n", 23 | " if not os.path.exists(path):\n", 24 | " os.makedirs(path)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "def move_link(src,dst):\n", 34 | " real_src = os.path.realpath(src)\n", 35 | " #print(real_src,dst)\n", 36 | " os.symlink(real_src,dst)\n", 37 | " os.unlink(src)\n", 38 | " #os.rename(src,dst)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "def prep_datasets(srcpath,destpath,percent=0.3):\n", 48 | " \n", 49 | " if percent >1 or percent < 0:\n", 50 | " print ('bad')\n", 51 | " raise ValueError('percent needs to be in [0,1]')\n", 52 | " found = 0\n", 53 | " moved = 0\n", 54 | " for image_class_filepath in tqdm.tqdm(glob.glob(os.path.join(srcpath, '*'))):\n", 55 | " \n", 56 | " if os.path.isdir(image_class_filepath):\n", 57 | " image_class_num = int(os.path.basename(image_class_filepath))\n", 58 | " \n", 59 | " directory_name = os.path.join(destpath, '{}'.format(image_class_num))\n", 60 | " #print(directory_name)\n", 61 | " make_dir(directory_name)\n", 62 | " \n", 63 | " dir_contents = list()\n", 64 | " \n", 65 | " for filename in glob.glob(os.path.join(image_class_filepath, '*')):\n", 66 | " found+=1\n", 67 | " dir_contents.append(filename)\n", 68 | " \n", 69 | " \n", 70 | " shuffle(dir_contents)\n", 71 | " threshold = int (percent * len(dir_contents))\n", 72 | " mixed = dir_contents[:threshold]\n", 73 | " for filename in mixed:\n", 74 | " moved +=1\n", 75 | " #print ('filename:',os.path.basename(filename))\n", 76 | " src = os.path.join(srcpath,'{}'.format(image_class_num),filename)\n", 77 | " #print(directory_name,filename)\n", 78 | " dst = os.path.join(directory_name,os.path.basename(filename))\n", 79 | " #print('src:{0}\\ndst:{1}'.format(src,dst))\n", 80 | " move_link(src,dst)\n", 81 | " print('total:',found,'moved:',moved,'remains:',found-moved)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "train = '/local_data/dgrossman/keras/color/train'\n", 91 | "test = '/local_data/dgrossman/keras/color/test'\n", 92 | "validate = '/local_data/dgrossman/keras/color/validate'" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | 
"execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "prep_datasets(train,test,0.3)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "prep_datasets(test,validate,0.3)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [] 119 | } 120 | ], 121 | "metadata": { 122 | "kernelspec": { 123 | "display_name": "Python 3", 124 | "language": "python", 125 | "name": "python3" 126 | }, 127 | "language_info": { 128 | "codemirror_mode": { 129 | "name": "ipython", 130 | "version": 3 131 | }, 132 | "file_extension": ".py", 133 | "mimetype": "text/x-python", 134 | "name": "python", 135 | "nbconvert_exporter": "python", 136 | "pygments_lexer": "ipython3", 137 | "version": "3.5.2" 138 | } 139 | }, 140 | "nbformat": 4, 141 | "nbformat_minor": 2 142 | } 143 | -------------------------------------------------------------------------------- /pelops/analysis/test_analysis.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pelops.analysis import analysis 3 | 4 | 5 | class experimentGen(): 6 | def __init__(self): 7 | self.fd = featureData() 8 | self.experiment = list() 9 | c1 = ['a', 'b', 'c', 'd'] 10 | c2 = ['a', 'e', 'f', 'g'] 11 | cam1 = list() 12 | cam2 = list() 13 | 14 | for c in c1: 15 | cam1.append(self.fd.getchip(c)) 16 | 17 | for c in c2: 18 | cam2.append(self.fd.getchip(c)) 19 | 20 | self.experiment.append(cam1) 21 | self.experiment.append(cam2) 22 | 23 | def generate(self): 24 | return self.experiment 25 | 26 | 27 | class chip(): 28 | def __init__(self, x): 29 | self.car_id = x[0] 30 | self.feature = x[1] 31 | 32 | 33 | class featureData(): 34 | def __init__(self): 35 | self.data = list() 36 | 37 | fun = [('a', [1, 2, 3, 4, 5, 6, 7]), 38 | ('b', [10, 20, 30, 40, 11, 9, 2.7]), 39 | ('c', [100, 20, 30, 40, 11, 9, 2.7]), 40 | ('d', [10, 200, 30, 40, 11, 9, 2.7]), 41 | ('e', [10, 20, 300, 40, 11, 9, 2.7]), 42 | ('f', [10, 20, 30, 400, 11, 9, 2.7]), 43 | ('g', [10, 20, 30, 40, 110, 9, 2.7]), 44 | ('h', [10, 20, 30, 40, 11, 90, 2.7]), 45 | ('i', [10, 20, 30, 40, 11, 9, 27.0])] 46 | for f in fun: 47 | self.data.append(chip(f)) 48 | 49 | def get_feats_for_chip(self, chip): 50 | for d in self.data: 51 | if d.car_id == chip.car_id: 52 | return d.feature 53 | 54 | def getchip(self, id): 55 | for d in self.data: 56 | if d.car_id == id: 57 | return d 58 | 59 | # test the comparisons 60 | 61 | 62 | def test_cosine(): 63 | a = [1, 2, 3, 4, 5, 6, 7] 64 | b = [10, 20, 30, 40, 11, 9, 2.7] 65 | out = analysis.comp_cosine(a, b) 66 | assert(abs(out - 0.63837193721375185) < 0.0000001) 67 | 68 | 69 | def test_euclidean(): 70 | a = [1, 2, 3, 4, 5, 6, 7] 71 | b = [10, 20, 30, 40, 11, 9, 2.7] 72 | out = analysis.comp_euclid(a, b) 73 | assert(abs(out - 49.93485756463114) < 0.0000001) 74 | 75 | # test the matching works correctly 76 | 77 | 78 | def test_is_correct_match(): 79 | fd = featureData() 80 | 81 | c1 = ['a', 'b', 'c', 'd'] 82 | c2 = ['a', 'e', 'f', 'g'] 83 | cam1 = list() 84 | cam2 = list() 85 | 86 | for c in c1: 87 | cam1.append(fd.getchip(c)) 88 | 89 | for c in c2: 90 | cam2.append(fd.getchip(c)) 91 | 92 | out = analysis.is_correct_match(fd, cam1, cam2) 93 | assert (out == 0) 94 | 95 | 96 | def test_pre_cmc(): 97 | eg = experimentGen() 98 | fd = featureData() 99 | keys, values = analysis.pre_cmc(fd, eg, EXPPERCMC=10) 100 | assert 
values[0] == 1.0 101 | 102 | 103 | #test the statistics are being generated correctly 104 | def test_make_cmc_stats(): 105 | eg = experimentGen() 106 | fd = featureData() 107 | experimentHolder = analysis.repeat_pre_cmc(fd, eg, NUMCMC=10, EXPPERCMC=10) 108 | stats, gdata = analysis.make_cmc_stats(experimentHolder, 4) 109 | 110 | for x in range(len(gdata[0])): 111 | assert ( gdata[1][x] ==gdata[2][x] == gdata[0][x]) 112 | -------------------------------------------------------------------------------- /pelops/analysis/unsorted/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/analysis/unsorted/__init__.py -------------------------------------------------------------------------------- /pelops/analysis/unsorted/makeH5pyFile.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "cd '/local_data/dgrossman/VeRi/'" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import h5py\n", 19 | "import json\n", 20 | "import numpy as np\n", 21 | "\n", 22 | "def makeJsonList(fileName):\n", 23 | " retval = list()\n", 24 | " with open(fileName,'r') as f:\n", 25 | " for line in f:\n", 26 | " line = line.strip()\n", 27 | " line = json.loads(line)\n", 28 | " retval.append(line)\n", 29 | " return retval\n", 30 | "\n", 31 | "def extractColumn(colName,jsonList,t):\n", 32 | " retval = list()\n", 33 | " for line in jsonList:\n", 34 | " if t == str:\n", 35 | " retval.append(str(line[colName]).encode('ascii','ignore'))\n", 36 | " if t == int:\n", 37 | " retval.append(int(line[colName]))\n", 38 | " if t == float:\n", 39 | " for element in line[colName]:\n", 40 | " retval.append(float(element))\n", 41 | " return retval\n", 42 | "\n", 43 | "def make5file(file5Name, names, jsonList):\n", 44 | " with h5py.File(file5Name,'w') as f: \n", 45 | " for o, i, t, t2 in names:\n", 46 | " print(o,i)\n", 47 | " temp = extractColumn(o,jsonList,t)\n", 48 | " f.create_dataset(i,data=temp,dtype=t2)\n", 49 | "\n", 50 | "def main(inFileName,outFileName):\n", 51 | " jsonList = makeJsonList(inFileName)\n", 52 | " f = np.dtype('float')\n", 53 | " c = h5py.special_dtype(vlen=bytes)\n", 54 | " names = [('colorID','colorID',int,int), ('vehicleID','vehicleID',str,c), ('resnet50','feats',float,f), ('imageName','ids',str,c), ('typeID','typeID',int,int), ('cameraID','cameraID',str,c)]\n", 55 | " make5file(outFileNAme,names,jsonList)\n", 56 | " \n", 57 | "if __name__ == '__main__':\n", 58 | " main(sys.argv[1],sys.argv[2])" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "main('./test_uniqfile.json','./test_uniqfile.p5')" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "jsonList[0].keys()" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "f = np.dtype('float')\n", 86 | "c = h5py.special_dtype(vlen=bytes)\n", 87 | "names = [('colorID','colorID',int,int), ('vehicleID','vehicleID',str,c), ('resnet50','feats',float,f), ('imageName','ids',str,c), ('typeID','typeID',int,int), 
('cameraID','cameraID',str,c)]" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "make5file('1test_features',names,jsonList)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [] 105 | } 106 | ], 107 | "metadata": { 108 | "anaconda-cloud": {}, 109 | "kernelspec": { 110 | "display_name": "Python 3", 111 | "language": "python", 112 | "name": "python3" 113 | }, 114 | "language_info": { 115 | "codemirror_mode": { 116 | "name": "ipython", 117 | "version": 3 118 | }, 119 | "file_extension": ".py", 120 | "mimetype": "text/x-python", 121 | "name": "python", 122 | "nbconvert_exporter": "python", 123 | "pygments_lexer": "ipython3", 124 | "version": "3.5.2" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 1 129 | } 130 | -------------------------------------------------------------------------------- /pelops/analysis/unsorted/recompute/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/analysis/unsorted/recompute/__init__.py -------------------------------------------------------------------------------- /pelops/analysis/unsorted/recompute/extract_feats_from_chips.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.applications.resnet50 import preprocess_input 3 | from keras.applications.resnet50 import ResNet50 4 | from keras.models import Model 5 | from keras.models import model_from_json 6 | from keras.preprocessing import image 7 | 8 | from pelops.datasets.featuredataset import FeatureDataset 9 | 10 | 11 | def load_image(img_path, resizex=224, resizey=224): 12 | data = image.load_img(img_path, target_size=(resizex, resizey)) 13 | x = image.img_to_array(data) 14 | x = np.expand_dims(x, axis=0) 15 | x = preprocess_input(x) 16 | return x 17 | 18 | 19 | def save_model_workaround(model, model_file, weight_file): 20 | # serialize model to JSON 21 | model_json = model.to_json() 22 | with open(model_file, 'w') as json_file: 23 | json_file.write(model_json) 24 | # serialize weights to HDF5 25 | model.save_weights(weight_file) 26 | 27 | 28 | def load_model_workaround(model_file, weight_file): 29 | # load json and create model 30 | json_file = open(model_file, 'r') 31 | loaded_model_json = json_file.read() 32 | json_file.close() 33 | loaded_model = model_from_json(loaded_model_json) 34 | # load weights into new model 35 | loaded_model.load_weights(weight_file) 36 | return loaded_model 37 | 38 | # load the imagenet networks 39 | 40 | 41 | def get_models(model_file, weight_file, layer): 42 | # include_top needs to be True for this to work 43 | base_model = load_model_workaround(model_file, weight_file) 44 | output_layer = base_model.get_layer(layer) 45 | output_layer = output_layer.output 46 | model = Model(input=base_model.input, output=output_layer) 47 | # output=base_model.get_layer('flatten_1').output) 48 | return (model, base_model) 49 | 50 | # return feature vector for a given img, and model 51 | 52 | 53 | def image_features(img, model): 54 | features = model.predict(img) 55 | return features 56 | 57 | 58 | def extract_feats_from_chips(chipdataset, output_fname, model_file, weight_file, layer): 59 | model, base_model = get_models(model_file, weight_file, layer) 60 | 61 | features = 
np.zeros((len(chipdataset), 2048), dtype=np.float16) 62 | chips = [] 63 | chip_keys = [] 64 | for index, (chip_key, chip) in enumerate(chipdataset.chips.items()): 65 | chip_keys.append(chip_key) 66 | chips.append(chip) 67 | img_path = chip.filepath 68 | img_data = load_image(img_path) 69 | features[index] = image_features(img_data, model) 70 | 71 | FeatureDataset.save(output_fname, chip_keys, chips, features) 72 | return True 73 | -------------------------------------------------------------------------------- /pelops/const.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | class _Const: 4 | """ Create a constant class 5 | """ 6 | class ConstError(TypeError): pass 7 | 8 | def __setattr__(self, name, value): 9 | if name in self.__dict__: 10 | raise self.ConstError("Cannot rebind constant {}".format(name)) 11 | self. __dict__[name] = value 12 | 13 | def __delattr__(self, name): 14 | if name in self.__dict__: 15 | raise self.ConstError("Cannot unbind constant {}".format(name)) 16 | raise NameError(name) 17 | 18 | sys.modules[__name__] = _Const() -------------------------------------------------------------------------------- /pelops/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/datasets/__init__.py -------------------------------------------------------------------------------- /pelops/datasets/chip.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import collections 3 | import os 4 | import pelops.utils as utils 5 | 6 | # ================================================================================ 7 | # Chip Factory 8 | # ================================================================================ 9 | 10 | 11 | class DatasetFactory(object): 12 | @staticmethod 13 | def create_dataset(dataset_type, dataset_path, set_type=None): 14 | for cls in ChipDataset.__subclasses__(): 15 | if cls.check_dataset_type(dataset_type): 16 | return cls(dataset_path, set_type) 17 | 18 | # ================================================================================ 19 | # Chip Dataset 20 | # ================================================================================ 21 | 22 | 23 | class ChipDataset(metaclass = abc.ABCMeta): 24 | def __init__(self, dataset_path, set_type=None): 25 | self.dataset_path = dataset_path 26 | self.__set_set_type(set_type) 27 | self.chips = dict() 28 | self.chips_by_cam_id = None 29 | self.chips_by_car_id = None 30 | 31 | def __set_set_type(self, set_type): 32 | self.set_type = None 33 | 34 | # The Default ALL 35 | if set_type is None: 36 | self.set_type = utils.SetType.ALL 37 | 38 | # If passed a SetType 39 | if isinstance(set_type, utils.SetType): 40 | self.set_type = set_type 41 | 42 | # If passed a string 43 | if isinstance(set_type, str): 44 | set_type = set_type.lower() 45 | for st in utils.SetType: 46 | if set_type == st.value: 47 | self.set_type = st 48 | 49 | if self.set_type is None: 50 | raise ValueError("set_type is not a valid string or SetType enum") 51 | 52 | 53 | @classmethod 54 | def check_dataset_type(self, dataset_type): 55 | return dataset_type == self.__name__ 56 | 57 | def get_all_chips_by_car_id(self, car_id): 58 | if self.chips_by_car_id is None: 59 | self.chips_by_car_id = collections.defaultdict(list) 60 | for chip_key, chip in self.chips.items(): 61 | 
self.chips_by_car_id[chip.car_id].append(chip_key) 62 | return [self.chips[chip_key] for chip_key in self.chips_by_car_id[car_id]] 63 | 64 | def get_all_chips_by_car_id_camera_id(self, car_id, cam_id): 65 | output = [] 66 | for chip in self.get_all_chips_by_car_id(car_id): 67 | if chip.cam_id == cam_id: 68 | output.append(chip) 69 | return output 70 | 71 | def get_all_chips_by_cam_id(self, cam_id): 72 | if self.chips_by_cam_id is None: 73 | self.chips_by_cam_id = collections.defaultdict(list) 74 | for chip_key, chip in self.chips.items(): 75 | self.chips_by_cam_id[chip.cam_id].append(chip_key) 76 | 77 | return [self.chips[chip_key] for chip_key in self.chips_by_cam_id[cam_id]] 78 | 79 | def get_distinct_cams_by_car_id(self, car_id): 80 | # TODO: Look at performance 81 | return self.get_distinct_cams_per_car()[car_id] 82 | 83 | def get_distinct_cams_per_car(self): 84 | # TODO: Look at performance 85 | list_of_cameras_per_car = collections.defaultdict(set) 86 | for chip in self.chips.values(): 87 | list_of_cameras_per_car[chip.car_id].add(chip.cam_id) 88 | return list_of_cameras_per_car 89 | 90 | def get_all_cam_ids(self): 91 | return list(set(chip.cam_id for chip in self.chips.values())) 92 | 93 | def get_all_car_ids(self): 94 | return list(set(chip.car_id for chip in self.chips.values())) 95 | 96 | def __iter__(self): 97 | for chip in self.chips.values(): 98 | yield chip 99 | raise StopIteration() 100 | 101 | def __len__(self): 102 | return len(self.chips) 103 | 104 | # ================================================================================ 105 | # Chip Base 106 | # ================================================================================ 107 | 108 | 109 | # chip_id is the filepath 110 | Chip = collections.namedtuple("Chip", 111 | ["filepath", 112 | "car_id", 113 | "cam_id", 114 | "time", 115 | "misc"]) 116 | -------------------------------------------------------------------------------- /pelops/datasets/compcar.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import os 3 | import scipy.io 4 | 5 | import pelops.datasets.chip as chip 6 | import pelops.utils as utils 7 | 8 | 9 | class CompcarDataset(chip.ChipDataset): 10 | filenames = collections.namedtuple( 11 | "filenames", 12 | [ 13 | "image_dir", 14 | "name_train", 15 | "name_test", 16 | "model_mat", 17 | "color_mat", 18 | ] 19 | ) 20 | filepaths = filenames ( 21 | "image", 22 | "train_surveillance.txt", 23 | "test_surveillance.txt", 24 | "sv_make_model_name.mat", 25 | "color_list.mat", 26 | ) 27 | 28 | def __init__(self, dataset_path, set_type=None): 29 | super().__init__(dataset_path, set_type) 30 | self.__set_filepaths() # set self.__filepaths 31 | self.__extract_color_labels() # set self.__color_map 32 | self.__extract_model_labels() # set self.__model_map 33 | self.__set_chips() 34 | 35 | def __set_filepaths(self): 36 | self.__filepaths = self.filenames( 37 | os.path.join(self.dataset_path, CompcarDataset.filepaths.image_dir), 38 | os.path.join(self.dataset_path, CompcarDataset.filepaths.name_train), 39 | os.path.join(self.dataset_path, CompcarDataset.filepaths.name_test), 40 | os.path.join(self.dataset_path, CompcarDataset.filepaths.model_mat), 41 | os.path.join(self.dataset_path, CompcarDataset.filepaths.color_mat), 42 | ) 43 | 44 | def __extract_color_labels(self): 45 | self.__color_map = {} 46 | 47 | # Map color_id to its respective name 48 | color_map = { 49 | -1: None, 50 | 0: "black", 51 | 1: "white", 52 | 2: "red", 53 | 3: "yellow", 54 | 4: 
"blue", 55 | 5: "green", 56 | 6: "purple", 57 | 7: "brown", 58 | 8: "champagne", 59 | 9: "silver", 60 | } 61 | 62 | # Load the matrix of colors 63 | color_matrix = scipy.io.loadmat( 64 | self.__filepaths.color_mat)["color_list"] 65 | 66 | # File is an length 1 array, color_num is a 1x1 matrix 67 | for file_array, color_num_matrix in color_matrix: 68 | filepath = file_array[0] 69 | color_num = int(color_num_matrix[0][0]) 70 | self.__color_map[filepath] = color_map[color_num] 71 | 72 | def __extract_model_labels(self): 73 | self.__model_map = {} 74 | 75 | model_matrix = scipy.io.loadmat( 76 | self.__filepaths.model_mat)["sv_make_model_name"] 77 | for car_id, model_matrix in enumerate(model_matrix): 78 | # correct car_id 79 | car_id = int(car_id) + 1 80 | # make contains only the make of the car and occasionally contains whitespaces after 81 | make = model_matrix[0][0].strip() 82 | # correct instance when make is misspelled that affects the model 83 | if make == "Zoyte": 84 | make = "Zotye" 85 | # model sometimes contains both make and model, so ensure that model only contains model 86 | make_and_model = model_matrix[1][0] 87 | model = make_and_model.replace(make, "").strip() 88 | # model_id contains the model id used in the web 89 | model_id = int(model_matrix[2][0][0]) 90 | # correct instance when make is misspelled 91 | if make == "BWM": 92 | make = "BMW" 93 | self.__model_map[car_id] = [make, model, model_id] 94 | 95 | def __set_chips(self): 96 | # identify all the chips, default query to all 97 | all_names_filepaths = { 98 | utils.SetType.ALL: [self.__filepaths.name_test, self.__filepaths.name_train], 99 | utils.SetType.TEST: [self.__filepaths.name_test], 100 | utils.SetType.TRAIN: [self.__filepaths.name_train], 101 | }.get(self.set_type, [self.__filepaths.name_test, self.__filepaths.name_train]) 102 | # create chip objects based on the names listed in the files 103 | for name_filepath in all_names_filepaths: 104 | for name in open(name_filepath): 105 | current_chip = self.__create_chip(self.__filepaths.image_dir, name.strip()) 106 | self.chips[current_chip.filepath] = current_chip 107 | 108 | def __create_chip(self, img_dir, img_name): 109 | splitter = img_name.split("/") 110 | misc = dict() 111 | 112 | filepath = os.path.join(img_dir, img_name) 113 | car_id = int(splitter[0]) 114 | cam_id = None 115 | time = None 116 | misc["color"] = self.__color_map[img_name] 117 | make, model, model_id = self.__model_map[car_id] 118 | misc["make"] = make 119 | misc["model"] = model 120 | misc["model_id"] = model_id 121 | 122 | return chip.Chip(filepath, car_id, cam_id, time, misc) 123 | -------------------------------------------------------------------------------- /pelops/datasets/dgcars.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import json 3 | import os.path 4 | 5 | import pelops.datasets.chip as chip 6 | import pelops.utils as utils 7 | 8 | 9 | class DGCarsDataset(chip.ChipDataset): 10 | filenames = collections.namedtuple( 11 | "filenames", 12 | [ 13 | "all_list", 14 | "train_list", 15 | "test_list", 16 | ] 17 | ) 18 | filepaths = filenames( 19 | "allFiles", 20 | "training", 21 | "testing", 22 | ) 23 | 24 | def __init__(self, dataset_path, set_type=None): 25 | super().__init__(dataset_path, set_type) 26 | self.__set_filepaths() # set self.__filepaths 27 | self.__set_chips() 28 | 29 | def __set_filepaths(self): 30 | self.__filepaths = self.filenames( 31 | os.path.join(self.dataset_path, DGCarsDataset.filepaths.all_list), 32 | 
os.path.join(self.dataset_path, DGCarsDataset.filepaths.train_list), 33 | os.path.join(self.dataset_path, DGCarsDataset.filepaths.test_list), 34 | ) 35 | 36 | def __set_chips(self): 37 | # identify all the chips, default query to all 38 | name_filepath = { 39 | utils.SetType.ALL: self.__filepaths.all_list, 40 | utils.SetType.TEST: self.__filepaths.test_list, 41 | utils.SetType.TRAIN: self.__filepaths.train_list, 42 | }.get(self.set_type, self.__filepaths.all_list) 43 | 44 | # create chip objects based on the names listed in the files 45 | for dg_chip in utils.read_json(name_filepath): 46 | filepath = os.path.normpath(os.path.join(self.dataset_path, dg_chip["filename"])) 47 | car_id = None 48 | cam_id = None 49 | time = None 50 | misc = dg_chip 51 | current_chip = chip.Chip(filepath, car_id, cam_id, time, misc) 52 | 53 | self.chips[filepath] = current_chip 54 | -------------------------------------------------------------------------------- /pelops/datasets/featuredataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import datetime 3 | import h5py 4 | import numpy as np 5 | from pelops.datasets.chip import ChipDataset, Chip 6 | 7 | class FeatureDataset(ChipDataset): 8 | def __init__(self, filename): 9 | super().__init__(filename) 10 | self.chip_index_lookup, self.chips, self.feats = self.load(filename) 11 | self.filename_lookup = {} 12 | for chip_key, chip in self.chips.items(): 13 | self.filename_lookup[chip.filepath] = chip_key 14 | 15 | def get_feats_for_chip(self, chip): 16 | chip_key = self.filename_lookup[chip.filepath] 17 | return self.feats[self.chip_index_lookup[chip_key]] 18 | 19 | @staticmethod 20 | def load(filename): 21 | with h5py.File(filename) as fIn: 22 | feats = np.array(fIn['feats']) 23 | 24 | num_items = fIn['feats'].shape[0] 25 | # Hack to deal with performance of extracting single items 26 | local_hdf5 = {} 27 | local_hdf5['chip_keys'] = np.array(fIn['chip_keys']) 28 | local_hdf5['filepath'] = np.array(fIn['filepath']) 29 | local_hdf5['car_id'] = np.array(fIn['car_id']) 30 | local_hdf5['cam_id'] = np.array(fIn['cam_id']) 31 | local_hdf5['time'] = np.array(fIn['time']) 32 | local_hdf5['misc'] = np.array(fIn['misc']) 33 | 34 | chips = {} 35 | chip_index_lookup = {} 36 | for i in range(num_items): 37 | filepath = local_hdf5['filepath'][i].decode('utf-8') 38 | car_id = local_hdf5['car_id'][i] 39 | cam_id = local_hdf5['cam_id'][i] 40 | timestamp = local_hdf5['time'][i] 41 | if isinstance(timestamp, str) or isinstance(timestamp, bytes): 42 | # Catch the case where we have encoded time as a string timestamp 43 | timestamp = datetime.datetime.fromtimestamp(float(timestamp)) 44 | misc = json.loads(local_hdf5['misc'][i].decode('utf-8')) 45 | chip_key = local_hdf5['chip_keys'][i] 46 | if isinstance(chip_key, bytes): 47 | chip_key = chip_key.decode('utf-8') 48 | chip_index_lookup[chip_key] = i 49 | chips[chip_key] = Chip(filepath, car_id, cam_id, timestamp, misc) 50 | return chip_index_lookup, chips, feats 51 | 52 | @staticmethod 53 | def _save_field(fOut, field_example, field_name, value_array): 54 | if isinstance(field_example, datetime.datetime): 55 | # Encode time as a string seconds since epoch 56 | times = np.array([str(val.timestamp()).encode('ascii', 'ignore') for val in value_array]) 57 | fOut.create_dataset(field_name, 58 | data=times, 59 | dtype=h5py.special_dtype(vlen=bytes)) 60 | elif isinstance(field_example, str): 61 | output_vals = [val.encode('ascii', 'ignore') for val in value_array] 62 | 
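# str fields are stored as variable-length byte strings (h5py special vlen dtype), matching the datetime branch above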
fOut.create_dataset(field_name, 63 | data= output_vals, 64 | dtype=h5py.special_dtype(vlen=bytes)) 65 | elif isinstance(field_example, dict): 66 | output_vals = [json.dumps(val).encode('ascii', 'ignore') for val in value_array] 67 | fOut.create_dataset(field_name, 68 | data=output_vals, 69 | dtype=h5py.special_dtype(vlen=bytes)) 70 | else: 71 | fOut.create_dataset(field_name, data=value_array) 72 | 73 | @staticmethod 74 | def save(filename, chip_keys, chips, features): 75 | """ Save a feature dataset 76 | """ 77 | with h5py.File(filename, 'w') as fOut: 78 | fOut.create_dataset('feats', data=features) 79 | 80 | FeatureDataset._save_field(fOut, 81 | chip_keys[0], 82 | 'chip_keys', 83 | chip_keys) 84 | 85 | first_chip = chips[0] 86 | fields = first_chip._fields 87 | for field in fields: 88 | field_example = getattr(first_chip, field) 89 | output_data = [getattr(chip, field) for chip in chips] 90 | FeatureDataset._save_field(fOut, field_example, field, output_data) 91 | -------------------------------------------------------------------------------- /pelops/datasets/slice.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import io 3 | import itertools 4 | import os 5 | import re 6 | import sys 7 | from datetime import datetime 8 | 9 | import pelops.datasets.chip as chip 10 | 11 | # ================================================================================ 12 | # SLiCE Test Dataset (labeled by STR) 13 | # ================================================================================ 14 | 15 | 16 | class SliceDataset(chip.ChipDataset): 17 | 18 | def __init__(self, dataset_path, set_type=None, debug=False): 19 | super().__init__(dataset_path, set_type) 20 | self.__noise_seq = 0 21 | self.__debug = debug 22 | self.__set_chips() 23 | 24 | @staticmethod 25 | def __decode_truth_file(truth_file): 26 | """The labels for the STR processed SLiCE chips are in a 'truth.txt' file which this function parses.""" 27 | 28 | with open(truth_file) as truth_hdl: 29 | truth_text = truth_hdl.read() 30 | for char in [' ', '%']: 31 | truth_text = truth_text.replace(char, '') 32 | truth_fobj = io.StringIO(truth_text) 33 | return {(int(dct['obSetIdx']), int(dct['chipIdx'])): int(dct['targetID']) 34 | for dct in csv.DictReader(truth_fobj)} 35 | 36 | @staticmethod 37 | def index_chip(file_path): 38 | """Parses an arbitrary file path and identifies paths of valid image chips. 39 | Returns None for non-chip file paths.""" 40 | 41 | # We have to handle two cases: 42 | # 43 | # 1) The STR San Antonio DOT chips, which have the form: 44 | # ObSet009_1473015765_IH37_Jones/images/ObSet009-014.png 45 | # 46 | # 2) The SLICE chips, which have the form: 47 | # ObSet101_1473082429_day5_camera3/images/ObSet101-001-0-20160905_185543.375_1.jpg 48 | # 49 | # The epoch on the SLICE chips is per chip, whereas it is per 50 | # observation set for the STR chips. 
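# (in the SLICE example above, that per-chip epoch comes from the 20160905_185543.375 portion of the file name; the trailing _1 is discarded)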
The SLICE chip file names have the 51 | # follow information after the ObSet and chip id: 52 | # 53 | # Obset-ChipID-label-time_unused 54 | 55 | # Split the file path into pieces to extract the information from it 56 | file_path = os.path.normpath(file_path) 57 | directory, img_type, file = file_path.split(os.sep)[-3:] 58 | 59 | # Sometimes we have masks, reject those 60 | if img_type != "images": 61 | return 62 | 63 | # Sometimes we get the truth.txt file, which we do not want 64 | if file == "truth.txt": 65 | return 66 | 67 | # Get the observation set, time, and name from the directory 68 | obset_str, epoch_str, *name = directory.split("_") 69 | name = "_".join(name) 70 | 71 | # We slice off the first part of the string that is non-numeric, where 72 | # 5 = len("ObSet") 73 | obset_int = int(obset_str[5:]) 74 | 75 | # Get the chip ID, and perhaps more, from the name of the file 76 | _, chip_id_str, *misc = file.split("-") 77 | 78 | # SLICE chips have more information 79 | if misc: 80 | chip_id_int = int(chip_id_str) 81 | _, time = misc 82 | # Remove file extension 83 | time, _ = os.path.splitext(time) 84 | # Remove _1 at end of each time and convert to microseconds 85 | time = time[:-2] + "000" 86 | # Get milliseconds since the unix epoch 87 | epoch = datetime.utcfromtimestamp(0) 88 | dt = datetime.strptime(time, "%Y%m%d_%H%M%S.%f") 89 | epoch_str = str(int((dt - epoch).total_seconds())) 90 | else: 91 | chip_id, _ = os.path.splitext(chip_id_str) 92 | chip_id_int = int(chip_id) 93 | 94 | idx_key = (obset_int, chip_id_int) 95 | idx_val = { 96 | 'file': file_path, 97 | 'meta': { 98 | 'obSetName': name, 99 | 'epoch': epoch_str, 100 | }, 101 | } 102 | return idx_key, idx_val 103 | 104 | def __create_chip(self, file_info, truth_value): 105 | """Converts parsing / indexing results into a pelops.datasets.chip.Chip object""" 106 | if truth_value == 0: 107 | self.__noise_seq += 1 108 | car_id = 'unk-{:09d}'.format(self.__noise_seq) 109 | else: 110 | car_id = 'tgt-{:09d}'.format(truth_value) 111 | 112 | chip_params = [ 113 | file_info['file'], 114 | car_id, 115 | file_info['meta']['obSetName'], 116 | file_info['meta']['epoch'], 117 | file_info['meta'] 118 | ] 119 | return chip.Chip(*chip_params) 120 | 121 | def __set_chips(self): 122 | """Sets the chips dict of the superclass to contain chip files for the dataset.""" 123 | 124 | # Scan filesystem 125 | root_files = [root_file for root_file in os.walk(self.dataset_path)] 126 | 127 | # Decode truth.txt file 128 | truth_files = [os.path.join(walked[0], 'truth.txt') for walked in root_files if 'truth.txt' in walked[2]] 129 | if len(truth_files) == 0: 130 | raise IOError("No truth file found.") 131 | elif len(truth_files) > 1: 132 | raise IOError("Too many truth files available.") 133 | 134 | truth_data = self.__decode_truth_file(truth_files.pop()) 135 | if len(truth_data) < 1: 136 | raise IOError("No truth loaded") 137 | if self.__debug: 138 | print("{} truth records loaded.".format(len(truth_data))) 139 | 140 | # Index all image chips 141 | file_paths = [[os.path.join(walked[0], wfile) for wfile in walked[2]] for walked in root_files] 142 | chip_idx = dict(filter(lambda t: t is not None, map(self.index_chip, itertools.chain(*file_paths)))) 143 | 144 | if len(chip_idx) != len(truth_data): 145 | raise IOError("Number of truth records not equal to number of chips.") 146 | if self.__debug: 147 | print("{} image chips loaded.".format(len(chip_idx))) 148 | 149 | # Create and store chips 150 | self.chips = {meta['file']: self.__create_chip(meta, 
truth_data[idx]) for idx, meta in chip_idx.items()} 151 | if self.__debug: 152 | print("{} chip.Chips loaded.".format(len(self.chips))) 153 | -------------------------------------------------------------------------------- /pelops/datasets/str.py: -------------------------------------------------------------------------------- 1 | 2 | import collections 3 | import os 4 | 5 | import pelops.datasets.chip as chip 6 | import pelops.utils as utils 7 | 8 | # ================================================================================ 9 | # STR_SA Dataset 10 | # ================================================================================ 11 | 12 | 13 | class StrDataset(chip.ChipDataset): 14 | # define paths to files and directories 15 | filenames = collections.namedtuple( 16 | "filenames", 17 | [ 18 | "dir_all" 19 | ] 20 | ) 21 | filepaths = filenames ( 22 | "crossCameraMatches" 23 | ) 24 | 25 | def __init__(self, dataset_path, set_type=None): 26 | super().__init__(dataset_path, set_type) 27 | self.__set_filepaths() # set self.__filepaths 28 | self.__set_chips() 29 | # STR does not differentiate the set type 30 | 31 | def __set_filepaths(self): 32 | self.__filepaths = StrDataset.filenames( 33 | os.path.join(self.dataset_path, StrDataset.filepaths.dir_all) 34 | ) 35 | 36 | def __set_chips(self): 37 | directory = self.__filepaths.dir_all 38 | for file in os.listdir(directory): 39 | path = os.path.join(directory, file) 40 | 41 | # Only interested in certain files 42 | is_valid = os.path.isfile(path) 43 | is_png = path.endswith(".png") 44 | is_mask = "mask" in path 45 | if not is_valid or not is_png or is_mask: 46 | continue 47 | 48 | # Set all Chip variables 49 | car_id = get_sa_car_id(path) 50 | cam_id = get_sa_cam_id(path) 51 | 52 | time = None # No timestamp information 53 | misc = None # No miscellaneous information 54 | 55 | # Make chip 56 | current_chip = chip.Chip( 57 | path, 58 | car_id, 59 | cam_id, 60 | time, 61 | misc 62 | ) 63 | 64 | self.chips[path] = current_chip 65 | 66 | 67 | def int_from_string(string, start_chars, int_len): 68 | # We only want to use the filename, not the directory names 69 | base_string = os.path.basename(string) 70 | loc = base_string.find(start_chars) 71 | 72 | # Not found 73 | if loc < 0: 74 | return None 75 | 76 | start = loc + len(start_chars) 77 | end = start + int_len 78 | str_num = base_string[start:end] 79 | return int(str_num) 80 | 81 | 82 | def get_sa_cam_id(string): 83 | return int_from_string(string, start_chars="_cam", int_len=2) 84 | 85 | 86 | def get_sa_car_id(string): 87 | return int_from_string(string, start_chars="match", int_len=5) 88 | -------------------------------------------------------------------------------- /pelops/datasets/veri.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import datetime 3 | import os 4 | import xml.etree.ElementTree 5 | 6 | import pelops.datasets.chip as chip 7 | import pelops.utils as utils 8 | 9 | # ================================================================================ 10 | # Veri Dataset 11 | # ================================================================================ 12 | 13 | 14 | class VeriDataset(chip.ChipDataset): 15 | filenames = collections.namedtuple( 16 | "filenames", 17 | [ 18 | "name_query", 19 | "name_test", 20 | "name_train", 21 | "dir_query", 22 | "dir_test", 23 | "dir_train", 24 | "list_color", 25 | "list_type", 26 | "ground_truths", 27 | "junk_images", 28 | "label_train" 29 | ] 30 | ) 31 | filepaths = 
filenames( 32 | "name_query.txt", 33 | "name_test.txt", 34 | "name_train.txt", 35 | "image_query", 36 | "image_test", 37 | "image_train", 38 | "list_color.txt", 39 | "list_type.txt", 40 | "gt_image.txt", 41 | "jk_image.txt", 42 | "train_label.xml" 43 | ) 44 | 45 | def __init__(self, dataset_path, set_type=None): 46 | super().__init__(dataset_path, set_type) 47 | self.__set_filepaths() # set self.__filepaths 48 | self.__color_type = {} 49 | if self.set_type is utils.SetType.ALL or self.set_type is utils.SetType.TRAIN: 50 | self.__build_metadata_dict() 51 | self.__set_chips() 52 | 53 | def __build_metadata_dict(self): 54 | """Extract car type and color from the label file.""" 55 | try: 56 | root = xml.etree.ElementTree.parse(self.__filepaths.label_train).getroot() 57 | except ValueError as e: 58 | URL = "https://github.com/Lab41/pelops/issues/72" 59 | ERROR = ( 60 | str(e) + "\n\n" 61 | "The label file 'train_label.xml' comes malformed from the\n" 62 | "source. The first line needs to be changed to:\n" 63 | "''\n" 64 | "if it is not already.\n" 65 | "See: " + URL 66 | ) 67 | raise ValueError(ERROR) 68 | 69 | colors = { 70 | 1: "yellow", 2: "orange", 3: "green", 4: "gray", 5: "red", 71 | 6: "blue", 7: "white", 8: "golden", 9: "brown", 10: "black", 72 | } 73 | types = { 74 | 0: "unknown", 1: "sedan", 2: "suv", 3: "van", 4: "hatchback", 75 | 5: "mpv", 6: "pickup", 7: "bus", 8: "truck", 9: "estate", 76 | } 77 | 78 | # Version 1.0 of the VeRI data has a bug where several cars are labeled 79 | # as the illegal type 0: 80 | # 81 | # https://github.com/Lab41/pelops/issues/76 82 | # 83 | # These cars are actually SUVs (or, cross-overs) and hence should by 84 | # type 2. 85 | if root.attrib["Version"] == "1.0": 86 | types[0] = "suv" 87 | URL = "https://github.com/Lab41/pelops/issues/76" 88 | output = ( 89 | "VeRI Version 1.0 found! 
Patching `typeID=0` to `typeID=2`.\n" 90 | "See: " + URL 91 | ) 92 | print(output) 93 | 94 | self.__color_type = {} 95 | for child in root.iter("Item"): 96 | # Get the IDs from the XML node 97 | vehicle_id = child.attrib["vehicleID"] 98 | color = child.attrib["colorID"] 99 | body_type = child.attrib["typeID"] 100 | 101 | vehicle_id_int = int(vehicle_id) 102 | color_id = int(color) 103 | body_id = int(body_type) 104 | str_color = colors[color_id] 105 | str_body = types[body_id] 106 | 107 | self.__color_type[vehicle_id_int] = (str_color, str_body) 108 | 109 | def __set_filepaths(self): 110 | self.__filepaths = VeriDataset.filenames( 111 | os.path.join(self.dataset_path, VeriDataset.filepaths.name_query), 112 | os.path.join(self.dataset_path, VeriDataset.filepaths.name_test), 113 | os.path.join(self.dataset_path, VeriDataset.filepaths.name_train), 114 | os.path.join(self.dataset_path, VeriDataset.filepaths.dir_query), 115 | os.path.join(self.dataset_path, VeriDataset.filepaths.dir_test), 116 | os.path.join(self.dataset_path, VeriDataset.filepaths.dir_train), 117 | os.path.join(self.dataset_path, VeriDataset.filepaths.list_color), 118 | os.path.join(self.dataset_path, VeriDataset.filepaths.list_type), 119 | os.path.join(self.dataset_path, VeriDataset.filepaths.ground_truths), 120 | os.path.join(self.dataset_path, VeriDataset.filepaths.junk_images), 121 | os.path.join(self.dataset_path, VeriDataset.filepaths.label_train), 122 | ) 123 | 124 | def __set_chips(self): 125 | # TODO: ignore images labeled as query, so we do not have to keep tabs for identical chips 126 | # identify all the chips 127 | all_names_filepaths = { 128 | utils.SetType.ALL: [self.__filepaths.name_query, self.__filepaths.name_test, self.__filepaths.name_train], 129 | utils.SetType.QUERY: [self.__filepaths.name_query], 130 | utils.SetType.TEST: [self.__filepaths.name_test], 131 | utils.SetType.TRAIN: [self.__filepaths.name_train], 132 | }.get(self.set_type) 133 | # create chip objects based on the names listed in the files 134 | for name_filepath in all_names_filepaths: 135 | if VeriDataset.filepaths.name_query in name_filepath: 136 | img_dir = self.__filepaths.dir_query 137 | elif VeriDataset.filepaths.name_test in name_filepath: 138 | img_dir = self.__filepaths.dir_test 139 | else: # VeriDataset.filepaths.name_train in filepath 140 | img_dir = self.__filepaths.dir_train 141 | for name in open(name_filepath): 142 | current_chip = self.__create_chip(img_dir, name.strip()) 143 | self.chips[current_chip.filepath] = current_chip 144 | 145 | def __create_chip(self, img_dir, img_name): 146 | # information about the chip resides in the chip's name 147 | splitter = img_name.split("_") 148 | misc = {} 149 | 150 | filepath = os.path.join(img_dir, img_name) 151 | car_id = int(splitter[0]) 152 | cam_id = int(utils.get_numeric(splitter[1])) 153 | time = datetime.datetime.fromtimestamp(int(splitter[2])) 154 | misc["binary"] = int(os.path.splitext(splitter[3])[0]) 155 | 156 | color, vehicle_type = self.__color_type.get(car_id, (None, None)) 157 | misc["color"] = color 158 | misc["vehicle_type"] = vehicle_type 159 | 160 | return chip.Chip(filepath, car_id, cam_id, time, misc) 161 | -------------------------------------------------------------------------------- /pelops/etl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/etl/__init__.py 
-------------------------------------------------------------------------------- /pelops/etl/computeMatrixCMC.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | from collections import defaultdict 4 | 5 | from matplotlib import pyplot 6 | 7 | 8 | def makeTransDicts(reindexFile): 9 | reindex = open(reindexFile, 'r') 10 | file2num = dict() 11 | num2file = dict() 12 | index = 0 13 | for line in reindex: 14 | line = line.strip() 15 | file2num[line] = index 16 | num2file[index] = line 17 | index += 1 18 | return (file2num, num2file) 19 | 20 | 21 | def makeMatrix(matrixFilename, num2file, file2num, measure='cosine'): 22 | 23 | a = open(matrixFilename, 'r') 24 | lines = 0 25 | for line in a: 26 | lines += 1 27 | a.close() 28 | 29 | Matrix = [[0 for x in range(lines)] for y in range(lines)] 30 | matrixFile = open(matrixFilename, 'r') 31 | for line in matrixFile: 32 | 33 | line = line.strip() 34 | line = json.loads(line) 35 | x = file2num[line['x']] 36 | y = file2num[line['y']] 37 | Matrix[x][y] = line[measure] 38 | Matrix[y][x] = line[measure] 39 | 40 | for index in range(0, lines): 41 | Matrix[index][index] = 8675309 42 | return Matrix 43 | 44 | 45 | def getrank(car, s, maxval=-1): 46 | for sidx, work in enumerate(s): 47 | # sval = work[0] 48 | scar = work[1] 49 | if scar == car: 50 | return sidx 51 | return maxval 52 | 53 | 54 | def preCMC(Matrix, num2file, downto=50): 55 | retval = defaultdict(int) 56 | start = time.time() 57 | size = len(Matrix[0]) 58 | 59 | for oindex in range(size): 60 | if oindex % 1000 == 0: 61 | print('index:{0} time:{1}'.format(oindex, time.time() - start)) 62 | start = time.time() 63 | 64 | car = num2file[oindex].split('_')[0] 65 | 66 | current = list() 67 | 68 | for idx, val in enumerate(Matrix[oindex]): 69 | current.append((float(val), num2file[idx].split('_')[0])) 70 | 71 | s = sorted(current, key=lambda tup: tup[0])[:downto] 72 | maxSearch = downto + 1 73 | r = getrank(car, s, maxval=maxSearch) 74 | retval[r] += 1 75 | return retval 76 | 77 | 78 | def computeCMC(rawCounts, num): 79 | idx = sorted(rawCounts) 80 | sum = 0 81 | CMC = list() 82 | for index in range(0, len(idx)): 83 | sum += rawCounts[index] 84 | print (index, sum) 85 | CMC.append(sum / float(num)) 86 | return CMC 87 | 88 | 89 | testFilesName = '/local_data/dgrossman/VeRi/test_uniqfiles' 90 | matrixFilename = '/local_data/dgrossman/VeRi/matrixFile.test_uniqfile' 91 | file2num, num2file = makeTransDicts(testFilesName) 92 | Matrix = makeMatrix(matrixFilename, num2file, file2num) 93 | rawCounts = preCMC(Matrix, num2file) 94 | CMC = computeCMC(rawCounts, len(Matrix[0])) 95 | 96 | # pyplot.ylim(0,1) 97 | pyplot.plot(CMC[:-1]) 98 | pyplot.show() 99 | -------------------------------------------------------------------------------- /pelops/etl/json2h5.py: -------------------------------------------------------------------------------- 1 | '''transform the json files into h5py files 2 | 3 | Input: 4 | one json encoded dict / line 5 | dict should have the following keys: 6 | colorID - colorID of the vehicle 7 | vehicleID - vehicle ID 8 | resnet50 - feature vector of the vehicle 9 | imageName - name of the file in storage 10 | typeID - ?? 11 | cameraID - which camera took the image 12 | 13 | Output: 14 | h5py file with the following datasets 15 | colorID - int colorID of the vehicle 16 | vehicleID - int vehicle ID 17 | resnet50 - [float] feature vector of the vehicle 18 | imageName - str name of the file in storage 19 | typeID - int ?? 
20 | cameraID - which camera took the image 21 | 22 | Usage: 23 | json2h5.py [-hv] 24 | json2h5.py -i -o 25 | 26 | Arguments: 27 | INFILE - json infile name 28 | OUTFILE - h5py outfile name 29 | 30 | Options: 31 | -h, --help :show this message 32 | -v, --version :Version of the program 33 | -i, --input= :input file for the program 34 | -o, --output= :output file for the program 35 | 36 | ''' 37 | import docopt 38 | import h5py 39 | import json 40 | import numpy as np 41 | import sys 42 | 43 | 44 | def makeJsonList(fileName): 45 | retval = list() 46 | with open(fileName, 'r') as f: 47 | for line in f: 48 | line = line.strip() 49 | line = json.loads(line) 50 | retval.append(line) 51 | return retval 52 | 53 | 54 | def extractColumn(colName, jsonList, t): 55 | retval = list() 56 | for line in jsonList: 57 | if t == str: 58 | retval.append(str(line[colName]).encode('ascii', 'ignore')) 59 | if t == int: 60 | retval.append(int(line[colName])) 61 | if t == float: 62 | vector = list() 63 | for element in line[colName]: 64 | vector.append(float(element)) 65 | retval.append(vector) 66 | return retval 67 | 68 | 69 | def make5file(file5Name, names, jsonList): 70 | with h5py.File(file5Name, 'w') as f: 71 | for o, i, t, t2 in names: 72 | sys.stdout.write('converting column {0}'.format(o)) 73 | temp = extractColumn(o, jsonList, t) 74 | sys.stdout.write('...Done\n') 75 | sys.stdout.write('making dataset {0}'.format(i)) 76 | f.create_dataset(i, data=temp, dtype=t2) 77 | sys.stdout.write('...Done\n') 78 | 79 | 80 | def main(args): 81 | try: 82 | inFileName = args['--input'] 83 | outFileName = args['--output'] 84 | except docopt.DocoptExit as e: 85 | sys.exit('error: input invalid options: {0}'.format(e)) 86 | 87 | f = np.dtype('float') 88 | c = h5py.special_dtype(vlen=bytes) 89 | names = [('colorID', 'colorID', int, int), 90 | ('vehicleID', 'vehicleID', int, int), 91 | ('resnet50', 'feats', float, f), 92 | ('imageName', 'ids', str, c), 93 | ('typeID', 'typeID', int, int), 94 | ('cameraID', 'cameraID', str, c)] 95 | 96 | sys.stdout.write('Reading {0}'.format(inFileName)) 97 | jsonList = makeJsonList(inFileName) 98 | sys.stdout.write('...Done\n') 99 | 100 | make5file(outFileName, names, jsonList) 101 | 102 | if __name__ == '__main__': 103 | args = docopt.docopt(__doc__, version='json2h5.py 1.0') 104 | main(args) 105 | -------------------------------------------------------------------------------- /pelops/etl/makeDistMatrix.py: -------------------------------------------------------------------------------- 1 | """ make the data for comparing entity id 2 | 3 | Input: 4 | processsed json file containing the list of images to compare 5 | the file must contain the 'resnet50' feature vector 6 | 7 | Output: 8 | all pairs comparison between images using resnet50 9 | output file lines will have the form of json dict 10 | dict will contain the following keys 11 | x - image name of the first image in comparison 12 | y - image name of the second image in comparison 13 | cosine - cosine distance bewteen the images 14 | euclidean - euclidian distance between the images 15 | 16 | Usage: 17 | makeDistMatrix [-hv] 18 | makeDistMatrix -i [-w ] 19 | 20 | Arguments: 21 | INPUT_FILE :file of the json description of the VeRi files 22 | WORKERS :number of threads in the pool 23 | 24 | Options: 25 | -i, --inputFile= :file location of the input 26 | -w,--workers= :num of workers in threadpool [default: 10] 27 | """ 28 | import docopt 29 | import json 30 | import sys 31 | from multiprocessing import Pool 32 | import 
scipy.spatial.distance 33 | import itertools 34 | import numpy as np 35 | import time 36 | 37 | 38 | # read the list of things to compare 39 | def makeWork(vectorFileName): 40 | vfile = open(vectorFileName, 'r') 41 | retval = list() 42 | for line in vfile: 43 | line = line.strip() 44 | line = json.loads(line) 45 | retval.append(line) 46 | vfile.close() 47 | return retval 48 | 49 | 50 | # help by chopping work into chunks 51 | def grouper(n, iterable): 52 | it = iter(iterable) 53 | while True: 54 | chunk = tuple(itertools.islice(it, n)) 55 | if not chunk: 56 | return 57 | yield chunk 58 | 59 | 60 | # my distance measures 61 | def my_dist(workList): 62 | retval = list() 63 | 64 | for pair in workList: 65 | x = pair[0] 66 | y = pair[1] 67 | fx = np.asarray(x['resnet50']) 68 | fy = np.asarray(y['resnet50']) 69 | workItem = dict() 70 | dc = str(float(scipy.spatial.distance.cosine(fx, fy))) 71 | de = str(float(scipy.spatial.distance.euclidean(fx, fy))) 72 | workItem['x'] = x['imageName'] 73 | workItem['y'] = y['imageName'] 74 | workItem['cosine'] = dc 75 | workItem['euclidean'] = de 76 | retval.append(workItem) 77 | 78 | return (retval) 79 | 80 | 81 | # takes in a json file with vectors and creates all the pairwise 82 | # distance calculations, saves output to file 83 | def main(args, atOnceOuter=100000, atOnceInner=10000): 84 | try: 85 | pworkers = args['--workers'] 86 | inFileName = args['--inputFile'] 87 | except docopt.DocoptExit as e: 88 | sys.exit('ERROR: input invalid options {0}'.format(e)) 89 | 90 | inFileName = sys.argv[1] 91 | work = makeWork(inFileName) 92 | p = Pool(pworkers) 93 | 94 | outFileName = 'matrixFile.{0}'.format(inFileName) 95 | matrixFile = open(outFileName, 'w') 96 | 97 | total = 0 98 | for batch in grouper(atOnceOuter, itertools.combinations(work, 2)): 99 | start = time.time() 100 | batched = list() 101 | 102 | for workbatch in grouper(atOnceInner, batch): 103 | batched.append(workbatch) 104 | 105 | retval = p.map(my_dist, batched) 106 | end = time.time() 107 | start2 = time.time() 108 | for listLine in retval: 109 | for line in listLine: 110 | total = total + 1 111 | matrixFile.write(json.dumps(line)+'\n') 112 | end2 = time.time() 113 | 114 | fstr = 'proc elapsed:{0} sec proc:{1} total{2}' 115 | print(fstr.format(end-start, atOnceOuter, total)) 116 | print('IO elapsed:{0}\n'.format(end2-start2)) 117 | matrixFile.close() 118 | 119 | if __name__ == '__main__': 120 | args = docopt.docopt(__doc__,version='makeDistMatrix 1.0') 121 | main() 122 | -------------------------------------------------------------------------------- /pelops/etl/makeFeaturesResNet50.py: -------------------------------------------------------------------------------- 1 | """ Generate resnet50 features 2 | 3 | Input: 4 | infile shold be a list of json lines one json/line 5 | 6 | Output: 7 | appending of resnet50 features to each json line 8 | 9 | Usage: 10 | makeFeaturesResNet50 [-hv] 11 | makeFeaturesResNet50 -i -p 12 | 13 | Arguments: 14 | INPUT_FILENAME : location of the file to enrich with resnet features 15 | IMAGE_DIR : full path to where the images live 16 | 17 | Options: 18 | -h, --help : Show this help message. 19 | -v, --version : Show the version number. 
20 | -i, --inFile : input file to enrich with reset fetures 21 | -p, --path : Path to the directory holding the images 22 | 23 | 24 | """ 25 | 26 | import docopt 27 | import numpy as np 28 | from keras.applications.resnet50 import ResNet50 29 | from keras.preprocessing import image 30 | from keras.applications.imagenet_utils import preprocess_input 31 | from keras.models import Model 32 | import os 33 | import time 34 | import json 35 | import sys 36 | 37 | 38 | # return an image from a file, default resize to 224,224 39 | def load_image(img_path, resizex=224, resizey=224): 40 | data = image.load_img(img_path, target_size=(resizex, resizey)) 41 | x = image.img_to_array(data) 42 | x = np.expand_dims(x, axis=0) 43 | x = preprocess_input(x) 44 | return x 45 | 46 | 47 | # load the imagenet networks 48 | def get_models(): 49 | # include_top needs to be True for this to work 50 | base_model = ResNet50(weights='imagenet', include_top=True) 51 | model = Model(input=base_model.input, 52 | output=base_model.get_layer('flatten_1').output) 53 | return (model, base_model) 54 | 55 | 56 | # return feature vector for a given img, and model 57 | def image_features(img, model): 58 | features = model.predict(img) 59 | return features 60 | 61 | 62 | # read the files to process 63 | def getList(name): 64 | retval = list() 65 | f = open('name', 'r') 66 | for line in f: 67 | line = line.strip() 68 | line = json.loads(line) 69 | retval.append(line) 70 | f.close() 71 | return retval 72 | 73 | 74 | # perform the file by file processing 75 | def process(trainingList, prefix, model, outFilename, batchSize=1000): 76 | outFile = open(outFilename, 'w') 77 | start = time.time() 78 | for idx, line in enumerate(trainingList): 79 | tempd = dict() 80 | if idx % batchSize == 0: 81 | end = time.time() - start 82 | start = time.time() 83 | fstring = 'total {0} batch {1} images in {2} seconds' 84 | print (fstring.format(idx, batchSize, end)) 85 | path = os.path.join(prefix, line['imageName']) 86 | img = load_image(path) 87 | feature = image_features(img, model) 88 | tempd['resnet50'] = feature.tolist()[0] 89 | tempd.update(line) 90 | outFile.write(json.dumps(tempd)+'\n') 91 | outFile.close() 92 | 93 | 94 | # read json file append feature vector to each line dict 95 | def main(args): 96 | try: 97 | lineFileName = args['--inFile'] 98 | prefix = args['--path'] 99 | 100 | except docopt.DocoptExit as e: 101 | sys.exit('Error: input invalid options {0}'.format(e)) 102 | 103 | outFilename = '{0}.resnet50.json'.format(lineFileName) 104 | model, base_model = get_models() 105 | 106 | print('loading...') 107 | trainingList = getList(lineFileName) 108 | 109 | print('processing...') 110 | process(trainingList, prefix, model, outFilename) 111 | 112 | print('done.') 113 | 114 | 115 | if __name__ == '__main__': 116 | args = docopt.docopt(__doc__, version='1.0') 117 | main(args) 118 | -------------------------------------------------------------------------------- /pelops/etl/veriFileList2Json.py: -------------------------------------------------------------------------------- 1 | """ turn the list of files into a list of json dicts about the files 2 | 3 | Input: 4 | Take the VeRi datset that contains the following information: 5 | * 49358 images (1679 query images, 11580 test images, 37779 train images) 6 | * 776 vehicles 7 | * 20 cameras 8 | * covering 1.0 km^2 area in 24 hours 9 | 10 | convert the name_* files into json files for processing 11 | 12 | Output: 13 | json file with the following attributes in a dict per line: 14 | imageName 15 | 
vehicleID 16 | cameraID 17 | colorID 18 | typeID 19 | 20 | Usage: 21 | veriFileList2Json [-hv] 22 | veriFileList2Json -i 23 | 24 | Arguments: 25 | INFILE_NAME :file path to the VeRI name_ file 26 | 27 | Options: 28 | -h, --help :Show this message 29 | -v, --version :Version of the prog 30 | -i, --inputFile :location of the VeRi name_ file to process 31 | 32 | 33 | 34 | """ 35 | import docopt 36 | import json 37 | import sys 38 | 39 | 40 | # turn the list of files into json for working with 41 | def main(args): 42 | try: 43 | inFileName = args['--inputFile'] 44 | except docopt.DocoptExit as e: 45 | sys.exit('error: input invalid options: {0}'.format(e)) 46 | 47 | outFileName = '{0}.json'.format(inFileName) 48 | 49 | inFile = open(inFileName, 'r') 50 | outFile = open(outFileName, 'w') 51 | 52 | for line in inFile: 53 | d = dict() 54 | line = line.strip() 55 | attrs = line.split('_') 56 | d['imageName'] = line 57 | d['vehicleID'] = attrs[0] 58 | d['cameraID'] = attrs[1] 59 | d['colorID'] = str(-1) 60 | d['typeID'] = str(-1) 61 | outFile.write(json.dumps(d)+'\n') 62 | inFile.close() 63 | 64 | 65 | if __name__ == '__main__': 66 | args = docopt.docopt(__doc__,version='veriFileList2Json 1.0') 67 | main(args) 68 | -------------------------------------------------------------------------------- /pelops/experiment_api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/experiment_api/__init__.py -------------------------------------------------------------------------------- /pelops/experiment_api/run_metric.sh: -------------------------------------------------------------------------------- 1 | python3 metric.py -c -w VeriDataset -y 2 -r 5 "/path/to/veri/dataset" "/path/to/image/feature/json/file" 2 | -------------------------------------------------------------------------------- /pelops/features/feature_producer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | 4 | from pelops.datasets.chipper import Chipper 5 | from pelops.datasets.featuredataset import FeatureDataset 6 | 7 | 8 | class FeatureProducer(object): 9 | def __init__(self, chip_producer): 10 | self.chip_producer = chip_producer 11 | self.set_variables() 12 | 13 | def return_features(self): 14 | if isinstance(self.chip_producer, Chipper): 15 | chips = [] 16 | chip_keys = [] 17 | for chip_list in self.chip_producer: 18 | chips.extend(chip_list) 19 | for i, chip in enumerate(chip_list): 20 | chip_keys.append('{}_{}'.format(chip.frame_number, i)) 21 | 22 | else: 23 | chips = [] 24 | chip_keys = [] 25 | for chip_key, chip in self.chip_producer.chips.items(): 26 | chips.append(chip) 27 | chip_keys.append(chip_key) 28 | 29 | feats = np.zeros((len(chips), self.feat_size), dtype=np.float32) 30 | for i, chip in enumerate(chips): 31 | feats[i] = self.produce_features(chip) 32 | return chip_keys, chips, feats 33 | 34 | @staticmethod 35 | def get_image(chip): 36 | if hasattr(chip, 'img_data'): 37 | img = Image.fromarray(chip.img_data) 38 | return img.convert('RGB') 39 | else: 40 | return Image.open(chip.filepath) 41 | 42 | def produce_features(self, chip): 43 | """Takes a chip object and returns a feature vector of size 44 | self.feat_size. 
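Concrete producers such as HOGFeatureProducer, ResNet50FeatureProducer, and KerasModelFeatureProducer override this method.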
""" 45 | raise NotImplementedError("produce_features() not implemented") 46 | 47 | def save_features(self, output_filename): 48 | """ 49 | Calculate features and save as a "FeatureDataset" 50 | Args: 51 | filename: 52 | 53 | Returns: 54 | 55 | """ 56 | # TODO: See if this function should save the features in memory 57 | if isinstance(self.chip_producer, Chipper): 58 | raise NotImplementedError("Only ChipDatasets are supported at this time") 59 | chip_keys, chips, features = self.return_features() 60 | FeatureDataset.save(output_filename, chip_keys, chips, features) 61 | 62 | def set_variables(self): 63 | """Child classes should use this to set self.feat_size, and any other 64 | needed variables. """ 65 | self.feat_size = None # Set this in your inherited class 66 | raise NotImplementedError("set_variables() is not implemented") 67 | -------------------------------------------------------------------------------- /pelops/features/hog.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | from skimage import color 4 | from skimage.feature import hog 5 | 6 | from pelops.features.feature_producer import FeatureProducer 7 | 8 | 9 | class HOGFeatureProducer(FeatureProducer): 10 | 11 | def __init__(self, chip_producer, image_size=(224,224), cells=(16, 16), orientations=8, histogram_bins_per_channel=256): 12 | self.image_size = image_size 13 | self.cells = cells 14 | self.orientations = orientations 15 | self.histogram_bins_per_channel = histogram_bins_per_channel 16 | super().__init__(chip_producer) 17 | 18 | def produce_features(self, chip): 19 | """Takes a chip object and returns a feature vector of size 20 | self.feat_size. """ 21 | img = self.get_image(chip) 22 | img = img.resize(self.image_size, Image.BICUBIC) 23 | img_x, img_y = img.size 24 | 25 | # Calculate histogram of each channel 26 | channels = img.split() 27 | hist_features = np.full(shape=3 * self.histogram_bins_per_channel, fill_value=-1) 28 | 29 | # We expect RGB images. If something else is passed warn the user and 30 | # continue. 31 | if len(channels) < 3: 32 | print("Non-RBG image! Vector will be padded with -1!") 33 | if len(channels) > 3: 34 | print("Non-RBG image! 
Channels beyond the first three will be ignored!") 35 | channels = channel[:3] 36 | 37 | for i, channel in enumerate(channels): 38 | channel_array = np.array(channel) 39 | values, _ = np.histogram(channel_array.flat, bins=self.histogram_bins_per_channel) 40 | start = i * self.histogram_bins_per_channel 41 | end = (i+1) * self.histogram_bins_per_channel 42 | hist_features[start:end] = values 43 | 44 | # Calculate HOG features, which require a grayscale image 45 | img = color.rgb2gray(np.array(img)) 46 | features = hog( 47 | img, 48 | orientations=self.orientations, 49 | pixels_per_cell=(img_x / self.cells[0], img_y / self.cells[1]), 50 | cells_per_block=self.cells, # Normalize over the whole image 51 | ) 52 | 53 | return np.concatenate((features, hist_features)) 54 | 55 | def set_variables(self): 56 | hog_size = self.cells[0] * self.cells[1] * self.orientations 57 | hist_size = 3 * self.histogram_bins_per_channel 58 | self.feat_size = hog_size + hist_size 59 | -------------------------------------------------------------------------------- /pelops/features/keras_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.preprocessing import image 3 | from keras.applications.resnet50 import preprocess_input 4 | from keras.models import load_model, Model, model_from_json 5 | 6 | from PIL import Image as PIL_Image 7 | from pelops.features.feature_producer import FeatureProducer 8 | 9 | 10 | class KerasModelFeatureProducer(FeatureProducer): 11 | def __init__(self, chip_producer, model_filename, layer_name, weight_filename=None): 12 | global resnet_model 13 | super().__init__(chip_producer) 14 | 15 | if weight_filename is None: 16 | self.original_model = load_model(model_filename) 17 | else: 18 | self.original_model = self.load_model_workaround(model_filename,weight_filename) 19 | 20 | self.keras_model = Model(input=self.original_model.input, 21 | output=self.original_model.get_layer(layer_name).output) 22 | 23 | @staticmethod 24 | def load_model_workaround(model_filename,weight_filename): 25 | # load json and create model 26 | json_file = open(model_filename, 'r') 27 | loaded_model_json = json_file.read() 28 | json_file.close() 29 | 30 | loaded_model = model_from_json(loaded_model_json) 31 | 32 | # load weights into new model 33 | loaded_model.load_weights(weight_filename) 34 | return loaded_model 35 | 36 | @staticmethod 37 | def preprocess_image(img, x_dim=224, y_dim=224): 38 | img = img.resize((x_dim,y_dim), PIL_Image.BICUBIC) 39 | x = image.img_to_array(img) 40 | x = np.expand_dims(x, axis=0) 41 | x = preprocess_input(x) 42 | return x 43 | 44 | def produce_features(self, chip): 45 | pil_image = self.get_image(chip) 46 | preprocessed_image = self.preprocess_image(pil_image) 47 | image_features = self.keras_model.predict(preprocessed_image) 48 | return image_features 49 | 50 | def set_variables(self): 51 | self.feat_size = 2048 52 | -------------------------------------------------------------------------------- /pelops/features/resnet50.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.applications.resnet50 import ResNet50 3 | from keras.preprocessing import image 4 | from keras.applications.resnet50 import preprocess_input 5 | from keras.models import Model 6 | 7 | from PIL import Image as PIL_Image 8 | from pelops.features.feature_producer import FeatureProducer 9 | 10 | # Use global so we only load the resnet model once 11 | # TODO: find a better 
way to do this 12 | resnet_model = None 13 | 14 | 15 | class ResNet50FeatureProducer(FeatureProducer): 16 | def __init__(self, chip_producer): 17 | global resnet_model 18 | super().__init__(chip_producer) 19 | 20 | if resnet_model is None: 21 | # include_top needs to be True for this to work 22 | base_model = ResNet50(weights='imagenet', include_top=True) 23 | resnet_model = Model(input=base_model.input, 24 | output=base_model.get_layer('flatten_1').output) 25 | 26 | self.resnet_model = resnet_model 27 | 28 | @staticmethod 29 | def preprocess_image(img, x_dim=224, y_dim=224): 30 | if img.size != (x_dim, y_dim): 31 | img = img.resize((x_dim,y_dim), PIL_Image.BICUBIC) 32 | x = image.img_to_array(img) 33 | x = np.expand_dims(x, axis=0) 34 | x = preprocess_input(x) 35 | return x 36 | 37 | def produce_features(self, chip): 38 | pil_image = self.get_image(chip) 39 | preprocessed_image = self.preprocess_image(pil_image) 40 | image_features = self.resnet_model.predict(preprocessed_image) 41 | return image_features 42 | 43 | def set_variables(self): 44 | self.feat_size = 2048 45 | -------------------------------------------------------------------------------- /pelops/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/models/__init__.py -------------------------------------------------------------------------------- /pelops/models/makesvm.py: -------------------------------------------------------------------------------- 1 | """ work with SVM and chips """ 2 | import time 3 | 4 | import sklearn 5 | from scipy.stats import uniform as sp_rand 6 | from sklearn import svm 7 | from sklearn.externals import joblib 8 | from sklearn.model_selection import RandomizedSearchCV 9 | from tqdm import tnrange 10 | 11 | from pelops.analysis.camerautil import get_match_id, make_good_bad 12 | from pelops.analysis.comparecameras import make_work 13 | 14 | 15 | def train_svm(examples, fd_train, eg_train): 16 | """ 17 | train a support vector machine 18 | 19 | examples(int): number of examples to generate 20 | fd_train(featureDataset): where to join features to chips 21 | eg_train(experimentGenerator): makes experiments 22 | 23 | clf(SVM): scm classifier trainined on the input examples 24 | """ 25 | lessons_train = list() 26 | outcomes_train = list() 27 | for _ in tnrange(examples): 28 | cameras_train = eg_train.generate() 29 | match_id = get_match_id(cameras_train) 30 | goods, bads = make_good_bad(cameras_train, match_id) 31 | make_work(fd_train, lessons_train, outcomes_train, goods, 1) 32 | make_work(fd_train, lessons_train, outcomes_train, bads, 0) 33 | 34 | clf = svm.SVC() 35 | 36 | print('fitting') 37 | start = time.time() 38 | clf.fit(lessons_train, outcomes_train) 39 | end = time.time() 40 | print('fitting took {} seconds'.format(end - start)) 41 | return clf 42 | 43 | 44 | def search(examples, fd_train, eg_train, iterations): 45 | """ 46 | beginnnings of hyperparameter search for svm 47 | """ 48 | param_grid = {'C': sp_rand()} 49 | lessons_train = list() 50 | outcomes_train = list() 51 | for _ in tnrange(examples): 52 | cameras_train = eg_train.generate() 53 | match_id = get_match_id(cameras_train) 54 | goods, bads = make_good_bad(cameras_train, match_id) 55 | make_work(fd_train, lessons_train, outcomes_train, goods, 1) 56 | make_work(fd_train, lessons_train, outcomes_train, bads, 0) 57 | clf = svm.SVC() 58 | print('searching') 59 | start = time.time() 60 | rsearch = 
RandomizedSearchCV( 61 | estimator=clf, param_distributions=param_grid, n_iter=iterations) 62 | rsearch.fit(lessons_train, outcomes_train) 63 | end = time.time() 64 | print('searching took {} seconds'.format(end - start)) 65 | print(rsearch.best_score_) 66 | print(rsearch.best_estimator_.C) 67 | 68 | 69 | def save_model(model, filename): 70 | """ 71 | save a model to disk 72 | 73 | model(somemodel): trained model to save 74 | filename(str): location to safe the model 75 | """ 76 | joblib.dump(model, filename) 77 | 78 | 79 | def load_model(filename): 80 | """ 81 | load a model from disk. make sure that models only 82 | show up from version 0.18.1 of sklearn as other versions 83 | may not load correctly 84 | 85 | filename(str): name of file to load 86 | """ 87 | if sklearn.__version__ == '0.18.1': 88 | model = joblib.load(filename) 89 | return model 90 | else: 91 | print('upgrade sklearn to version 0.18.1') 92 | 93 | 94 | def test_svm(examples, clf_train, fd_test, eg_test): 95 | """ 96 | score the trained SVM against test features 97 | 98 | examples(int): number of examples to run 99 | clf_train(modle): model for evaluating testing data 100 | fd_test(featureDataset): testing dataset 101 | eg_test(experimentGenerator): generated experiments from testing dataset 102 | 103 | out(int): score from the model 104 | """ 105 | lessons_test = list() 106 | outcomes_test = list() 107 | 108 | for _ in tnrange(examples): 109 | cameras_test = eg_test.generate() 110 | match_id = get_match_id(cameras_test) 111 | goods, bads = make_good_bad(cameras_test, match_id) 112 | make_work(fd_test, lessons_test, outcomes_test, goods, 1) 113 | make_work(fd_test, lessons_test, outcomes_test, bads, 0) 114 | 115 | print('scoring') 116 | start = time.time() 117 | out = clf_train.score(lessons_test, outcomes_test) 118 | end = time.time() 119 | print('scoring took {} seconds'.format(end - start)) 120 | return out 121 | -------------------------------------------------------------------------------- /pelops/transform_img/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/transform_img/__init__.py -------------------------------------------------------------------------------- /pelops/transform_img/run.sh: -------------------------------------------------------------------------------- 1 | python3 transform.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | matplotlib 4 | logger 5 | python-resize-image 6 | h5py 7 | imageio 8 | scikit-image 9 | keras 10 | tensorflow 11 | tqdm 12 | pytest 13 | hdfs3 14 | opencv-python 15 | docopt 16 | scikit-learn -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from setuptools import find_packages 3 | from setuptools import setup 4 | 5 | setup( 6 | name="Pelops", 7 | version="0.1.1", 8 | description="Car re-identification via deep learning", 9 | url="https://www.python.org/sigs/distutils-sig/", 10 | author="Lab41", 11 | author_email="lab41@iqt.org", 12 | license="Apache Software License", 13 | packages=find_packages(), 14 | classifiers=[ 15 | "Development Status :: 3 - Alpha", 16 | "Environment :: Console", 17 | "Intended Audience :: Science/Research", 
18 | "License :: OSI Approved :: Apache Software License", 19 | "Natural Language :: English", 20 | "Operating System :: OS Independent", 21 | "Programming Language :: Python :: 3", 22 | "Programming Language :: Python :: 3.5", 23 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 24 | ], 25 | keywords=[ 26 | "computer vision", 27 | "deep learning", 28 | "resnet", 29 | "vehicle re-identification", 30 | ], 31 | ) 32 | -------------------------------------------------------------------------------- /testci/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script is meant to be called by the "install" step defined in 3 | # .travis.yml. See http://docs.travis-ci.com/ for more details. 4 | # The behavior of the script is controlled by environment variabled defined 5 | # in the .travis.yml in the top level folder of the project. 6 | # 7 | # This script is adapted from a similar script from the scikit-learn repository. 8 | # Taken from: https://github.com/aabadie/nilearn/blob/circle_ci_anaconda/continuous_integration/install.sh 9 | # 10 | # License: 3-clause BSD 11 | 12 | set -e 13 | 14 | # Fix the compilers to workaround avoid having the Python 3.4 build 15 | # lookup for g++44 unexpectedly. 16 | export CC=gcc 17 | export CXX=g++ 18 | 19 | create_new_venv() { 20 | # At the time of writing numpy 1.9.1 is included in the travis 21 | # virtualenv but we want to be in control of the numpy version 22 | # we are using for example through apt-get install 23 | deactivate 24 | virtualenv --system-site-packages testvenv 25 | source testvenv/bin/activate 26 | pip install nose 27 | } 28 | 29 | print_conda_requirements() { 30 | # Echo a conda requirement string for example 31 | # "pip nose python='2.7.3 scikit-learn=*". It has a hardcoded 32 | # list of possible packages to install and looks at _VERSION 33 | # environment variables to know whether to install a given package and 34 | # if yes which version to install. For example: 35 | # - for numpy, NUMPY_VERSION is used 36 | # - for scikit-learn, SCIKIT_LEARN_VERSION is used 37 | TO_INSTALL_ALWAYS="pip nose" 38 | REQUIREMENTS="$TO_INSTALL_ALWAYS" 39 | TO_INSTALL_MAYBE="python numpy scipy matplotlib scikit-learn flake8" 40 | for PACKAGE in $TO_INSTALL_MAYBE; do 41 | # Capitalize package name and add _VERSION 42 | PACKAGE_VERSION_VARNAME="${PACKAGE^^}_VERSION" 43 | # replace - by _, needed for scikit-learn for example 44 | PACKAGE_VERSION_VARNAME="${PACKAGE_VERSION_VARNAME//-/_}" 45 | # dereference $PACKAGE_VERSION_VARNAME to figure out the 46 | # version to install 47 | PACKAGE_VERSION="${!PACKAGE_VERSION_VARNAME}" 48 | if [ -n "$PACKAGE_VERSION" ]; then 49 | REQUIREMENTS="$REQUIREMENTS $PACKAGE=$PACKAGE_VERSION" 50 | fi 51 | done 52 | echo $REQUIREMENTS 53 | } 54 | 55 | create_new_conda_env() { 56 | # Skip Travis related code on circle ci. 
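# (CircleCI exports the CIRCLECI variable, so the deactivate below only runs on Travis)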
57 | if [ -z $CIRCLECI ]; then 58 | # Deactivate the travis-provided virtual environment and setup a 59 | # conda-based environment instead 60 | deactivate 61 | fi 62 | 63 | # Use the miniconda installer for faster download / install of conda 64 | # itself 65 | wget https://repo.continuum.io/miniconda/Miniconda3-4.2.11-Linux-x86_64.sh \ 66 | -O ~/miniconda.sh 67 | chmod +x ~/miniconda.sh && ~/miniconda.sh -b 68 | export PATH=$HOME/miniconda2/bin:$PATH 69 | echo $PATH 70 | conda update --quiet --yes conda 71 | 72 | # Configure the conda environment and put it in the path using the 73 | # provided versions 74 | REQUIREMENTS=$(print_conda_requirements) 75 | echo "conda requirements string: $REQUIREMENTS" 76 | conda create -n testenv --quiet --yes $REQUIREMENTS 77 | source activate testenv 78 | 79 | if [[ "$INSTALL_MKL" == "true" ]]; then 80 | # Make sure that MKL is used 81 | conda install --quiet --yes mkl 82 | elif [[ -z $CIRCLECI ]]; then 83 | # Travis doesn't use MKL but circle ci does for speeding up examples 84 | # generation in the html documentation. 85 | # Make sure that MKL is not used 86 | conda remove --yes --features mkl || echo "MKL not installed" 87 | fi 88 | } 89 | 90 | if [[ "$DISTRIB" == "conda" ]]; then 91 | create_new_conda_env 92 | pip install nose-timer 93 | # Note: nibabel is in setup.py install_requires so nibabel will 94 | # always be installed eventually. Defining NIBABEL_VERSION is only 95 | # useful if you happen to want a specific nibabel version rather 96 | # than the latest available one. 97 | if [ -n "$NIBABEL_VERSION" ]; then 98 | pip install nibabel=="$NIBABEL_VERSION" 99 | fi 100 | 101 | else 102 | echo "Unrecognized distribution ($DISTRIB); cannot setup CI environment." 103 | exit 1 104 | fi 105 | 106 | pip install psutil memory_profiler 107 | 108 | if [[ "$COVERAGE" == "true" ]]; then 109 | pip install coverage coveralls 110 | fi 111 | -------------------------------------------------------------------------------- /testci/small.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/testci/small.hdf5 -------------------------------------------------------------------------------- /testci/small.json: -------------------------------------------------------------------------------- 1 | {"class_name": "Sequential", "keras_version": "1.2.0", "config": [{"class_name": "Dense", "config": {"bias": true, "trainable": true, "W_regularizer": null, "input_dtype": "float32", "output_dim": 12, "name": "dense_8", "activation": "relu", "batch_input_shape": [null, 8], "init": "uniform", "activity_regularizer": null, "input_dim": 8, "b_constraint": null, "b_regularizer": null, "W_constraint": null}}]} -------------------------------------------------------------------------------- /testci/test_chip.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pelops.datasets.chip import ChipDataset, Chip 4 | 5 | 6 | @pytest.fixture 7 | def chips(): 8 | CHIPS = ( 9 | # filepath, car_id, cam_id, time, misc 10 | ("car1_cam1.png", 1, 1, 100, None), 11 | ("car1_cam2.png", 1, 2, 105, None), 12 | ("car1_cam3.png", 1, 3, 110, None), 13 | ("car2_cam1.png", 2, 1, 100, None), 14 | ("car2_cam2.png", 2, 1, 102, None), 15 | ("car2_cam3.png", 2, 1, 104, None), 16 | ) 17 | 18 | chips = {} 19 | for filepath, car_id, cam_id, time, misc in CHIPS: 20 | chip = Chip(filepath, car_id, cam_id, time, misc) 21 | chips[filepath] 
= chip 22 | 23 | return chips 24 | 25 | 26 | @pytest.fixture 27 | def chip_dataset(chips): 28 | """ Set up a instance of ChipDataset(). """ 29 | # Setup the class 30 | instantiated_class = ChipDataset(dataset_path="Test") 31 | 32 | # Monkey Patch in a fake chips dictionary 33 | instantiated_class.chips = chips 34 | 35 | return instantiated_class 36 | 37 | 38 | def test_chips_len(chip_dataset, chips): 39 | """ Test that ChipDataset.chips is the correct length """ 40 | assert len(chips) == len(chip_dataset) 41 | 42 | 43 | def get_all_function_tester(in_chips, in_chipbase, index, test_function): 44 | """ Check that a chip getting function gets all the correct chips. 45 | 46 | This function tests a chip getting function, such as 47 | `get_all_chips_by_carid()` by creating a list of every correct chip from 48 | the true list of chips, and comparing it to the list returned by the 49 | function. 50 | 51 | Args: 52 | in_chips: The output of chips() 53 | in_chipbase: The output of chipbase() 54 | index: The location of the id in the chips object to use to compare. 55 | 0 is the filepath (aka chip_id), 1 is the car_id, 2 is the cam_id. 56 | test_function: The function to test, it should return a list of chips 57 | selected by some id value. 58 | 59 | Returns: 60 | None 61 | """ 62 | seen_ids = [] 63 | for tup in in_chips.values(): 64 | test_id = tup[index] 65 | # Generate all the chips by hand, and compare 66 | if test_id in seen_ids: 67 | continue 68 | seen_ids.append(test_id) 69 | chips_list = [] 70 | for _, val in in_chipbase.chips.items(): 71 | if val[index] == test_id: 72 | chips_list.append(val) 73 | 74 | chips_list.sort() 75 | test_chips = sorted(test_function(test_id)) 76 | assert chips_list == test_chips 77 | 78 | 79 | def test_get_all_chips_by_car_id(chip_dataset, chips): 80 | """ Test ChipDataset.get_all_chips_by_carid() """ 81 | CAR_ID_INDEX = 1 82 | get_all_function_tester(chips, chip_dataset, CAR_ID_INDEX, 83 | chip_dataset.get_all_chips_by_car_id) 84 | 85 | 86 | def test_get_all_chips_by_cam_id(chip_dataset, chips): 87 | """ Test ChipDataset.get_all_chips_by_camid() """ 88 | CAM_ID_INDEX = 2 89 | get_all_function_tester(chips, chip_dataset, CAM_ID_INDEX, 90 | chip_dataset.get_all_chips_by_cam_id) 91 | 92 | 93 | def test_get_distinct_cams_by_car_id(chip_dataset): 94 | """ Test ChipDataset.get_distinct_cams_by_car_id() and get_distinct_cams_per_car() """ 95 | CAR_ID = 1 96 | TEST_CAMS = [1, 2, 3] 97 | for test_cam, cam in zip(TEST_CAMS, sorted(chip_dataset.get_distinct_cams_by_car_id(CAR_ID))): 98 | assert test_cam == cam 99 | 100 | 101 | def test_get_all_cam_ids(chip_dataset): 102 | """ Test ChipDataset.get_all_cam_ids() """ 103 | TEST_CAMS = [1, 2, 3] 104 | for test_cam, cam in zip(TEST_CAMS, sorted(chip_dataset.get_all_cam_ids())): 105 | assert test_cam == cam 106 | 107 | 108 | def test_get_all_car_ids(chip_dataset): 109 | TEST_CARS = [1, 2] 110 | for test_car, car in zip (TEST_CARS, sorted(chip_dataset.get_all_car_ids())): 111 | assert test_car == car 112 | 113 | 114 | def test_chipdataset_iter(chip_dataset, chips): 115 | """ Test iteration over ChipDataset() """ 116 | for chip in chip_dataset: 117 | assert chip in chips.values() 118 | -------------------------------------------------------------------------------- /testci/test_chipper.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import datetime as dt 4 | 5 | # OpenCV is *VERY* hard to install in CircleCI, so if we don't have it, skip these tests 6 | cv2 = 
pytest.importorskip("cv2") # Skip all tests if not found 7 | from pelops.datasets.chipper import FrameProducer 8 | 9 | 10 | @pytest.fixture 11 | def frame_time_fp(tmpdir): 12 | # Define a FrameProducer with just enough information to run __get_frame_time() 13 | ifp = FrameProducer( 14 | file_list = [], 15 | ) 16 | ifp.vid_metadata = {"fps": 30} 17 | 18 | return ifp 19 | 20 | 21 | @pytest.fixture 22 | def frame_time_fp_data(tmpdir): 23 | # Data to test __get_frame_time() 24 | DATA = ( 25 | # (filename, frame number), (answer) 26 | (("/foo/bar/baz_20000101T000000-00000-006000.mp4", 0), dt.datetime(2000, 1, 1)), 27 | (("/foo/bar/baz_20000101T000000-00600-012000.mp4", 0), dt.datetime(2000, 1, 1, 0, 10)), 28 | (("/foo/bar/baz_20000101T000000-00000-006000.mp4", 1), dt.datetime(2000, 1, 1, 0, 0, 0, 33333)), 29 | (("/foo/bar/baz_20000101T000000-00600-012000.mp4", 10), dt.datetime(2000, 1, 1, 0, 10, 0, 333333)), 30 | ) 31 | return DATA 32 | 33 | 34 | def test_get_frame_time(frame_time_fp, frame_time_fp_data): 35 | for input, answer in frame_time_fp_data: 36 | output = frame_time_fp._FrameProducer__get_frame_time(input[0], input[1]) 37 | assert output == answer 38 | -------------------------------------------------------------------------------- /testci/test_dgcars.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import os.path 4 | import json 5 | 6 | import pelops.utils as utils 7 | from pelops.datasets.dgcars import DGCarsDataset 8 | from pelops.datasets.chip import Chip 9 | from pelops.utils import SetType 10 | 11 | 12 | @pytest.fixture 13 | def dgcars(tmpdir): 14 | # Define some test and training data, all will be the sum 15 | TRAIN = [ 16 | {"url": "http://example.com/img.jpg", "hash": "2a8cedfa145b4345aed3fd9e82796c3e", "resnet50": "minivan", "model": "ZX2", "filename": "black/Ford/2a8cedfa145b4345aed3fd9e82796c3e.jpg", "make": "Ford", "color": "black"}, 17 | {"url": "http://example.com/img.jpg", "hash": "8241daf452ace679162c69386f26ddc7", "resnet50": "sports_car", "model": "Mazda6 Sport", "filename": "red/Mazda/8241daf452ace679162c69386f26ddc7.jpg", "make": "Mazda", "color": "red"}, 18 | {"url": "http://example.com/img.jpg", "hash": "e8dc3fb78206b14fe3568c1b28e5e5a1", "resnet50": "cab", "model": "XJ Series", "filename": "yellow/Jaguar/e8dc3fb78206b14fe3568c1b28e5e5a1.jpg", "make": "Jaguar", "color": "yellow"}, 19 | ] 20 | TEST = [ 21 | {"url": "http://example.com/img.jpg", "hash": "8881e7b561393f1d778a70dd449433e9", "resnet50": "racer", "model": "IS F", "filename": "yellow/Lexus/8881e7b561393f1d778a70dd449433e9.jpg", "make": "Lexus", "color": "yellow"}, 22 | {"url": "http://example.com/img.jpg", "hash": "38e857d5235afda4315676c0b7756832", "resnet50": "pickup", "model": "Mark VII", "filename": "silver/Lincoln/38e857d5235afda4315676c0b7756832.jpg", "make": "Lincoln", "color": "silver"}, 23 | {"url": "http://example.com/img.jpg", "hash": "6eb2b407cc398e70604bfd336bb2efad", "resnet50": "pickup", "model": "Lightning", "filename": "orange/Ford/6eb2b407cc398e70604bfd336bb2efad.jpg", "make": "Ford", "color": "orange"}, 24 | {"url": "http://example.com/img.jpg", "hash": "eb3811772ec012545c8952d88906d355", "resnet50": "racer", "model": "Rockette", "filename": "green/Fairthorpe/eb3811772ec012545c8952d88906d355.jpg", "make": "Fairthorpe", "color": "green"}, 25 | {"url": "http://example.com/img.jpg", "hash": "8dbbc1d930c7f2e4558efcc596728945", "resnet50": "minivan", "model": "S70", "filename": "white/Volvo/8dbbc1d930c7f2e4558efcc596728945.jpg", 
"make": "Volvo", "color": "white"}, 26 | {"url": "http://example.com/img.jpg", "hash": "ed45784812d1281bcb61f217f4422ab5", "resnet50": "convertible", "model": "A8", "filename": "green/Audi/ed45784812d1281bcb61f217f4422ab5.jpg", "make": "Audi", "color": "green"}, 27 | {"url": "http://example.com/img.jpg", "hash": "763ca4abbbb9b042b21f19fd80986179", "resnet50": "pickup", "model": "W126", "filename": "green/Mercedes-Benz/763ca4abbbb9b042b21f19fd80986179.jpg", "make": "Mercedes-Benz", "color": "green"}, 28 | ] 29 | 30 | WRITE_LIST = ( 31 | # filename, data list, settype 32 | ("allFiles", TRAIN + TEST, SetType.ALL), 33 | ("training", TRAIN, SetType.TRAIN), 34 | ("testing", TEST, SetType.TEST), 35 | ) 36 | 37 | output_chips = { 38 | SetType.ALL: [], 39 | SetType.TRAIN: [], 40 | SetType.TEST: [], 41 | } 42 | for filename, data_list, settype in WRITE_LIST: 43 | fn = tmpdir.join(filename) 44 | with open(fn.strpath, "w") as f: 45 | for d in data_list: 46 | # Write the data list files 47 | line = json.dumps(d) 48 | f.write(line + "\n") 49 | 50 | # Make a chip 51 | fp = os.path.join(tmpdir.strpath, d["filename"]) 52 | chip = Chip(fp, None, None, None, d) 53 | output_chips[settype].append(chip) 54 | 55 | # Instantiate a DGCarsDataset() class 56 | output_classes = { 57 | SetType.ALL: DGCarsDataset(tmpdir.strpath, SetType.ALL), 58 | SetType.TRAIN: DGCarsDataset(tmpdir.strpath, SetType.TRAIN), 59 | SetType.TEST: DGCarsDataset(tmpdir.strpath, SetType.TEST), 60 | } 61 | 62 | return (output_classes, output_chips) 63 | 64 | 65 | def test_dgcars_chips_len(dgcars): 66 | classes = dgcars[0] 67 | answer_chips = dgcars[1] 68 | # check that self.chips has been created, is not empty, and has the right 69 | # number of entries 70 | for key, cls in classes.items(): 71 | ans = answer_chips[key] 72 | assert len(cls.chips) == len(ans) 73 | 74 | def test_dgcars_chips_vals(dgcars): 75 | classes = dgcars[0] 76 | answer_chips = dgcars[1] 77 | 78 | for key, cls in classes.items(): 79 | ans = answer_chips[key] 80 | for chip in cls: 81 | # The chip must match one of our hand built chips 82 | assert chip in ans 83 | # Various values are None 84 | assert chip.car_id is None 85 | assert chip.cam_id is None 86 | assert chip.time is None 87 | # Misc and filepath should exist 88 | assert chip.filepath 89 | assert chip.misc 90 | # Misc is a dictionary like object 91 | assert hasattr(chip.misc, "get") 92 | 93 | 94 | def test_get_all_chips_by_car_id(dgcars): 95 | classes = dgcars[0] 96 | answer_chips = dgcars[1] 97 | 98 | for key, cls in classes.items(): 99 | ans = answer_chips[key] 100 | 101 | # All car_id values are None in DG Cars 102 | all_chips = sorted(cls.get_all_chips_by_car_id(None)) 103 | assert all_chips == sorted(ans) 104 | 105 | 106 | def test_get_all_chips_by_cam_id(dgcars): 107 | classes = dgcars[0] 108 | answer_chips = dgcars[1] 109 | 110 | for key, cls in classes.items(): 111 | ans = answer_chips[key] 112 | 113 | # All cam_id values are None in DG Cars 114 | all_chips = sorted(cls.get_all_chips_by_cam_id(None)) 115 | assert all_chips == sorted(ans) 116 | 117 | 118 | def test_get_distinct_cams_by_car_id(dgcars): 119 | classes = dgcars[0] 120 | answer_chips = dgcars[1] 121 | 122 | for key, cls in classes.items(): 123 | ans = answer_chips[key] 124 | 125 | # All car_id values are None in DG Cars 126 | assert cls.get_distinct_cams_by_car_id(None) == {None} 127 | 128 | 129 | def test_get_all_cam_ids(dgcars): 130 | classes = dgcars[0] 131 | answer_chips = dgcars[1] 132 | 133 | for key, cls in classes.items(): 134 | ans = 
answer_chips[key] 135 | 136 | # All cam_id values are None in DG Cars 137 | assert cls.get_all_cam_ids() == [None] 138 | 139 | 140 | def test_get_all_car_ids(dgcars): 141 | classes = dgcars[0] 142 | answer_chips = dgcars[1] 143 | 144 | for key, cls in classes.items(): 145 | ans = answer_chips[key] 146 | 147 | # All car_id values are None in DG Cars 148 | assert cls.get_all_car_ids() == [None] 149 | 150 | 151 | def test_dgcars_iter(dgcars): 152 | classes = dgcars[0] 153 | answer_chips = dgcars[1] 154 | 155 | for key, cls in classes.items(): 156 | ans = answer_chips[key] 157 | 158 | # Ensure that we can iterate and get all of the items 159 | for chip in cls: 160 | assert chip in ans 161 | 162 | # Ensure list can access the iterator, and that there are no extra 163 | # chips 164 | cls_chips = list(cls) 165 | for chip in ans: 166 | assert chip in cls_chips 167 | -------------------------------------------------------------------------------- /testci/test_experiment_utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pelops.utils as utils 4 | 5 | 6 | def test_SetType(): 7 | vals = utils.SetType.__members__ 8 | assert 'ALL' in vals 9 | assert 'QUERY' in vals 10 | assert 'TEST' in vals 11 | assert 'TRAIN' in vals 12 | 13 | 14 | def test_get_index_of_tuple(): 15 | TEST_LIST = [ 16 | (0, 'Who', 'John'), 17 | (1, 'What', 'Pizza'), 18 | (2, 'Where', 'Little Caesar'), 19 | (3, 'When', 'Noon'), 20 | (4, 'How', 'Eat'), 21 | (5, None, None), 22 | ] 23 | 24 | # Test that we can find ints, strings, and Nones 25 | assert 1 == utils.get_index_of_tuple(TEST_LIST, 0, 0) 26 | assert 2 == utils.get_index_of_tuple(TEST_LIST, 1, 'What') 27 | assert 6 == utils.get_index_of_tuple(TEST_LIST, 1, None) 28 | 29 | # Test that we report the last position if we don't find an answer 30 | assert len(TEST_LIST) == utils.get_index_of_tuple( 31 | TEST_LIST, 0, 'NOT THERE') 32 | 33 | def test_get_index_of_pairs(): 34 | TEST_LIST = [ 35 | (0, 0, 'Mozart'), 36 | (1, 'Twinkle', 'Twinkle'), 37 | (2, 'Where', 'Little Caesar'), 38 | (3, 'When', 'Noon'), 39 | (4, 'How', 'Eat'), 40 | (5, None, None), 41 | ] 42 | 43 | # Test that we can find ints, strings, and Nones 44 | assert 1 == utils.get_index_of_pairs(TEST_LIST, 0, 1, 0) 45 | assert 2 == utils.get_index_of_pairs(TEST_LIST, 1, 2, 'Twinkle') 46 | assert 6 == utils.get_index_of_pairs(TEST_LIST, 1, 2, None) 47 | 48 | # Test that we report the last position if we don't find an answer 49 | assert len(TEST_LIST) == utils.get_index_of_pairs( 50 | TEST_LIST, 0, 1, 'NOT THERE') 51 | 52 | 53 | def test_get_basename(): 54 | TEST_FILEPATHS = ( 55 | ("/path/to/file/hello.py", "hello.py"), 56 | ("hello.py", "hello.py") 57 | ) 58 | 59 | for test_input, answer in TEST_FILEPATHS: 60 | assert answer == utils.get_basename(test_input) 61 | 62 | 63 | def test_get_numeric(): 64 | TEST_STRINGS = ( 65 | ('c002.jpg', '002'), 66 | ('_012_', '012'), 67 | ) 68 | 69 | for test_input, answer in TEST_STRINGS: 70 | assert answer == utils.get_numeric(test_input) 71 | 72 | 73 | def test_get_timestamp(): 74 | assert "2012-09-16 12:03:04" == str(utils.get_timestamp(datetime.datetime(2012, 9, 16, 12, 3, 4))) 75 | assert 1 == utils.get_timestamp(1) 76 | assert "Saturday" == utils.get_timestamp("Saturday") 77 | 78 | 79 | def test_should_drop(): 80 | # Always drop 81 | assert utils.should_drop(1.) is True 82 | # Never drop 83 | assert utils.should_drop(0.)
is False 84 | -------------------------------------------------------------------------------- /testci/test_featuredataset.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import pytest 3 | 4 | import numpy as np 5 | from pelops.datasets.chip import ChipDataset, Chip 6 | from pelops.datasets.featuredataset import FeatureDataset 7 | 8 | FEAT_LENGTH = 2048 9 | 10 | @pytest.fixture 11 | def chips(): 12 | CHIPS = ( 13 | # filepath, car_id, cam_id, time, misc 14 | ("car1_cam1.png", 1, 1, datetime.datetime(2016, 10,1, 0, 1, 2, microsecond=100), {}), 15 | ("car1_cam2.png", 1, 2, datetime.datetime(2016, 10,1, 0, 1, 2, microsecond=105), {}), 16 | ("car1_cam3.png", 1, 3, datetime.datetime(2016, 10,1, 0, 1, 2, microsecond=110), {}), 17 | ("car2_cam1.png", 2, 1, datetime.datetime(2016, 10,1, 0, 1, 2, microsecond=100), {}), 18 | ("car2_cam2.png", 2, 1, datetime.datetime(2016, 10,1, 0, 1, 2, microsecond=102), {}), 19 | ("car2_cam3.png", 2, 1, datetime.datetime(2016, 10,1, 0, 1, 2, microsecond=104), {}), 20 | ) 21 | 22 | chips = {} 23 | for filepath, car_id, cam_id, time, misc in CHIPS: 24 | chip = Chip(filepath, car_id, cam_id, time, misc) 25 | chips[filepath] = chip 26 | 27 | return chips 28 | 29 | @pytest.fixture 30 | def feature_dataset(chips, tmpdir): 31 | OUTPUT_FNAME = tmpdir.join("test_feature_dataset.hdf5").strpath 32 | feat_data = np.random.random((len(chips), FEAT_LENGTH)) 33 | FeatureDataset.save(OUTPUT_FNAME, list(chips.keys()), list(chips.values()), feat_data) 34 | return FeatureDataset(OUTPUT_FNAME) 35 | 36 | def test_get_feats(chips, feature_dataset): 37 | chip_key = next(iter(chips)) 38 | chip = chips[chip_key] 39 | assert len(feature_dataset.get_feats_for_chip(chip)) == FEAT_LENGTH 40 | 41 | def test_load_save(chips, feature_dataset): 42 | chip_key = next(iter(chips)) 43 | assert feature_dataset.chips[chip_key] == chips[chip_key] 44 | -------------------------------------------------------------------------------- /testci/test_featureproducer.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import datetime 3 | import pytest 4 | import numpy as np 5 | from PIL import Image 6 | 7 | from pelops.features.feature_producer import FeatureProducer 8 | 9 | 10 | @pytest.fixture 11 | def img_data(): 12 | DATA = [[[ 0, 0, 0], 13 | [255, 255, 255], 14 | [ 0, 0, 0]], 15 | [[255, 255, 255], 16 | [ 0, 0, 0], 17 | [255, 255, 255]], 18 | [[ 0, 0, 0], 19 | [255, 255, 255], 20 | [ 0, 0, 0]]] 21 | return np.array(DATA, dtype=np.uint8) 22 | 23 | 24 | @pytest.fixture 25 | def chip_producer(img_data): 26 | Chip = collections.namedtuple("Chip", ["filepath", "car_id", "cam_id", "time", "img_data", "misc"]) 27 | ChipProducer = collections.namedtuple("ChipProducer", ["chips"]) 28 | CHIPS = ( 29 | # filepath, car_id, cam_id, time, img_data, misc 30 | ("car1_cam1.png", 1, 1, datetime.datetime(2016, 10, 1, 0, 1, 2, microsecond=100), img_data, {}), 31 | ) 32 | 33 | chip_producer = ChipProducer({}) 34 | for filepath, car_id, cam_id, time, img_data, misc in CHIPS: 35 | print(img_data.shape) 36 | chip = Chip(filepath, car_id, cam_id, time, img_data, misc) 37 | chip_producer.chips[filepath] = chip 38 | 39 | return chip_producer 40 | 41 | 42 | @pytest.fixture 43 | def monkey_feature_producer(chip_producer): 44 | # Monkey patch the __init__() function so that it will succeed 45 | def new_init(self, chip_producer): 46 | self.chip_producer = chip_producer 47 | self.feat_size = 1 48 | 49 | 
FeatureProducer.__init__ = new_init 50 | 51 | return FeatureProducer(chip_producer) 52 | 53 | 54 | def test_set_variables_raises(): 55 | with pytest.raises(NotImplementedError): 56 | fp = FeatureProducer(None) 57 | 58 | 59 | def test_produce_features_raises(monkey_feature_producer): 60 | with pytest.raises(NotImplementedError): 61 | monkey_feature_producer.produce_features(None) 62 | 63 | 64 | def test_get_image_img_data(monkey_feature_producer, chip_producer, img_data): 65 | for key, chip in chip_producer.chips.items(): 66 | image = monkey_feature_producer.get_image(chip) 67 | image_array = np.array(image) 68 | assert np.array_equal(img_data, np.array(image)) 69 | 70 | 71 | def test_return_features_raises(monkey_feature_producer): 72 | with pytest.raises(NotImplementedError): 73 | monkey_feature_producer.return_features() 74 | -------------------------------------------------------------------------------- /testci/test_hog_feature.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | from skimage import color 3 | from skimage.feature import hog 4 | import collections 5 | import datetime 6 | import numpy as np 7 | import pytest 8 | from itertools import product 9 | 10 | from pelops.features.hog import HOGFeatureProducer 11 | 12 | def hog_features(img): 13 | img = color.rgb2gray(np.array(img)) 14 | features = hog(img, orientations=8, pixels_per_cell=(14, 14), cells_per_block=(16, 16)) 15 | return features 16 | 17 | 18 | def hist_features(img): 19 | MAX_CHANNELS = 3 20 | BINS = 256 21 | 22 | channels = img.split() 23 | 24 | # Remove alpha channels 25 | if len(channels) > MAX_CHANNELS: 26 | channels = channels[:MAX_CHANNELS] 27 | 28 | # Calculate features 29 | hist_features = np.zeros(MAX_CHANNELS * BINS) 30 | for i, channel in enumerate(channels): 31 | channel_array = np.array(channel) 32 | values, _ = np.histogram(channel_array.flat, bins=BINS) 33 | start = i * BINS 34 | end = (i+1) * BINS 35 | hist_features[start:end] = values 36 | 37 | return hist_features 38 | 39 | 40 | @pytest.fixture(scope="module") 41 | def img_data(): 42 | data = { 43 | "DATA_1":{}, 44 | "DATA_3":{}, 45 | "DATA_4":{}, 46 | } 47 | 48 | # Raw data 49 | data["DATA_1"]["array"] = np.array([ 50 | [[ 0, 0, 0], 51 | [255, 255, 255], 52 | [ 0, 0, 0]], 53 | ], dtype=np.uint8) 54 | 55 | data["DATA_3"]["array"] = np.array([ 56 | [[ 0, 0, 0], 57 | [255, 255, 255], 58 | [ 0, 0, 0]], 59 | [[255, 255, 255], 60 | [ 0, 0, 0], 61 | [255, 255, 255]], 62 | [[ 0, 0, 0], 63 | [255, 255, 255], 64 | [ 0, 0, 0]], 65 | ], dtype=np.uint8) 66 | 67 | data["DATA_4"]["array"] = np.array([ 68 | [[ 0, 0, 0], 69 | [255, 255, 255], 70 | [ 0, 0, 0]], 71 | [[255, 255, 255], 72 | [ 0, 0, 0], 73 | [255, 255, 255]], 74 | [[ 0, 0, 0], 75 | [255, 255, 255], 76 | [ 0, 0, 0]], 77 | [[ 0, 0, 0], 78 | [ 0, 0, 0], 79 | [ 0, 0, 0]], 80 | ], dtype=np.uint8) 81 | 82 | # PIL images 83 | for data_id in data: 84 | arr = data[data_id]["array"] 85 | img = Image.fromarray(arr) 86 | img = img.convert("RGB") 87 | img = img.resize((224, 224), Image.BICUBIC) 88 | data[data_id]["image"] = img 89 | 90 | # Calculate HOG features 91 | for data_id in data: 92 | img = data[data_id]["image"] 93 | hog = hog_features(img) 94 | data[data_id]["hog_features"] = hog 95 | 96 | # Calculate Histogram features 97 | for data_id in data: 98 | img = data[data_id]["image"] 99 | hist = hist_features(img) 100 | data[data_id]["hist_features"] = hist 101 | 102 | return data 103 | 104 | 105 | @pytest.fixture 106 | def chip_producer(img_data):
107 | Chip = collections.namedtuple("Chip", ["filepath", "car_id", "cam_id", "time", "img_data", "misc"]) 108 | CHIPS = [] 109 | for i, data_id in enumerate(img_data): 110 | data = img_data[data_id] 111 | arr = data["array"] 112 | # We use the data_id as the filepath since we do not actually open the 113 | # file and it only needs to be unique 114 | # 115 | # filepath, car_id, cam_id, time, img_data, misc 116 | chip = (data_id, i, 1, datetime.datetime(2016, 10, 1, 0, 1, 2, microsecond=100+i), arr, {}) 117 | CHIPS.append(chip) 118 | 119 | chip_producer = {"chips": {}} 120 | for filepath, car_id, cam_id, time, data, misc in CHIPS: 121 | chip = Chip(filepath, car_id, cam_id, time, data, misc) 122 | chip_producer["chips"][filepath] = chip 123 | 124 | return chip_producer 125 | 126 | 127 | @pytest.fixture 128 | def feature_producer(chip_producer): 129 | hog = HOGFeatureProducer(chip_producer) 130 | 131 | return hog 132 | 133 | 134 | def test_features(feature_producer, chip_producer, img_data): 135 | fp = feature_producer 136 | 137 | for _, chip in chip_producer["chips"].items(): 138 | data_id = chip.filepath 139 | data = img_data[data_id] 140 | hog_features = data["hog_features"] 141 | hist_features = data["hist_features"] 142 | hog_len = len(hog_features) 143 | hist_len = len(hist_features) 144 | 145 | features = feature_producer.produce_features(chip) 146 | assert len(features) == hog_len + hist_len 147 | 148 | total_features = np.concatenate((hog_features, hist_features)) 149 | assert np.array_equal(features, total_features) 150 | 151 | 152 | def test_inputs(chip_producer): 153 | pix_sizes = (32, 64, 128, 256, 512) 154 | cell_counts = (1, 2, 4, 16) 155 | orientation_counts = (2, 4, 8, 16) 156 | histogram_bins = (32, 64, 128, 256) 157 | for pix, cell, orientation, histogram_bin in product(pix_sizes, cell_counts, orientation_counts, histogram_bins): 158 | hog = HOGFeatureProducer( 159 | chip_producer, 160 | image_size=(pix, pix), 161 | cells=(cell, cell), 162 | orientations=orientation, 163 | histogram_bins_per_channel=histogram_bin, 164 | ) 165 | for _, chip in chip_producer["chips"].items(): 166 | features = hog.produce_features(chip) 167 | assert len(features) == ((cell**2) * orientation) + (3 * histogram_bin) 168 | -------------------------------------------------------------------------------- /testci/test_keras_load_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from pelops.features.keras_model import KerasModelFeatureProducer 6 | 7 | 8 | def test_load_model_workaround(): 9 | # @TODO get some environment variable set when in CI environment 10 | # test to see, modify path... 
11 | if os.getenv('CIRCLECI', None) is not None: 12 | model_filename = '/home/ubuntu/pelops/testci/small.json' 13 | weight_filename = '/home/ubuntu/pelops/testci/small.hdf5' 14 | if os.getenv('INDOCKERCONTAINER', None) is not None: 15 | model_filename = '/pelops_root/testci/small.json' 16 | weight_filename = '/pelops_root/testci/small.hdf5' 17 | 18 | model = KerasModelFeatureProducer.load_model_workaround( 19 | model_filename, weight_filename) 20 | assert model.layers[0].name == 'dense_8' 21 | -------------------------------------------------------------------------------- /testci/test_keras_model_feature.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import collections 3 | import datetime 4 | import numpy as np 5 | import pytest 6 | 7 | from pelops.features.keras_model import KerasModelFeatureProducer 8 | 9 | 10 | @pytest.fixture 11 | def img_data(): 12 | DATA = [[[ 0, 0, 0], 13 | [255, 255, 255], 14 | [ 0, 0, 0]], 15 | [[255, 255, 255], 16 | [ 0, 0, 0], 17 | [255, 255, 255]], 18 | [[ 0, 0, 0], 19 | [255, 255, 255], 20 | [ 0, 0, 0]]] 21 | return np.array(DATA, dtype=np.uint8) 22 | 23 | 24 | def test_preprocess_image(img_data): 25 | img = Image.fromarray(img_data) 26 | img_resized = KerasModelFeatureProducer.preprocess_image(img, 224, 224) 27 | assert img_resized.shape == (1, 224, 224, 3) 28 | -------------------------------------------------------------------------------- /testci/test_resnet50_feature.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import collections 3 | import datetime 4 | import numpy as np 5 | import pytest 6 | 7 | from pelops.features.resnet50 import ResNet50FeatureProducer 8 | 9 | 10 | @pytest.fixture 11 | def img_data(): 12 | DATA = [[[ 0, 0, 0], 13 | [255, 255, 255], 14 | [ 0, 0, 0]], 15 | [[255, 255, 255], 16 | [ 0, 0, 0], 17 | [255, 255, 255]], 18 | [[ 0, 0, 0], 19 | [255, 255, 255], 20 | [ 0, 0, 0]]] 21 | return np.array(DATA, dtype=np.uint8) 22 | 23 | 24 | @pytest.fixture 25 | def chip_producer(img_data): 26 | Chip = collections.namedtuple("Chip", ["filepath", "car_id", "cam_id", "time", "img_data", "misc"]) 27 | CHIPS = ( 28 | # filepath, car_id, cam_id, time, img_data, misc 29 | ("car1_cam1.png", 1, 1, datetime.datetime(2016, 10, 1, 0, 1, 2, microsecond=100), img_data, {}), 30 | ) 31 | 32 | chip_producer = {"chips": {}} 33 | for filepath, car_id, cam_id, time, img_data, misc in CHIPS: 34 | chip = Chip(filepath, car_id, cam_id, time, img_data, misc) 35 | chip_producer["chips"][filepath] = chip 36 | 37 | return chip_producer 38 | 39 | 40 | @pytest.fixture 41 | def feature_producer(chip_producer): 42 | res = ResNet50FeatureProducer(chip_producer) 43 | return res 44 | 45 | 46 | def test_features(feature_producer, chip_producer): 47 | for _, chip in chip_producer["chips"].items(): 48 | features = feature_producer.produce_features(chip) 49 | assert features.shape == (1, 2048) 50 | assert np.sum(features) != 0 51 | 52 | 53 | def test_preprocess_image(feature_producer, img_data): 54 | img = Image.fromarray(img_data) 55 | img_resized = feature_producer.preprocess_image(img, 224, 224) 56 | assert img_resized.shape == (1, 224, 224, 3) 57 | -------------------------------------------------------------------------------- /testci/test_slice.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import datetime 3 | import io 4 | 5 | import pytest 6 | 7 | import pelops.datasets.slice as slice 8 
| 9 | 10 | @pytest.fixture 11 | def slice_env(tmpdir): 12 | """Setup mock STR SLiCE dataset""" 13 | work_dir = tmpdir.mkdir('pelops_testing') 14 | truth = [ 15 | ['% obSetIdx', ' chipIdx', ' targetID'], 16 | ['1', ' 1', '0'], 17 | ['1', ' 2', '1'], 18 | ['1', ' 3', '0'], 19 | ['2', ' 1', '1'], 20 | ['100', ' 1', '2'], 21 | ] 22 | 23 | truth_file = work_dir.join('truth.txt') 24 | with io.StringIO(newline='') as truth_hdl: 25 | csv.writer(truth_hdl).writerows(truth) 26 | truth_hdl.seek(0) 27 | truth_file.write(truth_hdl.read()) 28 | 29 | for obset, chipid in {(row[0], row[1].strip()) for row in truth[1:]}: 30 | obset_dir = work_dir.join('ObSet00{}_1492560663_TestDir'.format(obset)) 31 | obset_dir.ensure(dir=True) 32 | img_dir = obset_dir.join('images') 33 | img_dir.ensure(dir=True) 34 | img_file = img_dir.join('ObSet001-00{}.png'.format(chipid)) 35 | img_file.ensure(dir=False) 36 | 37 | yield work_dir.strpath 38 | 39 | 40 | def test_slice_chip_load(slice_env): 41 | """Test that SLiCE chips load without error""" 42 | slice_dataset = slice.SliceDataset(slice_env) 43 | assert len(slice_dataset.chips) == 5 44 | 45 | 46 | def test_slice_chip_tgt_car_id(slice_env): 47 | """Test that SLiCE chips for target vehicles are processed properly.""" 48 | slice_dataset = slice.SliceDataset(slice_env) 49 | target_ids = [chip.car_id for chip in slice_dataset.chips.values() if chip.car_id.startswith('tgt-')] 50 | assert 'tgt-000000001' in target_ids 51 | assert len(target_ids) == 3 52 | assert len(set(target_ids)) == 2 53 | 54 | 55 | def test_slice_chip_unk_car_id(slice_env): 56 | """Test that SLiCE chips for non-target vehicles are processed properly.""" 57 | slice_dataset = slice.SliceDataset(slice_env) 58 | unk_ids = [chip.car_id for chip in slice_dataset.chips.values() if chip.car_id.startswith('unk-')] 59 | assert 'unk-000000001' in unk_ids 60 | assert len(unk_ids) == 2 61 | 62 | 63 | def test_slice_chip_dtg(slice_env): 64 | """Test that date/times encoded in filenames are processed properly.""" 65 | slice_dataset = slice.SliceDataset(slice_env) 66 | dtgs = {datetime.datetime.fromtimestamp(float(chip.time)).isoformat() for chip in slice_dataset.chips.values()} 67 | assert len(dtgs) == 1 68 | 69 | 70 | def test_slice_index_chip(): 71 | TRUTH = ( 72 | # STR like chip 73 | ( 74 | "ObSet009_1473015765_IH37_Jones/images/ObSet009-014.png", 75 | ( 76 | (9, 14), 77 | { 78 | 'file': "ObSet009_1473015765_IH37_Jones/images/ObSet009-014.png", 79 | 'meta': { 80 | 'obSetName': "IH37_Jones", 81 | 'epoch': "1473015765", 82 | }, 83 | }, 84 | ), 85 | ), 86 | # STR like chip 87 | ( 88 | "/root/data/stuff/ObSet009_1473015765_IH37_Jones/images/ObSet009-014.png", 89 | ( 90 | (9, 14), 91 | { 92 | 'file': "/root/data/stuff/ObSet009_1473015765_IH37_Jones/images/ObSet009-014.png", 93 | 'meta': { 94 | 'obSetName': "IH37_Jones", 95 | 'epoch': "1473015765", 96 | }, 97 | }, 98 | ), 99 | ), 100 | # SLICE like chip 101 | ( 102 | "ObSet101_1473082429_day5_camera3/images/ObSet101-001-0-20160905_185543.375_1.jpg", 103 | ( 104 | (101, 1), 105 | { 106 | 'file': "ObSet101_1473082429_day5_camera3/images/ObSet101-001-0-20160905_185543.375_1.jpg", 107 | 'meta': { 108 | 'obSetName': "day5_camera3", 109 | 'epoch': "1473101743", 110 | }, 111 | }, 112 | ), 113 | ), 114 | # SLICE like chip 115 | ( 116 | "/test/test/data/ObSet101_1473082429_day5_camera3/images/ObSet101-001-0-20160905_185543.375_1.jpg", 117 | ( 118 | (101, 1), 119 | { 120 | 'file': "/test/test/data/ObSet101_1473082429_day5_camera3/images/ObSet101-001-0-20160905_185543.375_1.jpg", 121 | 
'meta': { 122 | 'obSetName': "day5_camera3", 123 | 'epoch': "1473101743", 124 | }, 125 | }, 126 | ), 127 | ), 128 | # Special cases 129 | ("/test/test/truth.txt", None), 130 | ("/test/masks/image_mask.png", None), 131 | ) 132 | 133 | for file_path, answer in TRUTH: 134 | assert answer == slice.SliceDataset.index_chip(file_path) 135 | -------------------------------------------------------------------------------- /testci/test_str.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | 4 | from pelops.datasets.str import get_sa_cam_id 5 | from pelops.datasets.str import get_sa_car_id 6 | from pelops.datasets.str import int_from_string 7 | from pelops.datasets.str import StrDataset 8 | 9 | 10 | @pytest.fixture 11 | def str_sa(tmpdir): 12 | """ Set up some test files and an instance of StrDataset(). """ 13 | # Write a file to read back 14 | FILE_NAMES = ( 15 | # filepath, car_id, cam_id, time, misc 16 | ("match00001_cam02.png", 1, 2, None, None), 17 | ("match00001_cam01_mask.png", None, None, None, None), 18 | ("match00010_cam01.png", 10, 1, None, None), 19 | ("match00011_cam02_mask.png", None, None, None, None) 20 | ) 21 | # The contents of the files do not matter, the name is enough 22 | internal_dir = tmpdir.mkdir("crossCameraMatches") 23 | for name, _, _, _, _ in FILE_NAMES: 24 | out_file = internal_dir.join(name) 25 | out_file.write("TEST") 26 | 27 | # Setup the class 28 | instantiated_class = StrDataset(os.path.dirname(out_file.dirname)) 29 | 30 | # Rename filepath 31 | FILE_NAMES = ( 32 | (os.path.join(out_file.dirname, "match00001_cam02.png"), 1, 2, None, None), 33 | (os.path.join(out_file.dirname, "match00001_cam01_mask.png"), None, None, None, None), 34 | (os.path.join(out_file.dirname, "match00010_cam01.png"), 10, 1, None, None), 35 | (os.path.join(out_file.dirname, "match00011_cam02_mask.png"), None, None, None, None) 36 | ) 37 | 38 | # Filter out the files that were not read 39 | RET_FILE_NAMES = tuple(t for t in FILE_NAMES if t[1] is not None) 40 | return (instantiated_class, RET_FILE_NAMES) 41 | 42 | 43 | def test_str_sa_chips_len(str_sa): 44 | """ Test that StrDataset.chips is the correct length """ 45 | instantiated_class = str_sa[0] 46 | FILE_NAMES = str_sa[1] 47 | # check that self.chips has been created, is not empty, and has the right 48 | # number of entries 49 | assert len(FILE_NAMES) 50 | assert len(FILE_NAMES) == len(instantiated_class.chips) 51 | 52 | 53 | def test_str_sa_chips_vals(str_sa): 54 | """ Test that StrDataset chips have the correct values. 
""" 55 | instantiated_class = str_sa[0] 56 | FILE_NAMES = str_sa[1] 57 | 58 | # Check that the correct chips exist 59 | for filepath, car_id, cam_id, time, misc in FILE_NAMES: 60 | chip = instantiated_class.chips[filepath] 61 | assert car_id == chip.car_id 62 | assert cam_id == chip.cam_id 63 | # No time data 64 | assert chip.time is None 65 | # No misc data 66 | assert chip.misc is None 67 | # Filepath should be filled 68 | assert chip.filepath 69 | 70 | 71 | def test_get_all_chips_by_car_id(str_sa): 72 | """ Test StrDataset.get_all_chips_by_car_id() """ 73 | instantiated_class = str_sa[0] 74 | FILE_NAMES = str_sa[1] 75 | 76 | seen_ids = [] 77 | for filepath, car_id, cam_id, time, misc in FILE_NAMES: 78 | # Generate all the chips by hand, and compare 79 | if car_id in seen_ids: 80 | continue 81 | seen_ids.append(car_id) 82 | chips = [] 83 | for key, val in instantiated_class.chips.items(): 84 | if val.car_id == car_id: 85 | chips.append(val) 86 | 87 | chips.sort() 88 | test_chips = sorted(instantiated_class.get_all_chips_by_car_id(car_id)) 89 | assert chips == test_chips 90 | 91 | 92 | def test_get_all_chips_by_cam_id(str_sa): 93 | """ Test StrDataset.get_all_chips_by_cam_id() """ 94 | instantiated_class = str_sa[0] 95 | FILE_NAMES = str_sa[1] 96 | 97 | seen_ids = [] 98 | for filepath, car_id, cam_id, time, misc in FILE_NAMES: 99 | # Generate all the chips by hand, and compare 100 | if cam_id in seen_ids: 101 | continue 102 | seen_ids.append(cam_id) 103 | chips = [] 104 | for key, val in instantiated_class.chips.items(): 105 | if val.cam_id == cam_id: 106 | chips.append(val) 107 | 108 | chips.sort() 109 | test_chips = sorted(instantiated_class.get_all_chips_by_cam_id(cam_id)) 110 | assert chips == test_chips 111 | 112 | 113 | def test_get_distinct_cams_by_car_id(str_sa): 114 | """ Test StrDataset.get_distinct_cams_by_car_id() and get_distinct_cams_per_car """ 115 | instantiated_class = str_sa[0] 116 | CAR_ID = 1 117 | TEST_CAMS = [2] 118 | for test_cam, cam in zip(TEST_CAMS, sorted(instantiated_class.get_distinct_cams_by_car_id(CAR_ID))): 119 | assert test_cam == cam 120 | 121 | def test_get_all_cam_ids(str_sa): 122 | """ Test StrDataset.get_distinct_cams_by_car_id() """ 123 | instantiated_class = str_sa[0] 124 | TEST_CAMS = [1, 2] 125 | for test_cam, cam in zip(TEST_CAMS, sorted(instantiated_class.get_all_cam_ids())): 126 | assert test_cam == cam 127 | 128 | def test_get_all_car_ids(str_sa): 129 | """ Test StrDataset.get_distinct_cams_by_car_id() """ 130 | instantiated_class = str_sa[0] 131 | TEST_CARS = [1, 10] 132 | for test_car, car in zip (TEST_CARS, sorted(instantiated_class.get_all_car_ids())): 133 | assert test_car == car 134 | 135 | 136 | def test_str_sa_iter(str_sa): 137 | """ Test StrDataset.__iter__() """ 138 | instantiated_class = str_sa[0] 139 | FILE_NAMES = str_sa[1] 140 | chip_ids = tuple(i for i, _, _, _, _ in FILE_NAMES) 141 | 142 | for chip in instantiated_class: 143 | assert chip.filepath in chip_ids 144 | 145 | 146 | def test_int_from_string(): 147 | """ Test int_from_string() """ 148 | TEST_STRINGS = ( 149 | # String, Args, Answer 150 | ("test_010_test", ("test_", 3), 10), 151 | ("test_010_test", ("FAIL_", 3), None), 152 | ("test_010", ("test_", 3), 10), 153 | ("test_11_test", ("test_", 2), 11), 154 | ("010_test", ("", 3), 10), 155 | ("/foo/bar/bass/test_/test_010_test", ("test_", 3), 10), 156 | ) 157 | 158 | for test_string, args, answer in TEST_STRINGS: 159 | assert answer == int_from_string(test_string, args[0], args[1]) 160 | 161 | 162 | def test_get_sa_cam_id(): 
163 | """ Test get_sa_cam_id() """ 164 | TEST_STRINGS = ( 165 | # String, Answer 166 | ("match00001_cam02.png", 2), 167 | ("match00001_cam01_mask.png", 1), 168 | ("match00010_cam01.png", 1), 169 | ("match00011_cam02_mask.png", 2), 170 | ) 171 | 172 | for test_string, answer in TEST_STRINGS: 173 | assert answer == get_sa_cam_id(test_string) 174 | 175 | 176 | def test_get_sa_car_id(): 177 | """ Test get_sa_car_id() """ 178 | TEST_STRINGS = ( 179 | # String, Answer 180 | ("match00001_cam02.png", 1), 181 | ("match00001_cam01_mask.png", 1), 182 | ("match00010_cam01.png", 10), 183 | ("match00011_cam02_mask.png", 11), 184 | ) 185 | 186 | for test_string, answer in TEST_STRINGS: 187 | assert answer == get_sa_car_id(test_string) 188 | --------------------------------------------------------------------------------