├── .coveragerc
├── .gitignore
├── .pre-commit-config.yaml
├── AUTHORS
├── CHANGELOG.md
├── LICENSE
├── README.md
├── VERSION
├── circle.yml
├── docker
│   ├── Dockerfile.base
│   ├── Dockerfile.csv2json
│   ├── Dockerfile.images2vecs
│   ├── Dockerfile.notebook
│   ├── Dockerfile.rankDirectories
│   ├── Dockerfile.test
│   ├── Dockerfile.vectorSiamese
│   └── pelops_start.sh
├── docs
│   └── chips_to_features.md
├── etl
│   ├── compareDirectory2Directory.py
│   ├── convertCsvToJson.py
│   ├── makeFeaturesResNet50.py
│   ├── makeFeaturesTopSiamese.py
│   └── makeFeaturesYOURMODEL.py
├── maintainers.md
├── makefile
├── misc
│   ├── pelops.png
│   └── pelops.svg
├── pelops
│   ├── __init__.py
│   ├── analysis
│   │   ├── CMC_Confidence.ipynb
│   │   ├── CameraVsCamera.ipynb
│   │   ├── MakeChips.ipynb
│   │   ├── SVMBinaryCarMatch.ipynb
│   │   ├── ScoreChips.ipynb
│   │   ├── __init__.py
│   │   ├── analysis.py
│   │   ├── camerautil.py
│   │   ├── colormakemodel_dataset_maker.ipynb
│   │   ├── comparecameras.py
│   │   ├── isFileImage.ipynb
│   │   ├── labelImageCars.ipynb
│   │   ├── makeCMCplots.ipynb
│   │   ├── makeFeatureFiles-TEST.ipynb
│   │   ├── makeFeatureFiles.ipynb
│   │   ├── makeFeaturesResNet50.ipynb
│   │   ├── makeSiameseCMC.ipynb
│   │   ├── makeVeri.py
│   │   ├── recomputeCorpus.ipynb
│   │   ├── saveExtractFeatsFromChips.ipynb
│   │   ├── siamese.ipynb
│   │   ├── siameseModelIterator-15.py
│   │   ├── siameseModelIterator.ipynb
│   │   ├── splitDataset.ipynb
│   │   ├── test_analysis.py
│   │   └── unsorted
│   │       ├── __init__.py
│   │       ├── makeH5pyFile.ipynb
│   │       └── recompute
│   │           ├── __init__.py
│   │           ├── compute.py
│   │           └── extract_feats_from_chips.py
│   ├── const.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── chip.py
│   │   ├── chipper.py
│   │   ├── compcar.py
│   │   ├── dgcars.py
│   │   ├── featuredataset.py
│   │   ├── slice.py
│   │   ├── str.py
│   │   └── veri.py
│   ├── etl
│   │   ├── __init__.py
│   │   ├── computeMatrixCMC.py
│   │   ├── json2h5.py
│   │   ├── makeDistMatrix.py
│   │   ├── makeFeaturesResNet50.py
│   │   └── veriFileList2Json.py
│   ├── experiment_api
│   │   ├── __init__.py
│   │   ├── experiment.py
│   │   ├── metric.py
│   │   └── run_metric.sh
│   ├── features
│   │   ├── feature_producer.py
│   │   ├── hog.py
│   │   ├── keras_model.py
│   │   └── resnet50.py
│   ├── models
│   │   ├── __init__.py
│   │   └── makesvm.py
│   ├── training
│   │   ├── CNN Retrainer.ipynb
│   │   ├── Debug CNN Retrainer.ipynb
│   │   ├── cnn_retrainer.py
│   │   └── utils.py
│   ├── transform_img
│   │   ├── __init__.py
│   │   ├── run.sh
│   │   └── transform.py
│   └── utils.py
├── requirements.txt
├── setup.py
└── testci
    ├── install.sh
    ├── small.hdf5
    ├── small.json
    ├── test_chip.py
    ├── test_chipper.py
    ├── test_compcar.py
    ├── test_dgcars.py
    ├── test_experiment_utils.py
    ├── test_featuredataset.py
    ├── test_featureproducer.py
    ├── test_hog_feature.py
    ├── test_keras_load_model.py
    ├── test_keras_model_feature.py
    ├── test_resnet50_feature.py
    ├── test_slice.py
    ├── test_str.py
    ├── test_training_utils.py
    └── test_veri.py
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | branch = True
3 |
4 | [report]
5 | # Regexes for lines to exclude from consideration
6 | exclude_lines =
7 | # Have to re-enable the standard pragma
8 | pragma: no cover
9 |
10 | # Don't complain about missing debug-only code:
11 | def __repr__
12 | if self\.debug
13 |
14 | # Don't complain if tests don't hit defensive assertion code:
15 | raise AssertionError
16 | raise NotImplementedError
17 |
18 | # Don't complain if non-runnable code isn't run:
19 | if 0:
20 | if __name__ == .__main__.:
21 |
22 | ignore_errors = True
23 |
24 | [html]
25 | directory = coverage_html_report
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | venv/
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *,cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 |
56 | # Sphinx documentation
57 | docs/_build/
58 |
59 | # PyBuilder
60 | target/
61 |
62 | #PyCharm
63 | .idea
64 |
66 | # IPython Notebook
66 | .ipynb_checkpoints
67 |
68 | # swap files
69 | *.swp
70 |
71 | # OSX crap
72 | .DS_Store
73 |
74 | # pickled models
75 | **/*.pickle
76 |
77 | #other crap
78 | **/.ropeproject
79 | checkscript.sh
80 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | - repo: https://github.com/pre-commit/pre-commit
2 | sha: v0.9.4
3 | hooks:
4 | - id: validate_config
5 | - repo: git@github.com:pre-commit/pre-commit-hooks
6 | sha: v0.6.1
7 | hooks:
8 | - id: autopep8-wrapper
9 | - id: check-case-conflict
10 | - id: check-json
11 | - id: check-merge-conflict
12 | - id: check-symlinks
13 | - id: check-yaml
14 | - id: end-of-file-fixer
15 | - id: pretty-format-json
16 | args:
17 | - --autofix
18 | - id: trailing-whitespace
19 | - repo: git@github.com:asottile/reorder_python_imports
20 | sha: v0.3.0
21 | hooks:
22 | - id: reorder-python-imports
23 | - repo: git@github.com:Lab41/verboten_words.git
24 | sha: v1.0.0
25 | hooks:
26 | - id: verboten-words
27 |
--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
1 | # This file lists all individuals having contributed content to the repository.
2 | # If you're submitting a patch, please add your name here in alphabetical order as part of the patch.
3 | #
4 | # For a list of active project maintainers, see the maintainers.md file.
5 | #
6 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # 0.1.0-dev (current, unreleased)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Pelops
2 |
3 | [](https://circleci.com/gh/Lab41/pelops)[](https://codecov.io/gh/Lab41/pelops)
4 |
5 |
6 |
7 |
8 | Pelops is a project by [Lab41](http://www.lab41.org/) that uses deep learning
9 | based methods to automatically identify cars by using their large scale
10 | features—color, shape, light configuration, etc.
11 |
12 | ## Install Instructions
13 |
14 | Pelops provides several Docker containers that assist in running the project.
15 | You can build them by checking out the code and running make:
16 |
17 | ```bash
18 | git clone https://github.com/Lab41/pelops.git
19 | cd pelops
20 | make
21 | ```
22 |
23 | Then:
24 |
25 | ```bash
26 | make notebook
27 | ```
28 |
29 | This runs a container with Pelops and a notebook server.
30 |
31 | Otherwise you can install Pelops using `pip`:
32 |
33 | ```bash
34 | git clone https://github.com/Lab41/pelops.git
35 | pip install ./pelops
36 | ```
37 |
38 | Several dependencies need to be installed. The
39 | [`requirements.txt`](requirements.txt) file covers most of them, but other
40 | packages such as [Keras](https://keras.io/) and
41 | [TensorFlow](https://www.tensorflow.org/) are also required. For this reason
42 | it is suggested that you use the notebook container to run Pelops.
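   |
   | A manual setup outside the container might look like this (a sketch; the
   | pinned versions in [`circle.yml`](circle.yml) and
   | [`docker/Dockerfile.base`](docker/Dockerfile.base) are authoritative):
   |
   | ```bash
   | pip install -r requirements.txt
   | pip install tensorflow==0.12.* \
   |     git+git://github.com/fchollet/keras.git@2ad3544b017fe9c0d7a25ef0640baa52281372b5
   | ```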
43 |
44 | ## Documentation
45 |
46 | - [Turning Chips into Features](docs/chips_to_features.md)
47 |
48 | ## Tests
49 |
50 | Tests are written with [pytest](https://docs.pytest.org/en/latest/) and run automatically on every pull request.
51 |
52 | You can run the tests in a container by calling:
53 |
54 | ```bash
55 | make test
56 | ```
57 |
58 | This will build a docker container, mount your local version of the code, and
59 | run the tests.
60 |
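   | If you would rather run the tests outside of Docker, the CI invocation in
   | [`circle.yml`](circle.yml) is a reasonable sketch (it assumes the
   | dependencies above are installed):
   |
   | ```bash
   | python3 -m pytest -v --cov
   | ```
   |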
61 | ## Contributing to Pelops
62 |
63 | Want to contribute? Awesome!
64 |
65 | Please make sure you have [`pre-commit`](http://pre-commit.com/) installed so
66 | that your code is checked for various issues.
67 |
68 | After that, send us a pull request! We're happy to review them!
69 |
--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1 | v0.1.0-dev
2 |
--------------------------------------------------------------------------------
/circle.yml:
--------------------------------------------------------------------------------
1 | # Controls the build environment
2 | machine:
3 | python:
4 | version: 3.5.2
5 | environment:
6 | PATH: /home/ubuntu/miniconda3/bin:$PATH
7 |
8 | dependencies:
9 | override:
10 |     # Move to the pelops directory before performing the installation.
11 | - cd ~/pelops
12 | - source testci/install.sh:
13 | environment:
14 | DISTRIB: "conda"
15 | PYTHON_VERSION: "3.5"
16 | NUMPY_VERSION: "*"
17 | SCIPY_VERSION: "*"
18 | SCIKIT_LEARN_VERSION: "*"
19 | MATPLOTLIB_VERSION: "*"
20 | - conda install -y opencv hdfs3
21 | - conda install -y pytest pytest-cov pillow h5py scipy scikit-image
22 | - /home/ubuntu/miniconda3/bin/pip install imageio
23 | - /home/ubuntu/miniconda3/bin/pip install tensorflow==0.12.* git+git://github.com/fchollet/keras.git@2ad3544b017fe9c0d7a25ef0640baa52281372b5
24 |
25 | # Set up the commands to run as a test (override), as well as the commands to
26 | # run before (pre) and after (post).
27 | test:
28 | pre:
29 | - mkdir -p $CIRCLE_TEST_REPORTS/junit/
30 | override:
31 | # Test installation via pip
32 | - cd /home/ubuntu/pelops && /home/ubuntu/miniconda3/bin/pip install .
33 | # Test importing installed package
34 | - /home/ubuntu/miniconda3/bin/python3 -c "import pelops; import pelops.datasets"
35 | # Run pytest tests
36 | - cd /home/ubuntu/pelops && /home/ubuntu/miniconda3/bin/python3 -m pytest -v --cov --cov-report=term-missing:skip-covered --junitxml=$CIRCLE_TEST_REPORTS/junit/junit_output.xml
37 | post:
38 | - bash <(curl -s https://codecov.io/bash) -t 08234947-61d0-48ea-b0f0-1c82d3f2dfd7
39 |
--------------------------------------------------------------------------------
/docker/Dockerfile.base:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu14.04
2 |
3 | MAINTAINER Lab41
4 |
5 | RUN apt-get update && \
6 | apt-get install -y \
7 | bzip2 \
8 | ca-certificates \
9 | git \
10 | libglib2.0-0 \
11 | libsm6 \
12 | libxext6 \
13 | libxrender1 \
14 | wget
15 |
16 | #Configure environment
17 | ENV CONDA_DIR=/opt/conda \
18 | # 4.2.12 is the last version with Python3.5, which we need
19 | MINICONDA_SCRIPT=Miniconda3-4.2.12-Linux-x86_64.sh \
20 | MINICONDA_SHA=c59b3dd3cad550ac7596e0d599b91e75d88826db132e4146030ef471bb434e9a
21 |
22 | # Install conda
23 | RUN cd /tmp && \
24 | mkdir -p $CONDA_DIR && \
25 | wget --quiet https://repo.continuum.io/miniconda/${MINICONDA_SCRIPT} && \
26 | echo "${MINICONDA_SHA} ${MINICONDA_SCRIPT}" | sha256sum -c - && \
27 | /bin/bash ${MINICONDA_SCRIPT} -f -b -p $CONDA_DIR && \
28 | rm ${MINICONDA_SCRIPT}
29 |
30 | RUN $CONDA_DIR/bin/conda install --quiet --yes \
31 | 'conda-build=2.1.*' \
32 | 'cython=0.24*' \
33 | 'h5py=2.6*' \
34 | 'hdfs3=0.1.*' \
35 | 'libhdfs3=2.2.*' \
36 | 'numpy=1.11*' \
37 | 'pillow=3.4*' \
38 | 'pytest=3.0.*' \
39 | 'python=3.5.*' \
40 | 'scikit-image=0.12*' \
41 | 'scikit-learn=0.18*' \
42 | && $CONDA_DIR/bin/conda clean -tipsy
43 |
44 | RUN $CONDA_DIR/bin/conda update pip --quiet --yes
45 |
46 | # Install Python packages
47 | ENV TENSORFLOW_VERSION=0.12.* \
48 | KERAS_VERSION=2ad3544b017fe9c0d7a25ef0640baa52281372b5
49 | RUN $CONDA_DIR/bin/pip install git+git://github.com/fchollet/keras.git@${KERAS_VERSION} \
50 | tensorflow==${TENSORFLOW_VERSION} \
51 | imageio
52 |
53 | ENV INDOCKERCONTAINER 1
54 |
55 | ADD . /pelops_root
56 | WORKDIR /pelops_root
57 | ENV PYTHONPATH=/pelops_root/pelops:$PYTHONPATH \
58 | PATH=/usr/local/cuda/bin:/usr/local/nvidia/bin:$CONDA_DIR/bin:$PATH
59 |
60 | # install dependencies of plugins for pelops
61 | RUN for file in $(find . -name "requirements.txt"); \
62 | do \
63 | $CONDA_DIR/bin/pip install -r $file; \
64 | done
65 |
--------------------------------------------------------------------------------
/docker/Dockerfile.csv2json:
--------------------------------------------------------------------------------
1 | FROM continuumio/anaconda3:4.3.1
2 |
3 | MAINTAINER Lab41
4 |
5 | RUN mkdir -p /pelops_root
6 | WORKDIR /pelops_root
7 | COPY . .
8 | RUN pip install --no-cache-dir -r requirements.txt
9 |
10 | CMD python3 -m etl.convertCsvToJson
--------------------------------------------------------------------------------
/docker/Dockerfile.images2vecs:
--------------------------------------------------------------------------------
1 | FROM l41-pelops-base
2 |
3 | MAINTAINER Lab41
4 |
5 | RUN mkdir INPUT_DIR
6 | RUN mkdir OUTPUT_DIR
7 | RUN mkdir MODEL_DIR
8 |
9 | CMD ["python", "/pelops_root/etl/makeFeaturesYOURMODEL.py","./INPUT_DIR","./OUTPUT_DIR"]
10 |
--------------------------------------------------------------------------------
/docker/Dockerfile.notebook:
--------------------------------------------------------------------------------
1 | FROM l41-pelops-base
2 |
3 | MAINTAINER Lab41
4 |
5 | # The startup script installs Pelops with pip from this directory
6 | RUN mkdir /pelops
7 | WORKDIR /pelops
8 |
9 | # Run a notebook
10 | EXPOSE 8888
11 |
12 | # Install Jupyter notebook
13 | RUN conda install --quiet --yes \
14 | 'notebook=4.1*' \
15 | && conda clean -tipsy
16 |
17 | ADD pelops_start.sh /
18 |
19 | CMD ["/pelops_start.sh"]
20 |
--------------------------------------------------------------------------------
/docker/Dockerfile.rankDirectories:
--------------------------------------------------------------------------------
1 | FROM l41-pelops-base
2 |
3 | MAINTAINER Lab41
4 |
5 |
6 | RUN mkdir INPUT_DIR1
7 | RUN mkdir INPUT_DIR2
8 | RUN mkdir MODEL_DIR
9 | RUN mkdir OUTPUT_DIR
10 |
11 | CMD ["python", "/pelops_root/etl/compareDirectory2Directory.py","./INPUT_DIR1","./INPUT_DIR2","./OUTPUT_DIR"]
12 |
--------------------------------------------------------------------------------
/docker/Dockerfile.test:
--------------------------------------------------------------------------------
1 | FROM l41-pelops-base
2 |
3 | MAINTAINER Lab41
4 |
5 | # Run the tests
6 | CMD ["python", "-m","pytest","-v","-s"]
7 |
--------------------------------------------------------------------------------
/docker/Dockerfile.vectorSiamese:
--------------------------------------------------------------------------------
1 | FROM l41-pelops-base
2 |
3 | MAINTAINER Lab41
4 |
5 | RUN mkdir INPUT_DIR1
6 | RUN mkdir INPUT_DIR2
7 | RUN mkdir MODEL_DIR
8 | RUN mkdir OUTPUT_DIR
9 |
10 | CMD ["python", "/pelops_root/etl/makeFeaturesTopSiamese.py","./INPUT_DIR1","./INPUT_DIR2","./OUTPUT_DIR"]
11 |
--------------------------------------------------------------------------------
/docker/pelops_start.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright (c) 2014, IPython: interactive computing in Python
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |
27 | # Strict mode
28 | set -euo pipefail
29 |
30 | # Clone the repo
31 | git clone https://github.com/lab41/pelops /pelops
32 | pip install /pelops
33 |
34 | # Launch the notebook
35 | jupyter notebook --no-browser --port 8888 --ip=* --NotebookApp.token=
36 |
--------------------------------------------------------------------------------
/docs/chips_to_features.md:
--------------------------------------------------------------------------------
1 | # Turning Chips into Features
2 |
3 | 1. Build the Docker containers using make:
4 |
5 | ```bash
6 | make
7 | ```
8 |
9 | 2. Map a folder with images and an output directory into the container, and run:
10 |
11 | ```bash
12 | CHIPDIR1=/folder/with/chips && \
13 | OUTPUTDIR=/folder/for/output && \
14 | docker run -v ${CHIPDIR1}:/pelops_root/INPUT_DIR -v ${OUTPUTDIR}:/pelops_root/OUTPUT_DIR l41-pelops-i2v
15 | ```
16 |
17 | Note: Docker creates output files owned by root. To create output files owned by the current user instead, grant the current user write access to OUTPUT_DIR and add `-u $(id -u $USER)` to the docker run command above.
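   |
   | For example, the same command with the ownership flag added:
   |
   | ```bash
   | docker run -u $(id -u $USER) -v ${CHIPDIR1}:/pelops_root/INPUT_DIR -v ${OUTPUTDIR}:/pelops_root/OUTPUT_DIR l41-pelops-i2v
   | ```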
18 |
19 | 3. (Advanced) Bring your own model:
20 |
21 | ```bash
22 | CHIPDIR1=/folder/with/chips && \
23 | OUTPUTDIR=/folder/for/output && \
24 | MODELDIR=/folder/with/models && \
25 | MODELFILE=name_of_model_file && \
26 | WEIGHTFILE=name_of_weight_file && \
27 | LAYERNAME=layername && \
28 | docker run -v ${CHIPDIR1}:/pelops_root/INPUT_DIR -v ${OUTPUTDIR}:/pelops_root/OUTPUT_DIR -v ${MODELDIR}:/pelops_root/MODEL_DIR -e MODEL="/pelops_root/MODEL_DIR/${MODELFILE}" -e WEIGHTS="/pelops_root/MODEL_DIR/${WEIGHTFILE}" -e LAYER="${LAYERNAME}" l41-pelops-i2v
29 | ```
30 |
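   | If `MODEL`, `WEIGHTS`, or `LAYER` is left unset, `etl/makeFeaturesYOURMODEL.py`
   | falls back to ImageNet-trained ResNet50 features; when none of the three are
   | given, it also exports the generated model, weights, and layer name alongside
   | the output vectors.
   |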
31 | Run the Siamese model as follows:
32 |
33 | ```bash
34 | CHIPDIR1=/folder/with/chips && \
35 | CHIPDIR2=/folder/with/other/chips && \
36 | OUTPUTDIR=/folder/for/output && \
37 | MODELDIR=/folder/with/models && \
38 | MODELFILE=name_of_model_file.json && \
39 | WEIGHTFILE=name_of_weight_file.hdf5 && \
40 | VECTORFILE=name_of_VECTOR_file.json && \
41 | docker run -v ${CHIPDIR1}:/pelops_root/INPUT_DIR1 -v ${CHIPDIR2}:/pelops_root/INPUT_DIR2 -v ${OUTPUTDIR}:/pelops_root/OUTPUT_DIR -v ${MODELDIR}:/pelops_root/MODEL_DIR -e WEIGHTS="/pelops_root/MODEL_DIR/${WEIGHTFILE}" -e MODEL="/pelops_root/MODEL_DIR/${MODELFILE}" -e VECTORS="/pelops_root/INPUT_DIR1/${VECTORFILE}" l41-pelops-siamese
42 | ```
43 |
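   | The `VECTORS` file is JSON-lines: each line is an object whose `left` and
   | `right` keys hold 2048-dimensional feature vectors, as read by
   | `etl/makeFeaturesTopSiamese.py`.
   |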
44 | Run the Ranker to compare two directories as follows:
45 |
46 | ```bash
47 | CHIPDIR1=/folder/with/chips && \
48 | CHIPDIR2=/folder/with/other/chips && \
49 | OUTPUTDIR=/folder/for/output && \
50 | MODELDIR=/folder/with/models && \
51 | MODELFILE=name_of_model_file.json && \
52 | WEIGHTFILE=name_of_weight_file.hdf5 && \
53 | LAYERNAME=layername && \
54 | docker run -v ${CHIPDIR1}:/pelops_root/INPUT_DIR1 -v ${CHIPDIR2}:/pelops_root/INPUT_DIR2 -v ${OUTPUTDIR}:/pelops_root/OUTPUT_DIR -v ${MODELDIR}:/pelops_root/MODEL_DIR -e WEIGHTS="/pelops_root/MODEL_DIR/${WEIGHTFILE}" -e MODEL="/pelops_root/MODEL_DIR/${MODELFILE}" -e LAYER="${LAYERNAME}" l41-pelops-ranker
55 | ```
56 |
57 | Note: as above, add `-u $(id -u $USER)` to the docker run commands to create output files owned by the current user.
58 |
59 | Run the CSV-to-JSON conversion container as follows:
60 |
61 | ```bash
62 | CSV1=/path/to/file1.csv && \
63 | CSV2=/path/to/file2.csv && \
64 | MODE=product && \
65 | JSON=/path/to/output.json && \
66 | docker run -e pelops_csv_1="${CSV1}" -e pelops_csv_2="${CSV2}" -e pelops_csv_mode=${MODE} -e pelops_json="${JSON}" l41-pelops-c2j
67 | ```
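   |
   | The `pelops_csv_mode` value controls how records are combined (see the
   | docstring in `etl/convertCsvToJson.py`): `product` (the default) takes the
   | Cartesian product of the records from two CSVs, while `combo` uses pair-wise
   | combinations of the records from one or more CSVs, e.g. `MODE=combo` in the
   | snippet above.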
68 |
--------------------------------------------------------------------------------
/etl/compareDirectory2Directory.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import json
3 | import os
4 | import sys
5 | import time
6 |
7 | import numpy as np
8 | import scipy.spatial.distance
9 | from keras.applications.resnet50 import preprocess_input
10 | from keras.models import Model
11 | from keras.models import model_from_json
12 | from keras.preprocessing import image
13 |
14 |
15 | def load_image(img_path):
16 | data = image.load_img(img_path, target_size=(224, 224))
17 | x = image.img_to_array(data)
18 | x = np.expand_dims(x, axis=0)
19 | x = preprocess_input(x)
20 | return x
21 |
22 |
23 | def load_model_workaround(model_file, weight_file):
24 | # load json and create model
25 | json_file = open(model_file, 'r')
26 | loaded_model_json = json_file.read()
27 | json_file.close()
28 | loaded_model = model_from_json(loaded_model_json)
29 | # load weights into new model
30 | loaded_model.load_weights(weight_file)
31 | return loaded_model
32 |
33 |
34 | def get_models(model=None, weights=None):
35 | model = load_model_workaround(model, weights)
36 | return model
37 |
38 |
39 | def image_features(left, right, model):
40 | predictions = model.predict([left, right])
41 | return predictions
42 |
43 |
44 | def find_images(topdir):
45 | retval = []
46 | exten = ['jpg', 'bmp', 'png']
47 | images = 'images'
48 |
49 | for dirpath, dirnames, files in os.walk(topdir):
50 | for name in files:
51 | if name.lower().split('.')[-1] in exten:
52 |                 if images in dirpath.lower():  # keep files whose path mentions 'images'
53 | retval.append(os.path.join(dirpath, name))
54 | return retval
55 |
56 |
57 | def write_data(vector_file, limage_file, rimage_file, feature):
58 | list_feature = feature.flatten().tolist()
59 | str_feature = ','.join(str(j) for j in list_feature)
60 | outdata = '{0},{1},{2}\n'.format(limage_file, rimage_file, str_feature)
61 | vector_file.write(outdata)
62 | vector_file.flush()
63 |
64 |
65 | def main(argv=None):
66 | if argv is None:
67 | argv = sys.argv
68 | image_dir_l = argv[1]
69 | image_dir_r = argv[2]
70 | vector_dir = argv[3]
71 |
72 | model_file = os.environ.get('MODEL', None)
73 | weights_file = os.environ.get('WEIGHTS', None)
74 | layer = os.environ.get('LAYER', None)
75 |
76 | vector_file_name = os.path.join(
77 | vector_dir, 'vectorOutputFile_{0}.csv'.format(time.time()))
78 |
79 | vector_file = open(vector_file_name, 'w')
80 |
81 | images_left = find_images(image_dir_l)
82 | images_right = find_images(image_dir_r)
83 |
84 | model = get_models(model_file, weights_file)
85 |
86 | for limage_file in images_left:
87 | for rimage_file in images_right:
88 |
89 | l_img = load_image(limage_file)
90 | r_img = load_image(rimage_file)
91 |
92 | feature = image_features(l_img, r_img, model)
93 |
94 | write_data(vector_file, limage_file, rimage_file, feature)
95 |
96 | vector_file.close()
97 |
98 | if __name__ == "__main__":
99 | sys.exit(main())
100 |
--------------------------------------------------------------------------------
/etl/convertCsvToJson.py:
--------------------------------------------------------------------------------
1 | """
2 | Conversion script for image2vecs feature vector csvs to siamese json
3 |
4 | Environment Variables:
5 | - pelops_csv_*: one or more file paths to csvs for conversion
6 | - pelops_csv_mode:
7 | - 'product': Combine using the cartesian product of the records from 2x csvs [default]
8 | - 'combo': Combine using pair-wise combinations of records for each csv (1 or more)
9 | - pelops_json: Path to output json file
10 | """
11 |
12 | import os
13 | import sys
14 | import traceback
15 | from pelops.utils import prep_for_siamese
16 |
17 | if __name__ == '__main__':
18 | csv_files = [v for k, v in os.environ.items() if k.startswith('pelops_csv') and os.path.isfile(v)]
19 |
20 | if len(csv_files) == 0:
21 | print("No CSV files were provided for conversion")
22 | sys.exit(-1)
23 | print("Converting {} csv files:\n\t - {}".format(len(csv_files), '\n\t - '.join(csv_files)))
24 |
25 | mode = os.getenv('pelops_csv_mode', 'product')
26 | print("Mode: {}".format(mode))
27 |
28 | out_json = os.getenv('pelops_json', None)
29 | if out_json is None:
30 |         print("Output json file path was not specified")
   |         sys.exit(-1)
31 |     print("Json: {}".format(out_json))
32 |
33 | try:
34 | prep_for_siamese(*csv_files, json_file=out_json, full_combos=(mode != 'product'))
35 | print("Conversion success")
36 |     except Exception:
37 | print("Conversion error occurred:\n{}".format(traceback.format_exc()))
38 |
--------------------------------------------------------------------------------
/etl/makeFeaturesResNet50.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import json
3 | import os
4 | import sys
5 | import time
6 |
7 | import numpy as np
8 | import scipy.spatial.distance
9 | from keras.applications.resnet50 import ResNet50, preprocess_input
10 | from keras.models import Model
11 | from keras.preprocessing import image
12 |
13 |
14 | def load_image(img_path):
15 | data = image.load_img(img_path, target_size=(224, 224))
16 | x = image.img_to_array(data)
17 | x = np.expand_dims(x, axis=0)
18 | x = preprocess_input(x)
19 | return x
20 |
21 |
22 | def get_models():
23 | # include_top needs to be True for this to work
24 | base_model = ResNet50(weights='imagenet', include_top=True)
25 | model = Model(input=base_model.input,
26 | output=base_model.get_layer('flatten_1').output)
27 | return (model, base_model)
28 |
29 |
30 | def image_features(img, model):
31 |     # model.predict already returns the feature array
32 |     predictions = model.predict(img)
33 |     return predictions
34 |
35 |
36 | def find_images(topdir):
37 | retval = []
38 | exten = ['jpg', 'bmp', 'png']
39 | images = 'images'
40 |
41 | for dirpath, dirnames, files in os.walk(topdir):
42 | for name in files:
43 | if name.lower().split('.')[-1] in exten:
44 |                 if images in dirpath.lower():  # keep files whose path mentions 'images'
45 | retval.append(os.path.join(dirpath, name))
46 | return retval
47 |
48 |
49 | def write_data(vector_file, image_file, feature):
50 | list_feature = feature.flatten().tolist()
51 | str_feature = ','.join(str(j) for j in list_feature)
52 | outdata = '{0},{1}\n'.format(image_file, str_feature)
53 | vector_file.write(outdata)
54 | vector_file.flush()
55 |
56 |
57 | def main(argv=None):
58 | if argv is None:
59 | argv = sys.argv
60 | image_dir = argv[1]
61 | vector_dir = argv[2]
62 | vector_file_name = os.path.join(
63 | vector_dir, 'vectorOutputFile_{0}.csv'.format(time.time()))
64 | vector_file = open(vector_file_name, 'w')
65 |
66 | images = find_images(image_dir)
67 |
68 | model, base_model = get_models()
69 |
70 | for image_file in images:
71 | img = load_image(image_file)
72 | feature = image_features(img, model)
73 | write_data(vector_file, image_file, feature)
74 | print('processed {0}'.format(image_file))
75 |
76 | vector_file.close()
77 |
78 | if __name__ == "__main__":
79 | sys.exit(main())
80 |
--------------------------------------------------------------------------------
/etl/makeFeaturesTopSiamese.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import sys
4 | import time
5 |
6 | import keras.backend.tensorflow_backend as KTF
7 | import numpy as np
8 | import tensorflow as tf
9 | from keras import backend as K
10 | from keras.applications.resnet50 import preprocess_input
11 | from keras.applications.resnet50 import ResNet50
12 | from keras.callbacks import EarlyStopping
13 | from keras.callbacks import ModelCheckpoint
14 | from keras.callbacks import ReduceLROnPlateau
15 | from keras.callbacks import TensorBoard
16 | from keras.layers import Dense
17 | from keras.layers import GlobalAveragePooling2D
18 | from keras.layers import Input
19 | from keras.layers import Lambda
20 | from keras.layers import merge
21 | from keras.layers.normalization import BatchNormalization
22 | from keras.models import load_model
23 | from keras.models import Model
24 | from keras.models import model_from_json
25 | from keras.optimizers import RMSprop
26 | from keras.preprocessing import image
27 | from keras.utils.np_utils import to_categorical
28 |
29 |
30 | def just_the_top(num_training_classes, model_file, weights_file):
31 |
32 | def load_model_workaround(model_file, weight_file):
33 | # load json and create model
34 | json_file = open(model_file, 'r')
35 | loaded_model_json = json_file.read()
36 | json_file.close()
37 | loaded_model = model_from_json(loaded_model_json)
38 | # load weights into new model
39 | loaded_model.load_weights(weight_file)
40 | return loaded_model
41 |
42 | def s_distance(vects):
43 | """
44 | return the abs difference between vectors
45 | """
46 | x, y = vects
47 | s = K.abs(x - y)
48 | return s
49 |
50 | def s_shape(shapes):
51 | """
52 |         return the shape of the vector being used
53 | """
54 | shape = list(shapes)
55 | outshape = (shape[0])
56 | return tuple(outshape)
57 |
58 | original_model = load_model_workaround(model_file, weights_file)
59 | d1 = original_model.get_layer('dense_1')
60 |     d1_len = d1.get_output_shape_for(d1.get_input_shape_at(0))[1]
61 | d2 = original_model.get_layer('dense_2')
62 | b1 = original_model.get_layer('batchnormalization_1')
63 |
64 | input_left = Input(shape=(1, 1, 2048))
65 | input_right = Input(shape=(1, 1, 2048))
66 |
67 | # use a distance measure for making the join
68 | siamese_join = Lambda(s_distance,
69 | output_shape=s_shape)([input_left, input_right])
70 | my_layer = GlobalAveragePooling2D()(siamese_join)
71 | my_d1 = Dense(d1_len, activation='relu')(my_layer)
72 | bn = BatchNormalization()(my_d1)
73 | predictions = Dense(num_training_classes, activation='sigmoid')(bn)
74 | model = Model([input_left, input_right], output=predictions)
75 |
76 | print(model.summary())
77 | model.get_layer('dense_1').set_weights(d1.get_weights())
78 | model.get_layer('dense_2').set_weights(d2.get_weights())
79 | model.get_layer('batchnormalization_1').set_weights(b1.get_weights())
80 |
81 | return model
82 |
83 |
84 | def write_data(vector_file, index, feature):
85 | list_feature = feature.flatten().tolist()
86 | str_feature = ','.join(str(j) for j in list_feature)
87 | outdata = '{0}|{1}\n'.format(index, str_feature)
88 | vector_file.write(outdata)
89 | vector_file.flush()
90 |
91 |
92 | def make_top():
93 | a = np.ones((1, 1, 1, 2048))
94 | top = just_the_top(3,
95 | '/pelops_root/MODEL_DIR/VeRi-siamese-weekend.model.json',
96 | '/pelops_root/MODEL_DIR/VeRi-siamese-weekend.weights.hdf5')
97 | print('*********** test **********')
98 | print(top.predict([a, a])[0])
99 | # Out[8]: array([[ 0.98460394, 0.99653435, 0.99870515]], dtype=float32)
100 | print('*********** test **********')
101 | return top
102 |
103 |
104 | def main(argv=None):
105 |
106 | #model = make_top()
107 | # test()
108 |
109 | if argv is None:
110 | argv = sys.argv
111 | image_dir_l = argv[1]
112 | image_dir_r = argv[2]
113 | output_dir = argv[3]
114 |
115 | input_file_name = os.environ.get('VECTORS', None)
116 | model_file = os.environ.get('MODEL', None)
117 | weights_file = os.environ.get('WEIGHTS', None)
118 |
119 | vector_file_name = os.path.join(
120 | output_dir, 'vectorOutputFile_{0}.csv'.format(time.time()))
121 |
122 | vector_o_file = open(vector_file_name, 'w')
123 | vector_i_file = open(input_file_name, 'r')
124 |
125 | print(3, model_file, weights_file)
126 | model = just_the_top(3, model_file, weights_file)
127 |
128 | for index, line in enumerate(vector_i_file):
129 | line = line.strip()
130 | j_line = json.loads(line)
131 | left = j_line['left']
132 | right = j_line['right']
133 | np_l = np.array(left)
134 | np_r = np.array(right)
135 | np_l = np_l.reshape(1, 1, 1, 2048)
136 | np_r = np_r.reshape(1, 1, 1, 2048)
137 | data = [np_l, np_r]
138 | feature = model.predict(data)
139 | feature = feature[0]
140 | write_data(vector_o_file, index, feature)
141 |
142 | vector_o_file.close()
143 |
144 | if __name__ == "__main__":
145 | sys.exit(main())
146 |
--------------------------------------------------------------------------------
/etl/makeFeaturesYOURMODEL.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import os
3 | import sys
4 | import time
5 |
6 | import numpy as np
7 | from keras.applications.resnet50 import preprocess_input
8 | from keras.applications.resnet50 import ResNet50
9 | from keras.models import Model, model_from_json
10 | from keras.preprocessing import image
11 |
12 | DEFAULT_LAYER_NAME = 'flatten_1'
13 |
14 |
15 | def load_image(img_path):
16 | data = image.load_img(img_path, target_size=(224, 224))
17 | x = image.img_to_array(data)
18 | x = np.expand_dims(x, axis=0)
19 | x = preprocess_input(x)
20 | return x
21 |
22 |
23 | def save_model_workaround(model, layer, model_output_file, weights_output_file, layer_output_file):
24 | print('saving model to {}'.format(model_output_file))
25 | print('saving weights to {}'.format(weights_output_file))
26 | print('saving layer to {}'.format(layer_output_file))
27 | # serialize model to JSON
28 | model_json = model.to_json()
29 | with open(model_output_file, 'w') as json_file:
30 | json_file.write(model_json)
31 | # serialize weights to HDF5
32 | model.save_weights(weights_output_file)
33 | # Write layer name to text
34 | with open(layer_output_file, 'w') as lyr_out:
35 | lyr_out.write(layer)
36 |
37 |
38 | def load_model_workaround(model_file, weight_file):
39 | # load json and create model
40 | json_file = open(model_file, 'r')
41 | loaded_model_json = json_file.read()
42 | json_file.close()
43 | loaded_model = model_from_json(loaded_model_json)
44 | # load weights into new model
45 | loaded_model.load_weights(weight_file)
46 | return loaded_model
47 |
48 |
49 | def get_models(model=None, weights=None, layer=None):
50 | # include_top needs to be True for this to work
51 | if model is None or weights is None or layer is None:
52 | print('MODEL NOT FULLY SPECIFIED, USING RESNET FEATURES')
53 | base_model = ResNet50(weights='imagenet', include_top=True)
54 | model = Model(input=base_model.input,
55 | output=base_model.get_layer(DEFAULT_LAYER_NAME).output)
56 | else:
57 | base_model = load_model_workaround(model, weights)
58 | base_layer_names = {lyr.name for lyr in base_model.layers}
59 | base_is_siamese = all([(name in base_layer_names) for name in ['dense_1', 'dense_2', 'lambda_1']])
60 |
61 | if base_is_siamese:
62 | print('Input model is siamese, extracting resnet.')
63 | fresh_resnet = ResNet50(weights='imagenet', include_top=True)
64 | fresh_resnet.set_weights(base_model.get_layer('resnet50').get_weights())
65 | model = Model(input=fresh_resnet.input,
66 | output=fresh_resnet.get_layer(DEFAULT_LAYER_NAME).output)
67 | else:
68 | model = Model(input=base_model.input,
69 | output=base_model.get_layer(layer).output)
70 | return model
71 |
72 |
73 | def image_features(img, model):
74 | predictions = model.predict(img)
75 | return predictions
76 |
77 |
78 | def find_images(topdir):
79 | retval = []
80 | exten = ['jpg', 'bmp', 'png']
81 | images = 'images'
82 |
83 | for dirpath, dirnames, files in os.walk(topdir):
84 | for name in files:
85 | if name.lower().split('.')[-1] in exten:
86 |                 if images in dirpath.lower():  # keep files whose path mentions 'images'
87 | retval.append(os.path.join(dirpath, name))
88 | return retval
89 |
90 |
91 | def write_data(vector_file, image_file, feature):
92 | list_feature = feature.flatten().tolist()
93 | str_feature = ','.join(str(j) for j in list_feature)
94 | outdata = '{0},{1}\n'.format(image_file, str_feature)
95 | vector_file.write(outdata)
96 | vector_file.flush()
97 |
98 |
99 | def main(argv=None):
100 | if argv is None:
101 | argv = sys.argv
102 | image_dir = argv[1]
103 | vector_dir = argv[2]
104 |
105 | model_file = os.environ.get('MODEL', None)
106 | weights_file = os.environ.get('WEIGHTS', None)
107 | layer_name = os.environ.get('LAYER', None)
108 |
109 | vector_file_name = os.path.join(
110 | vector_dir, 'vectorOutputFile_{0}.csv'.format(time.time()))
111 | vector_file = open(vector_file_name, 'w')
112 |
113 | images = find_images(image_dir)
114 |
115 | model = get_models(model_file, weights_file, layer_name)
116 |
117 | # Export model, weights, and layer if not originally supplied by the environment
118 | if all(map(lambda v: v is None, [model_file, weights_file, layer_name])):
119 | date_time = time.strftime('%Y%m%d_%H%M%S')
120 | make_out_file = lambda n: os.path.join(vector_dir, date_time + '.' + n)
121 | save_model_workaround(model, DEFAULT_LAYER_NAME, make_out_file('model'),
122 | make_out_file('weights'), make_out_file('layer'))
123 |
124 | for image_file in images:
125 | img = load_image(image_file)
126 | feature = image_features(img, model)
127 | write_data(vector_file, image_file, feature)
128 | print('processed {0}'.format(image_file))
129 |
130 | vector_file.close()
131 |
132 | if __name__ == "__main__":
133 | sys.exit(main())
134 |
--------------------------------------------------------------------------------
/maintainers.md:
--------------------------------------------------------------------------------
1 | Listing of the project Maintainers
--------------------------------------------------------------------------------
/makefile:
--------------------------------------------------------------------------------
1 | all: base image2vecs siamese ranker build-notebook csv2json
2 |
3 | # Base requirements for all containers
4 | base:
5 | docker build -t l41-pelops-base -f docker/Dockerfile.base .
6 |
7 | # Jupyter notebook server
8 | build-notebook: base
9 | docker build -t l41-pelops-notebook -f docker/Dockerfile.notebook ./docker/
10 |
11 | notebook: build-notebook
12 | docker run -p 8888:8888 -it l41-pelops-notebook
13 |
14 | # Tests
15 | test: base
16 | docker build -t l41-pelops-tests -f docker/Dockerfile.test .
17 | docker run l41-pelops-tests
18 |
19 | # Image processing
20 | image2vecs: base
21 | docker build -t l41-pelops-i2v -f docker/Dockerfile.images2vecs .
22 |
23 | siamese: base
24 | docker build -t l41-pelops-siamese -f docker/Dockerfile.vectorSiamese .
25 |
26 | ranker: base
27 | docker build -t l41-pelops-ranker -f docker/Dockerfile.rankDirectories .
28 |
29 | # Conversion utility
30 | csv2json:
31 | docker build -t l41-pelops-c2j -f docker/Dockerfile.csv2json .
32 |
--------------------------------------------------------------------------------
/misc/pelops.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/misc/pelops.png
--------------------------------------------------------------------------------
/pelops/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/__init__.py
--------------------------------------------------------------------------------
/pelops/analysis/CMC_Confidence.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#set some constants\n",
10 | "DATASETFILE = '/path/to/dataFile'\n",
11 | "ITEMSPERCAMERA = 10\n",
12 | "YRANDOM=1024\n",
13 | "CAMERAS=2\n",
14 | "DROPPED=0\n",
15 | "CMC=100\n",
16 | "EXPERIMENTS=100"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": null,
22 | "metadata": {},
23 | "outputs": [],
24 | "source": [
25 | "from pelops.datasets.featuredataset import FeatureDataset\n",
26 | "from pelops.experiment_api.experiment import ExperimentGenerator\n",
27 | "from pelops.analysis import analysis\n",
28 | "\n",
29 | "\n",
30 | "#do the math\n",
31 | "featureData = FeatureDataset(DATASETFILE)\n",
32 | "expGen = ExperimentGenerator(featureData, CAMERAS, ITEMSPERCAMERA, DROPPED, YRANDOM)\n",
33 | "experimentHolder = analysis.repeat_pre_cmc(featureData,expGen,NUMCMC=CMC,EXPPERCMC=EXPERIMENTS)\n",
34 | "stats,gdata = analysis.make_cmc_stats(experimentHolder,ITEMSPERCAMERA)"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "%matplotlib inline\n",
44 | "import matplotlib.pyplot as plt\n",
45 | "\n",
46 | "#make the plots\n",
47 | "fig = plt.figure()\n",
48 | "ax = plt.subplot(111)\n",
49 | "\n",
50 | "ax.plot(gdata.transpose())\n",
51 | "plt.title('{} CMC curves with {} experiments per curve'.format(CMC,EXPERIMENTS))\n",
52 | "ax.legend(('-stddev','avg','+stddev'),bbox_to_anchor=(1, -0.05),\n",
53 | " fancybox=True, shadow=True, ncol=5)"
54 | ]
55 | }
56 | ],
57 | "metadata": {
58 | "anaconda-cloud": {},
59 | "kernelspec": {
60 | "display_name": "Python 3",
61 | "language": "python",
62 | "name": "python3"
63 | },
64 | "language_info": {
65 | "codemirror_mode": {
66 | "name": "ipython",
67 | "version": 3.0
68 | },
69 | "file_extension": ".py",
70 | "mimetype": "text/x-python",
71 | "name": "python",
72 | "nbconvert_exporter": "python",
73 | "pygments_lexer": "ipython3",
74 | "version": "3.5.2"
75 | }
76 | },
77 | "nbformat": 4,
78 | "nbformat_minor": 0
79 | }
--------------------------------------------------------------------------------
/pelops/analysis/CameraVsCamera.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "cd '~/work/pelops'"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "%matplotlib inline\n",
19 | "import matplotlib.pyplot as plt\n",
20 | "#import time\n",
21 | "import numpy as np\n",
22 | "from pelops.datasets.featuredataset import FeatureDataset\n",
23 | "from pelops.experiment_api.experiment import ExperimentGenerator\n",
24 | "from pelops.datasets import chip\n",
25 | "from pelops.models.makesvm import train_svm\n",
26 | "from pelops.analysis.comparecameras import mad_matrix"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": null,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": [
35 | "ITEMSPERCAMERA = 2\n",
36 | "NUMCAMERAS = 2\n",
37 | "TRAIN_RANDOM=1024\n",
38 | "DROPPED=0\n",
39 | "TRAIN_FEATURES = '/Users/dgrossman/image_NEW_TRAIN'\n",
40 | "EXAMPLES = 1000 \n",
41 | "\n",
42 | "fd_train = FeatureDataset(TRAIN_FEATURES)\n",
43 | "eg_train = ExperimentGenerator(fd_train, NUMCAMERAS, ITEMSPERCAMERA, DROPPED, TRAIN_RANDOM)\n"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": null,
49 | "metadata": {},
50 | "outputs": [],
51 | "source": [
52 | "clf_train = train_svm(EXAMPLES,fd_train,eg_train)\n"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": null,
58 | "metadata": {},
59 | "outputs": [],
60 | "source": [
61 | "g_train,b_train = mad_matrix(EXAMPLES,clf_train,fd_train,eg_train,'cam')"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": null,
67 | "metadata": {},
68 | "outputs": [],
69 | "source": [
70 | "ITEMSPERCAMERA = 2\n",
71 | "NUMCAMERAS = 2\n",
72 | "TEST_RANDOM=1024\n",
73 | "DROPPED=0\n",
74 | "TEST_FEATURES = '/Users/dgrossman/image_NEW_TEST'\n",
75 | "EXAMPLES = 1000\n",
76 | "\n",
77 | "fd_test = FeatureDataset(TEST_FEATURES)\n",
78 | "eg_test = ExperimentGenerator(fd_test, NUMCAMERAS, ITEMSPERCAMERA, DROPPED, TEST_RANDOM)"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": null,
84 | "metadata": {},
85 | "outputs": [],
86 | "source": [
87 | "g_test,b_test = mad_matrix(EXAMPLES,clf_train,fd_test,eg_test,'cam')"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": null,
93 | "metadata": {},
94 | "outputs": [],
95 | "source": [
96 | "allset = set()\n",
97 | "for key in g_test.keys():\n",
98 | " l,r = key.split('|')\n",
99 | " allset.add(l)\n",
100 | " allset.add(r)\n",
101 | "for key in b_test.keys():\n",
102 | " l,r = key.split('|')\n",
103 | " allset.add(l)\n",
104 | " allset.add(r)"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": null,
110 | "metadata": {},
111 | "outputs": [],
112 | "source": [
113 | "len(allset)\n",
114 | "s = (len(allset),len(allset))\n",
115 | "names = [x for x in allset]\n",
116 | "name2index = dict()\n",
117 | "index2name = dict()\n",
118 | "for index,item in enumerate(names):\n",
119 | " name2index[item] = index\n",
120 | " index2name[index] = item"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": null,
126 | "metadata": {},
127 | "outputs": [],
128 | "source": [
129 | "matrix = np.zeros(s)"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": null,
135 | "metadata": {},
136 | "outputs": [],
137 | "source": [
138 | "for carpcar in g_test.keys():\n",
139 | " n = g_test[carpcar]\n",
140 | " d = n\n",
141 | " if carpcar in b_test:\n",
142 | " d += b_test[carpcar]\n",
143 | " l,r = carpcar.split('|')\n",
144 | " matrix[name2index[l]][name2index[r]] = n / float(d)"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": null,
150 | "metadata": {},
151 | "outputs": [],
152 | "source": [
153 | "counts = list()\n",
154 | "spoo = list()\n",
155 | "\n",
156 | "for i in index2name:\n",
157 | " counts.append(i)\n",
158 | " spoo.append(index2name[i])"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": null,
164 | "metadata": {},
165 | "outputs": [],
166 | "source": [
167 | "import numpy as np\n",
168 | "import matplotlib.pyplot as plt\n",
169 | "plt.imshow(matrix,cmap='hot')\n",
170 | "plt.colorbar()\n",
171 | "plt.xticks(counts, spoo)\n",
172 | "plt.yticks(counts,spoo)"
173 | ]
174 | }
175 | ],
176 | "metadata": {
177 | "anaconda-cloud": {},
178 | "celltoolbar": "Raw Cell Format",
179 | "kernelspec": {
180 | "display_name": "Python [conda root]",
181 | "language": "python",
182 | "name": "conda-root-py"
183 | },
184 | "language_info": {
185 | "codemirror_mode": {
186 | "name": "ipython",
187 | "version": 3
188 | },
189 | "file_extension": ".py",
190 | "mimetype": "text/x-python",
191 | "name": "python",
192 | "nbconvert_exporter": "python",
193 | "pygments_lexer": "ipython3",
194 | "version": "3.5.2"
195 | }
196 | },
197 | "nbformat": 4,
198 | "nbformat_minor": 1
199 | }
200 |
--------------------------------------------------------------------------------
/pelops/analysis/SVMBinaryCarMatch.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "cd '~/work/pelops'"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "%matplotlib inline\n",
19 | "import matplotlib.pyplot as plt\n",
20 | "#import time\n",
21 | "#import numpy as np\n",
22 | "from pelops.datasets.featuredataset import FeatureDataset\n",
23 | "from pelops.experiment_api.experiment import ExperimentGenerator\n",
24 | "from pelops.datasets import chip\n",
25 | "from pelops.models.makesvm import train_svm, test_svm\n",
26 | "from pelops.analysis.comparecameras import mad_matrix"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": null,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": [
35 | "ITEMSPERCAMERA = 2\n",
36 | "NUMCAMERAS = 2\n",
37 | "TRAIN_RANDOM=1024\n",
38 | "DROPPED=0\n",
39 | "TRAIN_FEATURES = '/Users/dgrossman/image_NEW_TINY_TRAIN'\n",
40 | "\n",
41 | "fd_train = FeatureDataset(TRAIN_FEATURES)\n",
42 | "eg_train = ExperimentGenerator(fd_train, NUMCAMERAS, ITEMSPERCAMERA, DROPPED, TRAIN_RANDOM)\n"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": null,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "ITEMSPERCAMERA = 2\n",
52 | "NUMCAMERAS = 2\n",
53 | "TEST_RANDOM=1024\n",
54 | "DROPPED=0\n",
55 | "TEST_FEATURES = '/Users/dgrossman/image_NEW_TINY_TEST'\n",
56 | "\n",
57 | "fd_test = FeatureDataset(TEST_FEATURES)\n",
58 | "eg_test = ExperimentGenerator(fd_test, NUMCAMERAS, ITEMSPERCAMERA, DROPPED, TEST_RANDOM)"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "metadata": {},
65 | "outputs": [],
66 | "source": [
67 | "testpoints = [50,100,200,400,800,1600]"
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": null,
73 | "metadata": {},
74 | "outputs": [],
75 | "source": [
76 | "results = list()\n",
77 | "for testpoint in testpoints:\n",
78 | " clf_train = train_svm(testpoint,fd_train,eg_train)\n",
79 | " result = test_svm(testpoint,clf_train,fd_test,eg_test)\n",
80 | " print ('items: {}, score {}'.format(testpoint,result))\n",
81 | " results.append((testpoint,result))"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {},
88 | "outputs": [],
89 | "source": [
90 | "for i,s in results:\n",
91 | " print('items:{}, score:{}'.format(i,s))"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": null,
97 | "metadata": {},
98 | "outputs": [],
99 | "source": []
100 | }
101 | ],
102 | "metadata": {
103 | "anaconda-cloud": {},
104 | "kernelspec": {
105 | "display_name": "Python [conda root]",
106 | "language": "python",
107 | "name": "conda-root-py"
108 | },
109 | "language_info": {
110 | "codemirror_mode": {
111 | "name": "ipython",
112 | "version": 3
113 | },
114 | "file_extension": ".py",
115 | "mimetype": "text/x-python",
116 | "name": "python",
117 | "nbconvert_exporter": "python",
118 | "pygments_lexer": "ipython3",
119 | "version": "3.5.2"
120 | }
121 | },
122 | "nbformat": 4,
123 | "nbformat_minor": 1
124 | }
125 |
--------------------------------------------------------------------------------
/pelops/analysis/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/analysis/__init__.py
--------------------------------------------------------------------------------
/pelops/analysis/analysis.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from collections import defaultdict
3 | from scipy.spatial.distance import cosine, euclidean
4 |
5 |
6 | # compute cosine similarity (1 minus scipy's cosine distance)
7 | # 1 -> things are closer
8 | # 0 (or less) -> things are farther
9 | def comp_cosine(cam1_feat, cam2_feat):
10 | retval = 1 - cosine(cam1_feat, cam2_feat)
11 | return (retval)
12 |
13 |
14 | # compute Euclidean distance
15 | # 0 -> things are closer
16 | # + -> things are farther (note: the opposite sense from comp_cosine)
17 | def comp_euclid(cam1_feat, cam2_feat):
18 | retval = abs(euclidean(cam1_feat, cam2_feat))
19 | return (retval)
20 |
21 |
22 | # do the comparisons between chips
23 | # cam1 - listing of chips seen at cam1
24 | # cam2 - listing of chips seen at cam2
25 | # comparison - function scoring 2 vectors; scores are sorted in descending
26 | #              order, so larger values must mean a closer match (as with comp_cosine)
27 | # verbose - return more info if true
28 | def is_correct_match(featureData,
29 | cam1,
30 | cam2,
31 | comparison=comp_cosine, verbose=False):
32 | similarities = []
33 | for cam1_chip in cam1:
34 | cam1_feat = featureData.get_feats_for_chip(cam1_chip)
35 | for cam2_chip in cam2:
36 | cam2_feat = featureData.get_feats_for_chip(cam2_chip)
37 | similarity = comparison(cam1_feat, cam2_feat)
38 | similarities.append((similarity, cam1_chip, cam2_chip))
39 | similarities.sort(reverse=True)
40 | for i, (similarity, chip1, chip2) in enumerate(similarities):
41 | # return best_match
42 | if chip1.car_id == chip2.car_id:
43 | if verbose:
44 | return i, similarities
45 | else:
46 | return i
47 |     raise ValueError("no pair of chips shared a car_id")
48 |
49 |
50 | # run EXPPERCMC experiments, recording the rank of the correct match in each
51 | # featureData - big table to look up data
52 | # experimentGen - function to create experiments
53 | # EXPPERCMC - number of experiments to run for a single CMC
54 | # comparison - function to compare 2 feature vectors
55 | def pre_cmc(featureData, experimentGen,
56 | EXPPERCMC=1000, comparison=comp_cosine):
57 |
58 | num_downs = defaultdict(int)
59 | for i in range(EXPPERCMC):
60 | a = experimentGen.generate()
61 | num_down = is_correct_match(featureData, a[0], a[1],
62 | comparison=comparison)
63 | num_downs[num_down] += 1
64 |
65 | keys = sorted(num_downs)
66 | vals = [num_downs[key] for key in keys]
67 | return((keys, np.array(vals)/EXPPERCMC))
68 |
69 |
70 | # Generate unprocessed CMC curves
71 | # the data needs to be summed to make the correct
72 | # CMC curve
73 | # featureData - FeatureDataset of chips
74 | # experimentGen - ExperimentGenerator
75 | # NUMCMC - number of CMC to build
76 | # EXPPERCMC - number of experiments run per CMC
77 | # comparison - function that compares two feature vectors returning
78 | # distance measure, 0 -> close big -> far
79 | def repeat_pre_cmc(featureData, experimentGen, NUMCMC=100,
80 | EXPPERCMC=1000, comparison=comp_cosine):
81 | experimentHolder = []
82 | for experiment in range(NUMCMC):
83 | experimentHolder.append(pre_cmc(featureData, experimentGen,
84 | EXPPERCMC=EXPPERCMC,
85 | comparison=comparison))
86 | return experimentHolder
87 |
88 |
89 | # finalize creation of the CMC curves
90 | # generate statistics on the CMC curves
91 | # return all
92 | # experimentHolder - array of CMC curves
93 | # itemsPerCamera - number of items on a camera
94 | def make_cmc_stats(experimentHolder, itemsPerCamera):
95 | comparisons = itemsPerCamera*itemsPerCamera
96 | stats = np.zeros((len(experimentHolder), comparisons))
97 |
98 | for index, (keys, vals) in enumerate(experimentHolder):
99 | for keyIndex in range(len(keys)):
100 | stats[index, keys[keyIndex]] = vals[keyIndex]
101 |
102 | for index in range(len(stats[:, ])):
103 | total_sum = 0.0
104 | offsetlen = len(stats[0])
105 | for sample in range(offsetlen):
106 | total_sum += stats[index, sample]
107 | stats[index, sample] = total_sum
108 |
109 | gdata = np.zeros((3, comparisons))
110 |
111 | for i in range(comparisons):
112 | gdata[1, i] = np.average(stats[:, i])
113 | for i in range(comparisons):
114 | stddev = np.std(stats[:, i])
115 | gdata[0, i] = gdata[1, i] - stddev
116 | gdata[2, i] = gdata[1, i] + stddev
117 |
118 | return (stats, gdata)
119 |
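    | # Typical usage (a sketch drawn from CMC_Confidence.ipynb):
    | #   featureData = FeatureDataset(DATASETFILE)
    | #   expGen = ExperimentGenerator(featureData, CAMERAS, ITEMSPERCAMERA,
    | #                                DROPPED, YRANDOM)
    | #   experimentHolder = repeat_pre_cmc(featureData, expGen,
    | #                                     NUMCMC=100, EXPPERCMC=100)
    | #   stats, gdata = make_cmc_stats(experimentHolder, ITEMSPERCAMERA)
    | #   # gdata rows hold (avg - stddev, avg, avg + stddev) across the curves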
--------------------------------------------------------------------------------
/pelops/analysis/camerautil.py:
--------------------------------------------------------------------------------
1 | """ utilities when working with cameras"""
2 |
3 | from collections import defaultdict
4 |
5 |
6 | def nameit_cam(first, second):
7 | """
8 |     concatenate chip camera ids in a separable way
9 | first(chip) - first item
10 | second(chip) - second item
11 | """
12 | return '{}|{}'.format(first.cam_id, second.cam_id)
13 |
14 |
15 | def nameit_car(first, second):
16 | """
17 |     concatenate chip car ids in a separable way
18 |     first(chip) - first item
19 |     second(chip) - second item
20 | """
21 | return '{}|{}'.format(first.car_id, second.car_id)
22 |
23 |
24 | def get_match_id(cameras):
25 | """
26 | find the car of interest from a set of cameras
27 |
28 | cameras(list(list(chips)))): list of the cameras with cars in each camera
29 | """
30 | chosendict = defaultdict(int)
31 | for camera in cameras:
32 | for car in camera:
33 | chosendict[car.car_id] += 1
34 | mymax = -1
35 | myid = None
36 | for k in chosendict.keys():
37 | if chosendict[k] > mymax:
38 | mymax = chosendict[k]
39 | myid = k
40 | return myid
41 |
42 |
43 | def make_good_bad(cameras, car_id):
44 | """
45 | make a list of cars of interest, and a list of other
46 |
47 | cameras(list(list(chips))): list of the cameras with the cars in each cameras
48 | car_id(): the id of the car of interest
49 | """
50 | goodlist = list()
51 | bad_list = list()
52 | for camera in cameras:
53 | for car in camera:
54 | if car.car_id == car_id:
55 | goodlist.append(car)
56 | else:
57 | bad_list.append(car)
58 | return (goodlist, bad_list)
59 |
60 |
61 | def glue(vec_a, vec_b):
62 | """
63 | concatenate two smaller vectors to a larger vector
64 | vec_a : first vector
65 | vec_b : second vector
66 | """
67 | retval = list()
68 | retval.extend(vec_a)
69 | retval.extend(vec_b)
70 | return retval
71 |
--------------------------------------------------------------------------------
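Note: a small self-contained sketch of the helpers above. Chip here is a hypothetical stand-in for the dataset chip type, which only needs car_id and cam_id attributes.

    from collections import namedtuple
    from pelops.analysis.camerautil import get_match_id, glue, make_good_bad

    Chip = namedtuple('Chip', ['car_id', 'cam_id'])
    cameras = [[Chip(1, 'c1'), Chip(2, 'c1')],
               [Chip(1, 'c2'), Chip(3, 'c2')]]

    match_id = get_match_id(cameras)            # car 1 is on both cameras
    goods, bads = make_good_bad(cameras, match_id)
    assert match_id == 1 and len(goods) == 2 and len(bads) == 2
    assert glue([1, 2], [3, 4]) == [1, 2, 3, 4]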
/pelops/analysis/comparecameras.py:
--------------------------------------------------------------------------------
1 | """ camera comparison """
2 |
3 | import itertools
4 | from collections import defaultdict
5 |
6 | import numpy as np
7 | from tqdm import tnrange
8 |
9 | from pelops.analysis.camerautil import (get_match_id, glue, make_good_bad,
10 | nameit_cam, nameit_car)
11 |
12 |
13 | def eval_good_bad(first, second, clf, featuredataset, goodmatches, badmatches, attribute_name):
14 | """
15 | label examples of good and bad comparisons
16 |
17 |     take two chips, concatenate their feature vectors
18 | and create a balanced dataset of matches and differences
19 |
20 | first(Chip): image to evaluate
21 | second(Chip): image to evaluate
22 |     clf(classifier): classifier used to evaluate chips
23 |     featuredataset(FeatureDataset): maps chips to features
24 | goodmatches(defaultdictionary(int)): counts of good matches
25 | badmatches(defaultdictionary(int)): counts of bad matches
26 | attribute_name(str): which attribute to pull names from
27 | """
28 |
29 | namefunc = None
30 | if attribute_name == 'car':
31 | namefunc = nameit_car
32 | else:
33 | namefunc = nameit_cam
34 |
35 | bigvec1 = glue(featuredataset.get_feats_for_chip(first),
36 | featuredataset.get_feats_for_chip(second))
37 |
38 | bigvec1np = np.array(bigvec1)
39 | #bigvec1np.reshape(1, -1)
40 |
41 | bigvec2 = glue(featuredataset.get_feats_for_chip(second),
42 | featuredataset.get_feats_for_chip(first))
43 |
44 | bigvec2np = np.array(bigvec2)
45 | # bigvec2np.reshape(1, -1))
46 |
47 | decision = clf.predict(bigvec1np.reshape(1, -1))
48 | name = namefunc(first, second)
49 |
50 | tally_decision(decision, goodmatches, name, badmatches)
51 |
52 | decision = clf.predict(bigvec2np.reshape(1, -1))
53 | name = namefunc(second, first)
54 |
55 | tally_decision(decision, goodmatches, name, badmatches)
56 |
57 |
58 | def tally_decision(decision, goodpic, name, badpic):
59 | """
60 | count the number of matches for a name
61 |
62 |     decision(int): whether the classifier said the pair matched
63 |     goodpic(defaultdict(int)): counts of good matches
64 |     name(str): concatenation of the names of the first and second pics
65 |     badpic(defaultdict(int)): counts of bad matches
66 | """
67 | if decision == 1:
68 | goodpic[name] += 1
69 | else:
70 | badpic[name] += 1
71 |
72 |
73 | def mad_matrix(examples, clf, featuredataset, examplegenerator, attribute_name='car'):
74 | """
75 |     run `examples` experiments to see how cars are declared
76 |     the same or different by the clf classifier
77 | 
78 |     examples(int): number of trials
79 |     clf(classifier): classifier making the same/different distinction
80 |     featuredataset(FeatureDataset): allows joining of chips to features
81 |     examplegenerator(ExperimentGenerator): makes experiments for testing
82 | """
83 |
84 | ddg = defaultdict(int)
85 | ddb = defaultdict(int)
86 |
87 | for _ in tnrange(examples):
88 | cameras_test = examplegenerator.generate()
89 | match_id = get_match_id(cameras_test)
90 | goods, bads = make_good_bad(cameras_test, match_id)
91 | good0 = goods[0]
92 | good1 = goods[1]
93 | bad0 = bads[0]
94 | bad1 = bads[1]
95 |
96 | eval_good_bad(good0, good1, clf, featuredataset,
97 | ddg, ddb, attribute_name)
98 | eval_good_bad(bad0, bad1, clf, featuredataset,
99 | ddb, ddg, attribute_name)
100 |
101 |     return (ddg, ddb)
102 |
103 |
104 | def make_work(fd_train, lessons, outcomes, items, label):
105 | """
106 | makes a listing of work from chips for classification
107 |
108 | fd_train(featureDataset): training features
109 | lessons(list): feature vectors
110 | outcomes(list): expected outcome for the comparison
111 | items(list(chips)): list of chips for comparison
112 | label(int): expected label for the comparison
113 | """
114 | workitems = itertools.permutations(items, 2)
115 | for workitem in workitems:
116 | item = glue(fd_train.get_feats_for_chip(
117 | workitem[0]), fd_train.get_feats_for_chip(workitem[1]))
118 |
119 | lessons.append(item)
120 | outcomes.append(label)
121 |
122 | item = glue(fd_train.get_feats_for_chip(
123 | workitem[1]), fd_train.get_feats_for_chip(workitem[0]))
124 |
125 | lessons.append(item)
126 | outcomes.append(label)
127 |
--------------------------------------------------------------------------------
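Note: one plausible way to train the clf that eval_good_bad and mad_matrix expect, using make_work to build a balanced pair dataset. fd_train, goods and bads are assumed to exist (e.g. from make_good_bad above), and the choice of LinearSVC is an illustration, not the project's fixed classifier.

    from sklearn.svm import LinearSVC
    from pelops.analysis.comparecameras import make_work

    lessons, outcomes = [], []
    make_work(fd_train, lessons, outcomes, goods, 1)  # same-car pairs -> 1
    make_work(fd_train, lessons, outcomes, bads, 0)   # different-car pairs -> 0

    clf = LinearSVC().fit(lessons, outcomes)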
/pelops/analysis/isFileImage.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "cd 'deep-learning-models/'"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "from resnet50 import ResNet50\n",
19 | "from keras.preprocessing import image\n",
20 | "from imagenet_utils import preprocess_input, decode_predictions\n",
21 | "import numpy as np\n",
22 | "import json\n",
23 | "import time\n",
24 | "from multiprocessing import Pool\n",
25 | "import functools"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": null,
31 | "metadata": {},
32 | "outputs": [],
33 | "source": [
34 | "\n",
35 | "root = '/local_data/dgrossman/dgCars'\n",
36 | "allFiles = 'allImages'\n",
37 | "\n"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": null,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "def readTasking(filename):\n",
47 | "\n",
48 | "    af = open(filename,'r')\n",
49 | " data = list()\n",
50 | " fileProblems = list()\n",
51 | "\n",
52 | " for jline in af:\n",
53 | " jline = jline.strip()\n",
54 | " line = json.loads(jline)\n",
55 | " data.append(line)\n",
56 | " af.close()\n",
57 | " return data"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": null,
63 | "metadata": {},
64 | "outputs": [],
65 | "source": [
66 | "def procLine2(l, r):\n",
67 | " img_path = '{0}/{1}'.format(r,l['filename'])\n",
68 | " try:\n",
69 | " img = image.load_img(img_path, target_size=(224, 224))\n",
70 | " return (1,l['filename'])\n",
71 | "    except Exception:\n",
72 | " return (0,l['filename'])\n",
73 | " \n",
74 | "procLine = functools.partial(procLine2, r=root )"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": null,
80 | "metadata": {},
81 | "outputs": [],
82 | "source": [
83 | "time.sleep(60*60*6) # sleep 6 hours then try to do the images"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": null,
89 | "metadata": {},
90 | "outputs": [],
91 | "source": [
92 | "p = Pool(32)"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": null,
98 | "metadata": {},
99 | "outputs": [],
100 | "source": [
101 | "data = readTasking(allFiles)"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": null,
107 | "metadata": {},
108 | "outputs": [],
109 | "source": [
110 | "a = p.map(procLine,data)"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": null,
116 | "metadata": {},
117 | "outputs": [],
118 | "source": [
119 | "def goodBad(data):\n",
120 | " good= 0\n",
121 | " bad = 0\n",
122 | " for item in data:\n",
123 | " if item[0]==1:\n",
124 | " good = good + 1\n",
125 | " else:\n",
126 | " bad = bad + 1\n",
127 | " print('good',good,' bad',bad)\n",
128 | " return (good,bad)"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": null,
134 | "metadata": {},
135 | "outputs": [],
136 | "source": [
137 | "out = goodBad(a)"
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": null,
143 | "metadata": {},
144 | "outputs": [],
145 | "source": [
146 | "bad = list()\n",
147 | "for item in a:\n",
148 | " if item[0] == 0:\n",
149 | " bad.append(item[1])\n"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": null,
155 | "metadata": {},
156 | "outputs": [],
157 | "source": [
158 | "bad"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": null,
164 | "metadata": {},
165 | "outputs": [],
166 | "source": [
167 | "newdata = list()\n",
168 | "for d in data:\n",
169 | " if d['filename'] not in bad:\n",
170 | " newdata.append(d)"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": null,
176 | "metadata": {},
177 | "outputs": [],
178 | "source": [
179 | "len(newdata)"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": null,
185 | "metadata": {},
186 | "outputs": [],
187 | "source": [
188 | "len(newdata)"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": null,
194 | "metadata": {},
195 | "outputs": [],
196 | "source": [
197 | "import json\n",
198 | "out = open('allImages','w')\n",
199 | "for d in newdata:\n",
200 | " out.write(json.dumps(d)+'\\n');\n",
201 | "out.close()"
202 | ]
203 | }
204 | ],
205 | "metadata": {
206 | "anaconda-cloud": {},
207 | "celltoolbar": "Raw Cell Format",
208 | "kernelspec": {
209 | "display_name": "Python 3",
210 | "language": "python",
211 | "name": "python3"
212 | },
213 | "language_info": {
214 | "codemirror_mode": {
215 | "name": "ipython",
216 | "version": 3
217 | },
218 | "file_extension": ".py",
219 | "mimetype": "text/x-python",
220 | "name": "python",
221 | "nbconvert_exporter": "python",
222 | "pygments_lexer": "ipython3",
223 | "version": "3.5.2"
224 | },
225 | "nbpresent": {
226 | "slides": {},
227 | "themes": {
228 | "default": "197aed3e-040e-45b3-b365-855332b06482",
229 | "theme": {}
230 | }
231 | }
232 | },
233 | "nbformat": 4,
234 | "nbformat_minor": 1
235 | }
236 |
--------------------------------------------------------------------------------
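Note: a condensed script-level sketch of the validity check the notebook above performs. It substitutes Pillow's Image.verify for the notebook's Keras-based load (an assumption) and reuses the same hypothetical paths.

    import json
    from multiprocessing import Pool
    from PIL import Image

    ROOT = '/local_data/dgrossman/dgCars'

    def is_loadable(record):
        # a record is one JSON line from the 'allImages' listing
        try:
            with Image.open('{}/{}'.format(ROOT, record['filename'])) as img:
                img.verify()
            return (1, record['filename'])
        except Exception:
            return (0, record['filename'])

    if __name__ == '__main__':
        with open('allImages') as handle:
            records = [json.loads(line) for line in handle]
        with Pool(32) as pool:
            results = pool.map(is_loadable, records)
        bad = [name for ok, name in results if not ok]
        print('good', len(results) - len(bad), 'bad', len(bad))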
/pelops/analysis/labelImageCars.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import sys,os,os.path\n",
10 | "import tensorflow as tf\n",
11 | "os.environ['THEANO_FLAGS']='mode=FAST_RUN,device=gpu,floatX=float32'\n",
12 | "from keras import backend as K\n",
13 | "sess = tf.Session()\n",
14 | "K.set_session(sess)"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "cd 'deep-learning-models/'"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": null,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "import json\n",
33 | "import time\n",
34 | "allFiles = 'allImages'\n",
35 | "root = '/local_data/dgrossman/dgCars/'\n",
36 | "af = open(allFiles,'r')\n",
37 | "data = list()\n",
38 | "for d in af:\n",
39 | " d = d.strip()\n",
40 | " data.append(json.loads(d))"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": null,
46 | "metadata": {},
47 | "outputs": [],
48 | "source": [
49 | "from resnet50 import ResNet50\n",
50 | "from keras.preprocessing import image\n",
51 | "from imagenet_utils import preprocess_input, decode_predictions\n",
52 | "import numpy as np"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": null,
58 | "metadata": {},
59 | "outputs": [],
60 | "source": [
61 | "s = time.time()\n",
62 | "model = ResNet50(weights='imagenet')\n",
63 | "print ('loadResNet50',time.time() - s)\n",
64 | "\n"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {},
71 | "outputs": [],
72 | "source": [
73 | "good = list()\n",
74 | "bad = list()\n",
75 | "file = list()"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": null,
81 | "metadata": {},
82 | "outputs": [],
83 | "source": [
84 | "interesting = set()\n",
85 | "\n",
86 | "for x in ['police_van','moving_van','car','pickup','suv','truck','crossover','van','minivan','sports_car','cab','racer','convertible','car_wheel','jeep','ambulance']:\n",
87 | " interesting.add(x)\n",
88 | "\n",
89 | "count = 0\n",
90 | "im = 0\n",
91 | "#with tf.device('/gpu:0'):\n",
92 | "# print ('time:',time.time())\n",
93 | "s = time.time()\n",
94 | "print ('time:',time.time() - s)\n",
95 | "if True:\n",
96 | " for d in data: \n",
97 | " img_path = '{0}/{1}'.format(root,d['filename'])\n",
98 | " flag = True\n",
99 | " try:\n",
100 | " img = image.load_img(img_path, target_size=(224, 224))\n",
101 | "\n",
102 | " except:\n",
103 | " #print('FILE :',d['filename'])\n",
104 | " file.append(d)\n",
105 | " flag = False\n",
106 | "\n",
107 | " if flag:\n",
108 | " x = image.img_to_array(img)\n",
109 | " x = np.expand_dims(x, axis=0)\n",
110 | " x = preprocess_input(x)\n",
111 | " preds = model.predict(x)\n",
112 | " predictions = decode_predictions(preds)[0][:4]\n",
113 | " #out = ''\n",
114 | " found = False\n",
115 | " for prediction in predictions:\n",
116 | " i,t,score = prediction\n",
117 | " #out = d['filename'], prediction\n",
118 | " if t in interesting:\n",
119 | " #out = 'GOOD'+' ' + d['filename']+' '+t\n",
120 | " good.append((d,t))\n",
121 | " found = True\n",
122 | " break\n",
123 | " if not found:\n",
124 | " bad.append((d,predictions[0][1]))\n",
125 | " #out = 'BAD'+ ' ' + d['filename']+ ' ' + predictions[0][1]\n",
126 | " #print (out)\n",
127 | " #print ('Predicted',decode_predictions(preds)[0][:4],' sec:',time.time() - s )\n",
128 | "\n",
129 | " atOnce = 10000\n",
130 | " if count == atOnce:\n",
131 | " count = 0\n",
132 | " im = im + 1\n",
133 | " z = time.time() - s\n",
134 | " print('processed:',im * atOnce,'Images','good',len(good),'bad',len(bad),'file',len(file),z)\n",
135 | " s = time.time()\n",
136 | " count = count + 1"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": null,
142 | "metadata": {},
143 | "outputs": [],
144 | "source": [
145 | "print('processed:',len(good) + len(bad) + len(file),'Images','good',len(good),'bad',len(bad),'file',len(file))"
146 | ]
147 | },
148 | {
149 | "cell_type": "code",
150 | "execution_count": null,
151 | "metadata": {},
152 | "outputs": [],
153 | "source": [
154 | "from collections import defaultdict\n",
155 | "q = defaultdict(int)"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": null,
161 | "metadata": {},
162 | "outputs": [],
163 | "source": [
164 | "for b in good:\n",
165 | " q[b[1]] = q[b[1]]+1"
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": null,
171 | "metadata": {},
172 | "outputs": [],
173 | "source": [
174 | "for qq in q:\n",
175 | " print (qq,q[qq])\n"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": [
184 | "gfile = open('foundCars','w')\n",
185 | "for g in good:\n",
186 | "    gfile.write(str(g)+'\\n')\n",
187 | "gfile.close()"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": null,
193 | "metadata": {},
194 | "outputs": [],
195 | "source": [
196 | "gfile = open('foundCars','w')\n",
197 | "for g in good:\n",
198 | " dat, classification = g\n",
199 | " dat['resnet50'] = classification\n",
200 | " gfile.write(json.dumps(dat)+'\\n')\n",
201 | "gfile.close()\n"
202 | ]
203 | },
204 | {
205 | "cell_type": "code",
206 | "execution_count": null,
207 | "metadata": {},
208 | "outputs": [],
209 | "source": [
210 | "good[0]"
211 | ]
212 | },
213 | {
214 | "cell_type": "code",
215 | "execution_count": null,
216 | "metadata": {},
217 | "outputs": [],
218 | "source": [
219 | "7+2"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": null,
225 | "metadata": {},
226 | "outputs": [],
227 | "source": []
228 | }
229 | ],
230 | "metadata": {
231 | "anaconda-cloud": {},
232 | "kernelspec": {
233 | "display_name": "Python 3",
234 | "language": "python",
235 | "name": "python3"
236 | },
237 | "language_info": {
238 | "codemirror_mode": {
239 | "name": "ipython",
240 | "version": 3
241 | },
242 | "file_extension": ".py",
243 | "mimetype": "text/x-python",
244 | "name": "python",
245 | "nbconvert_exporter": "python",
246 | "pygments_lexer": "ipython3",
247 | "version": "3.5.2"
248 | }
249 | },
250 | "nbformat": 4,
251 | "nbformat_minor": 1
252 | }
253 |
--------------------------------------------------------------------------------
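Note: the heart of the notebook above, reduced to one function: keep an image when any of its top ImageNet predictions falls in the vehicle classes. This sketch assumes the packaged keras.applications ResNet50 rather than the notebook's local deep-learning-models checkout, and uses an abbreviated class set.

    import numpy as np
    from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
    from keras.preprocessing import image

    INTERESTING = {'car', 'pickup', 'suv', 'minivan', 'sports_car',
                   'cab', 'racer', 'convertible', 'jeep'}
    model = ResNet50(weights='imagenet')

    def looks_like_a_car(img_path, top=4):
        img = image.load_img(img_path, target_size=(224, 224))
        batch = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
        preds = decode_predictions(model.predict(batch))[0][:top]
        return any(label in INTERESTING for _, label, _ in preds)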
/pelops/analysis/makeCMCplots.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#set some constants\n",
10 | "ITEMSPERCAMERA = 10\n",
11 | "YRANDOM=1024\n",
12 | "CAMERAS=2\n",
13 | "DROPPED=0\n",
14 | "CMC=100\n",
15 | "EXPERIMENTS=400"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": null,
21 | "metadata": {},
22 | "outputs": [],
23 | "source": [
24 | "DATASETFILES = [('/local_data/dgrossman/image_body_type',4),\n",
25 | " ('/local_data/dgrossman/image_color_type',10),\n",
26 | " ('/local_data/dgrossman/image_color_body_type',40),\n",
27 | " ('/local_data/dgrossman/image_make_model_type',1057),\n",
28 | " ('/local_data/dgrossman/resnet50','-1')]\n",
29 | "DATASETFILES = [('/local_data/dgrossman/compcars_color',10),\n",
30 | " ('/local_data/dgrossman/compcars_make_model',284),\n",
31 | " ('/local_data/dgrossman/image_color_type',10),\n",
32 | " ('/local_data/dgrossman/resnet50','-1')]\n",
33 | "DATASETFILES = [('/local_data/dgrossman/resnet50','-1')]"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "from pelops.datasets.featuredataset import FeatureDataset\n",
43 | "from pelops.experiment_api.experiment import ExperimentGenerator\n",
44 | "from pelops.analysis import analysis\n",
45 | "\n",
46 | "alldata = list()\n",
47 | "for datasetfile,num in DATASETFILES:\n",
48 | " #do the math\n",
49 | " print(datasetfile)\n",
50 | " featureData = FeatureDataset(datasetfile)\n",
51 | " expGen = ExperimentGenerator(featureData, CAMERAS, ITEMSPERCAMERA, DROPPED, YRANDOM)\n",
52 | " experimentHolder = analysis.repeat_pre_cmc(featureData,expGen,NUMCMC=CMC,EXPPERCMC=EXPERIMENTS)\n",
53 | " stats,gdata = analysis.make_cmc_stats(experimentHolder,ITEMSPERCAMERA)\n",
54 | " alldata.append(gdata)"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "import numpy as np\n",
64 | "stats = np.zeros((100,len(DATASETFILES)))\n"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {},
71 | "outputs": [],
72 | "source": [
73 | "for x in range(len(DATASETFILES)):\n",
74 | " for y in range(100):\n",
75 | " stats[y][x] = alldata[x][1][y]"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": null,
81 | "metadata": {},
82 | "outputs": [],
83 | "source": [
84 | "%matplotlib inline\n",
85 | "import matplotlib.pyplot as plt\n",
86 | "\n",
87 | "#make the plots\n",
88 | "fig = plt.figure()\n",
89 | "ax = plt.subplot(111)\n",
90 | "\n",
91 | "ax.plot(stats)\n",
92 | "#plt.title('color: 10\\ncolor mixed with structure: 40 \\n structure only: 4, 1057\\n CMC curves with {} experiments per curve'.format(CMC,EXPERIMENTS))\n",
93 | "plt.title('compcars color:10\\ncompcars make model:284\\ndgcars color:10\\nuntrained resnet\\n{} CMC curves with {} experiments per curve'.format(CMC,EXPERIMENTS))\n",
94 | "plt.grid(True)\n",
95 | "#ax.legend(('4','10','40','1057','untrained'),bbox_to_anchor=(1, -0.05),fancybox=True, shadow=True, ncol=5)\n",
96 | "ax.legend(('10cc','284cc','10dg','untrained'),bbox_to_anchor=(1, -0.05),fancybox=True, shadow=True, ncol=5)"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": null,
102 | "metadata": {},
103 | "outputs": [],
104 | "source": [
105 | "%matplotlib inline\n",
106 | "import matplotlib.pyplot as plt\n",
107 | "\n",
108 | "#make the plots\n",
109 | "fig = plt.figure()\n",
110 | "ax = plt.subplot(111)\n",
111 | "\n",
112 | "ax.plot(stats)\n",
113 | "plt.title('[color outperforms structure]\\n{} CMC curves with {} experiments per curve'.format(CMC,EXPERIMENTS))\n",
114 | "plt.grid(True)\n",
115 | "ax.legend(('4','10','40','1057 classes'),bbox_to_anchor=(1, -0.05),\n",
116 | " fancybox=True, shadow=True, ncol=5)"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": null,
122 | "metadata": {},
123 | "outputs": [],
124 | "source": []
125 | }
126 | ],
127 | "metadata": {
128 | "anaconda-cloud": {},
129 | "kernelspec": {
130 | "display_name": "Python 3",
131 | "language": "python",
132 | "name": "python3"
133 | },
134 | "language_info": {
135 | "codemirror_mode": {
136 | "name": "ipython",
137 | "version": 3
138 | },
139 | "file_extension": ".py",
140 | "mimetype": "text/x-python",
141 | "name": "python",
142 | "nbconvert_exporter": "python",
143 | "pygments_lexer": "ipython3",
144 | "version": "3.5.2"
145 | }
146 | },
147 | "nbformat": 4,
148 | "nbformat_minor": 2
149 | }
150 |
--------------------------------------------------------------------------------
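Note: gdata from make_cmc_stats already carries a one-standard-deviation band in rows 0 and 2, though the notebook above plots only the means. A sketch of showing the band for a single dataset (fill_between is an addition, not in the notebook; gdata is assumed to exist as above).

    import matplotlib.pyplot as plt
    import numpy as np

    ranks = np.arange(1, gdata.shape[1] + 1)
    plt.plot(ranks, gdata[1], label='mean CMC')
    plt.fill_between(ranks, gdata[0], gdata[2], alpha=0.3, label='+/- 1 std dev')
    plt.xlabel('rank')
    plt.ylabel('cumulative match rate')
    plt.grid(True)
    plt.legend()
    plt.show()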
/pelops/analysis/makeFeatureFiles-TEST.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from pelops.datasets.veri import VeriDataset\n",
10 | "from pelops.analysis.unsorted.recompute.extract_feats_from_chips import extract_feats_from_chips\n",
11 | "import pelops.utils as utils\n"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": null,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-body_type.model.json'\n",
21 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-body_type.weights.hdf5'\n",
22 | "layer = 'avg_pool'"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": null,
28 | "metadata": {},
29 | "outputs": [],
30 | "source": [
31 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n",
32 | " set_type=utils.SetType.TEST.value)\n",
33 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_body_type_TEST',\n",
34 | " model_output_file,\n",
35 | " weights_output_file,\n",
36 | " layer)"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": null,
42 | "metadata": {},
43 | "outputs": [],
44 | "source": [
45 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color.model.json'\n",
46 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color.weights.hdf5'\n",
47 | "layer = 'avg_pool'"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": null,
53 | "metadata": {},
54 | "outputs": [],
55 | "source": [
56 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n",
57 | " set_type=utils.SetType.TEST.value)\n",
58 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_color_type_TEST',\n",
59 | " model_output_file,\n",
60 | " weights_output_file,\n",
61 | " layer)"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": null,
67 | "metadata": {},
68 | "outputs": [],
69 | "source": [
70 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color_body_type.model2.json'\n",
71 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color_body_type.weights2.hdf5'\n",
72 | "layer = 'avg_pool'"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": null,
78 | "metadata": {},
79 | "outputs": [],
80 | "source": [
81 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n",
82 | " set_type=utils.SetType.TEST.value)\n",
83 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_color_body_type_TEST',\n",
84 | " model_output_file,\n",
85 | " weights_output_file,\n",
86 | " layer)"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": null,
92 | "metadata": {},
93 | "outputs": [],
94 | "source": [
95 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-make_model.model.json'\n",
96 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-make_model.weights.hdf5'\n",
97 | "layer = 'avg_pool'"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": null,
103 | "metadata": {},
104 | "outputs": [],
105 | "source": [
106 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n",
107 | " set_type=utils.SetType.TEST.value)\n",
108 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_make_model_type_TEST',\n",
109 | " model_output_file,\n",
110 | " weights_output_file,\n",
111 | " layer)"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": null,
117 | "metadata": {},
118 | "outputs": [],
119 | "source": [
120 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet.model.json'\n",
121 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet.weights.hdf5'\n",
122 | "layer = 'avg_pool'"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": null,
128 | "metadata": {},
129 | "outputs": [],
130 | "source": [
131 | "veri = VeriDataset('/local_data/dgrossman/VeRi',set_type=utils.SetType.TEST.value)\n",
132 | "extract_feats_from_chips(veri, '/local_data/dgrossman/resnet50_TEST',\n",
133 | " model_output_file,\n",
134 | " weights_output_file,\n",
135 | " layer)"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": null,
141 | "metadata": {},
142 | "outputs": [],
143 | "source": [
144 | "model_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-make_model.model.json'\n",
145 | "weights_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-make_model.weights.hdf5'\n",
146 | "layer = 'avg_pool'"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": null,
152 | "metadata": {},
153 | "outputs": [],
154 | "source": [
155 | "veri = VeriDataset('/local_data/dgrossman/VeRi',set_type=utils.SetType.TEST.value)\n",
156 | "extract_feats_from_chips(veri, '/local_data/dgrossman/compcars_make_model_TEST',\n",
157 | " model_output_file,\n",
158 | " weights_output_file,\n",
159 | " layer)"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": null,
165 | "metadata": {},
166 | "outputs": [],
167 | "source": [
168 | "model_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-colors.model.json'\n",
169 | "weights_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-colors.weights.hdf5'\n",
170 | "layer = 'avg_pool'"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": null,
176 | "metadata": {},
177 | "outputs": [],
178 | "source": [
179 | "veri = VeriDataset('/local_data/dgrossman/VeRi',set_type=utils.SetType.TEST.value)\n",
180 | "extract_feats_from_chips(veri, '/local_data/dgrossman/compcars_color_TEST',\n",
181 | " model_output_file,\n",
182 | " weights_output_file,\n",
183 | " layer)"
184 | ]
185 | }
186 | ],
187 | "metadata": {
188 | "anaconda-cloud": {},
189 | "kernelspec": {
190 | "display_name": "Python 3",
191 | "language": "python",
192 | "name": "python3"
193 | },
194 | "language_info": {
195 | "codemirror_mode": {
196 | "name": "ipython",
197 | "version": 3
198 | },
199 | "file_extension": ".py",
200 | "mimetype": "text/x-python",
201 | "name": "python",
202 | "nbconvert_exporter": "python",
203 | "pygments_lexer": "ipython3",
204 | "version": "3.5.2"
205 | }
206 | },
207 | "nbformat": 4,
208 | "nbformat_minor": 2
209 | }
210 |
--------------------------------------------------------------------------------
/pelops/analysis/makeFeatureFiles.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from pelops.datasets.veri import VeriDataset\n",
10 | "from pelops.analysis.unsorted.recompute.extract_feats_from_chips import extract_feats_from_chips\n",
11 | "import pelops.utils as utils\n"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": null,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-body_type.model.json'\n",
21 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-body_type.weights.hdf5'\n",
22 | "layer = 'avg_pool'"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": null,
28 | "metadata": {},
29 | "outputs": [],
30 | "source": [
31 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n",
32 | " set_type=utils.SetType.TRAIN.value)\n",
33 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_body_type',\n",
34 | " model_output_file,\n",
35 | " weights_output_file,\n",
36 | " layer)"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": null,
42 | "metadata": {},
43 | "outputs": [],
44 | "source": [
45 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color.model.json'\n",
46 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color.weights.hdf5'\n",
47 | "layer = 'avg_pool'"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": null,
53 | "metadata": {},
54 | "outputs": [],
55 | "source": [
56 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n",
57 | " set_type=utils.SetType.TRAIN.value)\n",
58 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_color_type',\n",
59 | " model_output_file,\n",
60 | " weights_output_file,\n",
61 | " layer)"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": null,
67 | "metadata": {},
68 | "outputs": [],
69 | "source": [
70 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color_body_type.model2.json'\n",
71 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color_body_type.weights2.hdf5'\n",
72 | "layer = 'avg_pool'"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": null,
78 | "metadata": {},
79 | "outputs": [],
80 | "source": [
81 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n",
82 | " set_type=utils.SetType.TRAIN.value)\n",
83 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_color_body_type',\n",
84 | " model_output_file,\n",
85 | " weights_output_file,\n",
86 | " layer)"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": null,
92 | "metadata": {},
93 | "outputs": [],
94 | "source": [
95 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-make_model.model.json'\n",
96 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-make_model.weights.hdf5'\n",
97 | "layer = 'avg_pool'"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": null,
103 | "metadata": {},
104 | "outputs": [],
105 | "source": [
106 | "veri = VeriDataset('/local_data/dgrossman/VeRi',\n",
107 | " set_type=utils.SetType.TRAIN.value)\n",
108 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_make_model_type',\n",
109 | " model_output_file,\n",
110 | " weights_output_file,\n",
111 | " layer)"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": null,
117 | "metadata": {},
118 | "outputs": [],
119 | "source": [
120 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet.model.json'\n",
121 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet.weights.hdf5'\n",
122 | "layer = 'avg_pool'"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": null,
128 | "metadata": {},
129 | "outputs": [],
130 | "source": [
131 | "veri = VeriDataset('/local_data/dgrossman/VeRi',set_type=utils.SetType.TRAIN.value)\n",
132 | "extract_feats_from_chips(veri, '/local_data/dgrossman/resnet50',\n",
133 | " model_output_file,\n",
134 | " weights_output_file,\n",
135 | " layer)"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": null,
141 | "metadata": {},
142 | "outputs": [],
143 | "source": [
144 | "model_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-make_model.model.json'\n",
145 | "weights_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-make_model.weights.hdf5'\n",
146 | "layer = 'avg_pool'"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": null,
152 | "metadata": {},
153 | "outputs": [],
154 | "source": [
155 | "veri = VeriDataset('/local_data/dgrossman/VeRi',set_type=utils.SetType.TRAIN.value)\n",
156 | "extract_feats_from_chips(veri, '/local_data/dgrossman/compcars_make_model',\n",
157 | " model_output_file,\n",
158 | " weights_output_file,\n",
159 | " layer)"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": null,
165 | "metadata": {},
166 | "outputs": [],
167 | "source": [
168 | "model_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-colors.model.json'\n",
169 | "weights_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-colors.weights.hdf5'\n",
170 | "layer = 'avg_pool'"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": null,
176 | "metadata": {},
177 | "outputs": [],
178 | "source": [
179 | "veri = VeriDataset('/local_data/dgrossman/VeRi',set_type=utils.SetType.TRAIN.value)\n",
180 | "extract_feats_from_chips(veri, '/local_data/dgrossman/compcars_color',\n",
181 | " model_output_file,\n",
182 | " weights_output_file,\n",
183 | " layer)"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": null,
189 | "metadata": {},
190 | "outputs": [],
191 | "source": [
192 | "1+1\n"
193 | ]
194 | },
195 | {
196 | "cell_type": "code",
197 | "execution_count": null,
198 | "metadata": {},
199 | "outputs": [],
200 | "source": []
201 | }
202 | ],
203 | "metadata": {
204 | "anaconda-cloud": {},
205 | "kernelspec": {
206 | "display_name": "Python 3",
207 | "language": "python",
208 | "name": "python3"
209 | },
210 | "language_info": {
211 | "codemirror_mode": {
212 | "name": "ipython",
213 | "version": 3
214 | },
215 | "file_extension": ".py",
216 | "mimetype": "text/x-python",
217 | "name": "python",
218 | "nbconvert_exporter": "python",
219 | "pygments_lexer": "ipython3",
220 | "version": "3.5.2"
221 | }
222 | },
223 | "nbformat": 4,
224 | "nbformat_minor": 2
225 | }
226 |
--------------------------------------------------------------------------------
/pelops/analysis/makeVeri.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from pelops.datasets.veri import VeriDataset
3 | from pelops.etl.extract_feats_from_chips import extract_feats_from_chips
4 |
5 | # build the feature file used by the experiments
6 | if __name__ == '__main__':
7 |     # path to the veri dataset (sys.argv[0] is the script name)
8 |     v_file_name = sys.argv[1]
9 | 
10 |     # filename of where to place the output
11 |     out_file_name = sys.argv[2]
12 |
13 | veri = VeriDataset(v_file_name)
14 | extract_feats_from_chips(veri, out_file_name)
15 |
--------------------------------------------------------------------------------
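Note: usage sketch for the script above, after the argv fix; paths are hypothetical.

    # $ python pelops/analysis/makeVeri.py /local_data/VeRi veri_feats.hdf5
    #
    # sys.argv[1]: root of the VeRi dataset
    # sys.argv[2]: output feature file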
/pelops/analysis/recomputeCorpus.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from pelops.analysis.unsorted.recompute.compute import do_training"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "tbld = '/local_data/dgrossman/tensorboard_logs'\n",
19 | "mcfs = '/local_data/dgrossman/model_save_dir/dg_carsweights.{epoch:02d}-{val_loss:.2f}-{val_acc:.2f}.hdf5'\n",
20 | "batch_size=32\n"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": null,
26 | "metadata": {},
27 | "outputs": [],
28 | "source": [
29 | "training_basepath = '/local_data/dgrossman/keras/make_model/train'\n",
30 | "validation_basepath = '/local_data/dgrossman/keras/make_model/validate'\n",
31 | "\n",
32 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-make_model.model.json'\n",
33 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-make_model.weights.hdf5'\n",
34 | "\n",
35 | "do_training(training_basepath,validation_basepath, model_output_file, weights_output_file, tbld, mcfs, 32)"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": [
44 | "training_basepath = '/local_data/dgrossman/keras/color/train'\n",
45 | "validation_basepath = '/local_data/dgrossman/keras/color/validate'\n",
46 | "\n",
47 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color.model.json'\n",
48 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color.weights.hdf5'\n",
49 | "\n",
50 | "do_training(training_basepath,validation_basepath, model_output_file, weights_output_file, tbld, mcfs, 32)"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": [
59 | "training_basepath = '/local_data/dgrossman/keras/color_body_type/train'\n",
60 | "validation_basepath = '/local_data/dgrossman/keras/color_body_type/validate'\n",
61 | "\n",
62 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color_body_type.model2.json'\n",
63 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-color_body_type.weights2.hdf5'\n",
64 | "\n",
65 | "do_training(training_basepath,validation_basepath, model_output_file, weights_output_file, tbld, mcfs, 32)"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "metadata": {},
72 | "outputs": [],
73 | "source": [
74 | "training_basepath = '/local_data/dgrossman/keras/body_type/train'\n",
75 | "validation_basepath = '/local_data/dgrossman/keras/body_type/validate'\n",
76 | "\n",
77 | "model_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-body_type.model.json'\n",
78 | "weights_output_file = '/local_data/dgrossman/model_save_dir/dgcars_resenet-body_type.weights.hdf5'\n",
79 | "\n",
80 | "do_training(training_basepath,validation_basepath, model_output_file, weights_output_file, tbld, mcfs, 32)"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": null,
86 | "metadata": {},
87 | "outputs": [],
88 | "source": [
89 | "training_basepath = '/local_data/teams/pelops/compcars_keras/make_model/train'\n",
90 | "validation_basepath = '/local_data/teams/pelops/compcars_keras/make_model/test'\n",
91 | "\n",
92 | "model_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-make_model.model.json'\n",
93 | "weights_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-make_model.weights.hdf5'\n",
94 | "\n",
95 | "do_training(training_basepath,validation_basepath, model_output_file, weights_output_file, tbld, mcfs, 32)"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": [
104 | "training_basepath = '/local_data/dgrossman/compcars/colors/train'\n",
105 | "validation_basepath = '/local_data/dgrossman/compcars/colors/test'\n",
106 | "\n",
107 | "model_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-colors.model.json'\n",
108 | "weights_output_file = '/local_data/dgrossman/model_save_dir/compcars_resenet-colors.weights.hdf5'\n",
109 | "\n",
110 | "do_training(training_basepath,validation_basepath, model_output_file, weights_output_file, tbld, mcfs, 32)"
111 | ]
112 | }
113 | ],
114 | "metadata": {
115 | "kernelspec": {
116 | "display_name": "Python 3",
117 | "language": "python",
118 | "name": "python3"
119 | },
120 | "language_info": {
121 | "codemirror_mode": {
122 | "name": "ipython",
123 | "version": 3
124 | },
125 | "file_extension": ".py",
126 | "mimetype": "text/x-python",
127 | "name": "python",
128 | "nbconvert_exporter": "python",
129 | "pygments_lexer": "ipython3",
130 | "version": "3.5.2"
131 | }
132 | },
133 | "nbformat": 4,
134 | "nbformat_minor": 2
135 | }
136 |
--------------------------------------------------------------------------------
/pelops/analysis/saveExtractFeatsFromChips.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from pelops.datasets.veri import VeriDataset\n",
10 | "from pelops.analysis.unsorted.recompute.extract_feats_from_chips import extract_feats_from_chips\n",
11 | "import pelops.utils as utils\n"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": null,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "veri = VeriDataset('/local_data/dgrossman/newVeRi',set_type=utils.SetType.TRAIN.value)\n",
21 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_NEW_TRAIN')"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "veri = VeriDataset('/local_data/dgrossman/newVeRi',set_type=utils.SetType.TEST.value)\n",
31 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_NEW_TEST')"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": null,
37 | "metadata": {},
38 | "outputs": [],
39 | "source": []
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": null,
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "veri = VeriDataset('/local_data/dgrossman/tinyVeRi',set_type=utils.SetType.TRAIN.value)\n",
48 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_NEW_TINY_TRAIN')"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": null,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "veri = VeriDataset('/local_data/dgrossman/tinyVeRi',set_type=utils.SetType.TEST.value)\n",
58 | "extract_feats_from_chips(veri, '/local_data/dgrossman/image_NEW_TINY_TEST')"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "metadata": {},
65 | "outputs": [],
66 | "source": []
67 | }
68 | ],
69 | "metadata": {
70 | "anaconda-cloud": {},
71 | "kernelspec": {
72 | "display_name": "Python 3",
73 | "language": "python",
74 | "name": "python3"
75 | },
76 | "language_info": {
77 | "codemirror_mode": {
78 | "name": "ipython",
79 | "version": 3
80 | },
81 | "file_extension": ".py",
82 | "mimetype": "text/x-python",
83 | "name": "python",
84 | "nbconvert_exporter": "python",
85 | "pygments_lexer": "ipython3",
86 | "version": "3.5.2"
87 | }
88 | },
89 | "nbformat": 4,
90 | "nbformat_minor": 2
91 | }
92 |
--------------------------------------------------------------------------------
/pelops/analysis/splitDataset.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import os\n",
10 | "from random import shuffle\n",
11 | "import glob\n",
12 | "import shutil\n",
13 | "import tqdm"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": null,
19 | "metadata": {},
20 | "outputs": [],
21 | "source": [
22 | "def make_dir(path):\n",
23 | " if not os.path.exists(path):\n",
24 | " os.makedirs(path)"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": null,
30 | "metadata": {},
31 | "outputs": [],
32 | "source": [
33 | "def move_link(src,dst):\n",
34 | " real_src = os.path.realpath(src)\n",
35 | " #print(real_src,dst)\n",
36 | " os.symlink(real_src,dst)\n",
37 | " os.unlink(src)\n",
38 | " #os.rename(src,dst)"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": null,
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "def prep_datasets(srcpath,destpath,percent=0.3):\n",
48 | " \n",
49 | "    if percent > 1 or percent < 0:\n",
50 | "        raise ValueError('percent needs to be in [0,1]')\n",
51 | "\n",
52 | " found = 0\n",
53 | " moved = 0\n",
54 | " for image_class_filepath in tqdm.tqdm(glob.glob(os.path.join(srcpath, '*'))):\n",
55 | " \n",
56 | " if os.path.isdir(image_class_filepath):\n",
57 | " image_class_num = int(os.path.basename(image_class_filepath))\n",
58 | " \n",
59 | " directory_name = os.path.join(destpath, '{}'.format(image_class_num))\n",
60 | " #print(directory_name)\n",
61 | " make_dir(directory_name)\n",
62 | " \n",
63 | " dir_contents = list()\n",
64 | " \n",
65 | " for filename in glob.glob(os.path.join(image_class_filepath, '*')):\n",
66 | " found+=1\n",
67 | " dir_contents.append(filename)\n",
68 | " \n",
69 | " \n",
70 | " shuffle(dir_contents)\n",
71 | " threshold = int (percent * len(dir_contents))\n",
72 | " mixed = dir_contents[:threshold]\n",
73 | " for filename in mixed:\n",
74 | " moved +=1\n",
75 | " #print ('filename:',os.path.basename(filename))\n",
76 | " src = os.path.join(srcpath,'{}'.format(image_class_num),filename)\n",
77 | " #print(directory_name,filename)\n",
78 | " dst = os.path.join(directory_name,os.path.basename(filename))\n",
79 | " #print('src:{0}\\ndst:{1}'.format(src,dst))\n",
80 | " move_link(src,dst)\n",
81 | " print('total:',found,'moved:',moved,'remains:',found-moved)"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {},
88 | "outputs": [],
89 | "source": [
90 | "train = '/local_data/dgrossman/keras/color/train'\n",
91 | "test = '/local_data/dgrossman/keras/color/test'\n",
92 | "validate = '/local_data/dgrossman/keras/color/validate'"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": null,
98 | "metadata": {},
99 | "outputs": [],
100 | "source": [
101 | "prep_datasets(train,test,0.3)"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": null,
107 | "metadata": {},
108 | "outputs": [],
109 | "source": [
110 | "prep_datasets(test,validate,0.3)"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": null,
116 | "metadata": {},
117 | "outputs": [],
118 | "source": []
119 | }
120 | ],
121 | "metadata": {
122 | "kernelspec": {
123 | "display_name": "Python 3",
124 | "language": "python",
125 | "name": "python3"
126 | },
127 | "language_info": {
128 | "codemirror_mode": {
129 | "name": "ipython",
130 | "version": 3
131 | },
132 | "file_extension": ".py",
133 | "mimetype": "text/x-python",
134 | "name": "python",
135 | "nbconvert_exporter": "python",
136 | "pygments_lexer": "ipython3",
137 | "version": "3.5.2"
138 | }
139 | },
140 | "nbformat": 4,
141 | "nbformat_minor": 2
142 | }
143 |
--------------------------------------------------------------------------------
/pelops/analysis/test_analysis.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from pelops.analysis import analysis
3 |
4 |
5 | class experimentGen():
6 | def __init__(self):
7 | self.fd = featureData()
8 | self.experiment = list()
9 | c1 = ['a', 'b', 'c', 'd']
10 | c2 = ['a', 'e', 'f', 'g']
11 | cam1 = list()
12 | cam2 = list()
13 |
14 | for c in c1:
15 | cam1.append(self.fd.getchip(c))
16 |
17 | for c in c2:
18 | cam2.append(self.fd.getchip(c))
19 |
20 | self.experiment.append(cam1)
21 | self.experiment.append(cam2)
22 |
23 | def generate(self):
24 | return self.experiment
25 |
26 |
27 | class chip():
28 | def __init__(self, x):
29 | self.car_id = x[0]
30 | self.feature = x[1]
31 |
32 |
33 | class featureData():
34 | def __init__(self):
35 | self.data = list()
36 |
37 | fun = [('a', [1, 2, 3, 4, 5, 6, 7]),
38 | ('b', [10, 20, 30, 40, 11, 9, 2.7]),
39 | ('c', [100, 20, 30, 40, 11, 9, 2.7]),
40 | ('d', [10, 200, 30, 40, 11, 9, 2.7]),
41 | ('e', [10, 20, 300, 40, 11, 9, 2.7]),
42 | ('f', [10, 20, 30, 400, 11, 9, 2.7]),
43 | ('g', [10, 20, 30, 40, 110, 9, 2.7]),
44 | ('h', [10, 20, 30, 40, 11, 90, 2.7]),
45 | ('i', [10, 20, 30, 40, 11, 9, 27.0])]
46 | for f in fun:
47 | self.data.append(chip(f))
48 |
49 | def get_feats_for_chip(self, chip):
50 | for d in self.data:
51 | if d.car_id == chip.car_id:
52 | return d.feature
53 |
54 | def getchip(self, id):
55 | for d in self.data:
56 | if d.car_id == id:
57 | return d
58 |
59 | # test the comparisons
60 |
61 |
62 | def test_cosine():
63 | a = [1, 2, 3, 4, 5, 6, 7]
64 | b = [10, 20, 30, 40, 11, 9, 2.7]
65 | out = analysis.comp_cosine(a, b)
66 | assert(abs(out - 0.63837193721375185) < 0.0000001)
67 |
68 |
69 | def test_euclidean():
70 | a = [1, 2, 3, 4, 5, 6, 7]
71 | b = [10, 20, 30, 40, 11, 9, 2.7]
72 | out = analysis.comp_euclid(a, b)
73 | assert(abs(out - 49.93485756463114) < 0.0000001)
74 |
75 | # test the matching works correctly
76 |
77 |
78 | def test_is_correct_match():
79 | fd = featureData()
80 |
81 | c1 = ['a', 'b', 'c', 'd']
82 | c2 = ['a', 'e', 'f', 'g']
83 | cam1 = list()
84 | cam2 = list()
85 |
86 | for c in c1:
87 | cam1.append(fd.getchip(c))
88 |
89 | for c in c2:
90 | cam2.append(fd.getchip(c))
91 |
92 | out = analysis.is_correct_match(fd, cam1, cam2)
93 | assert (out == 0)
94 |
95 |
96 | def test_pre_cmc():
97 | eg = experimentGen()
98 | fd = featureData()
99 | keys, values = analysis.pre_cmc(fd, eg, EXPPERCMC=10)
100 | assert values[0] == 1.0
101 |
102 |
103 | # test that the statistics are generated correctly
104 | def test_make_cmc_stats():
105 | eg = experimentGen()
106 | fd = featureData()
107 | experimentHolder = analysis.repeat_pre_cmc(fd, eg, NUMCMC=10, EXPPERCMC=10)
108 | stats, gdata = analysis.make_cmc_stats(experimentHolder, 4)
109 |
110 | for x in range(len(gdata[0])):
111 |         assert gdata[0][x] == gdata[1][x] == gdata[2][x]
112 |
--------------------------------------------------------------------------------
/pelops/analysis/unsorted/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/analysis/unsorted/__init__.py
--------------------------------------------------------------------------------
/pelops/analysis/unsorted/makeH5pyFile.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "cd '/local_data/dgrossman/VeRi/'"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "import h5py\n",
19 | "import json, sys\n",
20 | "import numpy as np\n",
21 | "\n",
22 | "def makeJsonList(fileName):\n",
23 | " retval = list()\n",
24 | " with open(fileName,'r') as f:\n",
25 | " for line in f:\n",
26 | " line = line.strip()\n",
27 | " line = json.loads(line)\n",
28 | " retval.append(line)\n",
29 | " return retval\n",
30 | "\n",
31 | "def extractColumn(colName,jsonList,t):\n",
32 | " retval = list()\n",
33 | " for line in jsonList:\n",
34 | " if t == str:\n",
35 | " retval.append(str(line[colName]).encode('ascii','ignore'))\n",
36 | " if t == int:\n",
37 | " retval.append(int(line[colName]))\n",
38 | " if t == float:\n",
39 | " for element in line[colName]:\n",
40 | " retval.append(float(element))\n",
41 | " return retval\n",
42 | "\n",
43 | "def make5file(file5Name, names, jsonList):\n",
44 | " with h5py.File(file5Name,'w') as f: \n",
45 | " for o, i, t, t2 in names:\n",
46 | " print(o,i)\n",
47 | " temp = extractColumn(o,jsonList,t)\n",
48 | " f.create_dataset(i,data=temp,dtype=t2)\n",
49 | "\n",
50 | "def main(inFileName,outFileName):\n",
51 | " jsonList = makeJsonList(inFileName)\n",
52 | " f = np.dtype('float')\n",
53 | " c = h5py.special_dtype(vlen=bytes)\n",
54 | " names = [('colorID','colorID',int,int), ('vehicleID','vehicleID',str,c), ('resnet50','feats',float,f), ('imageName','ids',str,c), ('typeID','typeID',int,int), ('cameraID','cameraID',str,c)]\n",
55 | "    make5file(outFileName,names,jsonList)\n",
56 | " \n",
57 | "if __name__ == '__main__':\n",
58 | " main(sys.argv[1],sys.argv[2])"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "metadata": {},
65 | "outputs": [],
66 | "source": [
67 | "main('./test_uniqfile.json','./test_uniqfile.p5')"
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": null,
73 | "metadata": {},
74 | "outputs": [],
75 | "source": [
76 | "jsonList[0].keys()"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": null,
82 | "metadata": {},
83 | "outputs": [],
84 | "source": [
85 | "f = np.dtype('float')\n",
86 | "c = h5py.special_dtype(vlen=bytes)\n",
87 | "names = [('colorID','colorID',int,int), ('vehicleID','vehicleID',str,c), ('resnet50','feats',float,f), ('imageName','ids',str,c), ('typeID','typeID',int,int), ('cameraID','cameraID',str,c)]"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": null,
93 | "metadata": {},
94 | "outputs": [],
95 | "source": [
96 | "make5file('1test_features',names,jsonList)"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": null,
102 | "metadata": {},
103 | "outputs": [],
104 | "source": []
105 | }
106 | ],
107 | "metadata": {
108 | "anaconda-cloud": {},
109 | "kernelspec": {
110 | "display_name": "Python 3",
111 | "language": "python",
112 | "name": "python3"
113 | },
114 | "language_info": {
115 | "codemirror_mode": {
116 | "name": "ipython",
117 | "version": 3
118 | },
119 | "file_extension": ".py",
120 | "mimetype": "text/x-python",
121 | "name": "python",
122 | "nbconvert_exporter": "python",
123 | "pygments_lexer": "ipython3",
124 | "version": "3.5.2"
125 | }
126 | },
127 | "nbformat": 4,
128 | "nbformat_minor": 1
129 | }
130 |
--------------------------------------------------------------------------------
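Note: a sketch of reading back a feature file written by make5file above, using the dataset names from the `names` table; the path is hypothetical.

    import h5py

    with h5py.File('test_uniqfile.p5', 'r') as f:
        print(list(f.keys()))   # e.g. ['cameraID', 'colorID', 'feats', 'ids', ...]
        print(f['ids'][0], f['feats'].shape)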
/pelops/analysis/unsorted/recompute/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/analysis/unsorted/recompute/__init__.py
--------------------------------------------------------------------------------
/pelops/analysis/unsorted/recompute/extract_feats_from_chips.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from keras.applications.resnet50 import preprocess_input
3 | from keras.applications.resnet50 import ResNet50
4 | from keras.models import Model
5 | from keras.models import model_from_json
6 | from keras.preprocessing import image
7 |
8 | from pelops.datasets.featuredataset import FeatureDataset
9 |
10 |
11 | def load_image(img_path, resizex=224, resizey=224):
12 | data = image.load_img(img_path, target_size=(resizex, resizey))
13 | x = image.img_to_array(data)
14 | x = np.expand_dims(x, axis=0)
15 | x = preprocess_input(x)
16 | return x
17 |
18 |
19 | def save_model_workaround(model, model_file, weight_file):
20 | # serialize model to JSON
21 | model_json = model.to_json()
22 | with open(model_file, 'w') as json_file:
23 | json_file.write(model_json)
24 | # serialize weights to HDF5
25 | model.save_weights(weight_file)
26 |
27 |
28 | def load_model_workaround(model_file, weight_file):
29 | # load json and create model
30 | json_file = open(model_file, 'r')
31 | loaded_model_json = json_file.read()
32 | json_file.close()
33 | loaded_model = model_from_json(loaded_model_json)
34 | # load weights into new model
35 | loaded_model.load_weights(weight_file)
36 | return loaded_model
37 |
38 | # load the imagenet networks
39 |
40 |
41 | def get_models(model_file, weight_file, layer):
42 | # include_top needs to be True for this to work
43 | base_model = load_model_workaround(model_file, weight_file)
44 | output_layer = base_model.get_layer(layer)
45 | output_layer = output_layer.output
46 | model = Model(input=base_model.input, output=output_layer)
47 | # output=base_model.get_layer('flatten_1').output)
48 | return (model, base_model)
49 |
50 | # return feature vector for a given img, and model
51 |
52 |
53 | def image_features(img, model):
54 | features = model.predict(img)
55 | return features
56 |
57 |
58 | def extract_feats_from_chips(chipdataset, output_fname, model_file, weight_file, layer):
59 | model, base_model = get_models(model_file, weight_file, layer)
60 |
61 | features = np.zeros((len(chipdataset), 2048), dtype=np.float16)
62 | chips = []
63 | chip_keys = []
64 | for index, (chip_key, chip) in enumerate(chipdataset.chips.items()):
65 | chip_keys.append(chip_key)
66 | chips.append(chip)
67 | img_path = chip.filepath
68 | img_data = load_image(img_path)
69 | features[index] = image_features(img_data, model)
70 |
71 | FeatureDataset.save(output_fname, chip_keys, chips, features)
72 | return True
73 |
--------------------------------------------------------------------------------
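Usage sketch for the helpers above (all file names, the dataset path, and the layer name are hypothetical): persist a trained Keras model with save_model_workaround, then reload it and recompute chip features. extract_feats_from_chips assumes the chosen layer emits 2048-wide vectors, matching the preallocated feature array.

    from pelops.datasets.veri import VeriDataset
    from pelops.analysis.unsorted.recompute.extract_feats_from_chips import (
        extract_feats_from_chips, save_model_workaround)

    # once, after training: save_model_workaround(model, 'model.json', 'weights.h5')

    chips = VeriDataset('/path/to/VeRi')
    extract_feats_from_chips(chips, 'veri_feats.hdf5',
                             'model.json', 'weights.h5', layer='flatten_1')
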
/pelops/const.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | class _Const:
4 | """ Create a constant class
5 | """
6 | class ConstError(TypeError): pass
7 |
8 | def __setattr__(self, name, value):
9 | if name in self.__dict__:
10 | raise self.ConstError("Cannot rebind constant {}".format(name))
11 |             self.__dict__[name] = value
12 |
13 | def __delattr__(self, name):
14 | if name in self.__dict__:
15 | raise self.ConstError("Cannot unbind constant {}".format(name))
16 | raise NameError(name)
17 |
18 | sys.modules[__name__] = _Const()
--------------------------------------------------------------------------------
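Because the last line replaces the module object in sys.modules with a _Const instance, anything that imports pelops.const gets a write-once attribute store. A minimal sketch of the intended behavior (DEFAULT_CROP_SIZE is a hypothetical constant name):

    import pelops.const as const

    const.DEFAULT_CROP_SIZE = 224      # first binding succeeds
    try:
        const.DEFAULT_CROP_SIZE = 256  # rebinding raises
    except const.ConstError as err:
        print(err)                     # Cannot rebind constant DEFAULT_CROP_SIZE
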
/pelops/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/datasets/__init__.py
--------------------------------------------------------------------------------
/pelops/datasets/chip.py:
--------------------------------------------------------------------------------
1 | import abc
2 | import collections
3 | import os
4 | import pelops.utils as utils
5 |
6 | # ================================================================================
7 | # Chip Factory
8 | # ================================================================================
9 |
10 |
11 | class DatasetFactory(object):
12 | @staticmethod
13 | def create_dataset(dataset_type, dataset_path, set_type=None):
14 | for cls in ChipDataset.__subclasses__():
15 | if cls.check_dataset_type(dataset_type):
16 | return cls(dataset_path, set_type)
17 |
18 | # ================================================================================
19 | # Chip Dataset
20 | # ================================================================================
21 |
22 |
23 | class ChipDataset(metaclass=abc.ABCMeta):
24 | def __init__(self, dataset_path, set_type=None):
25 | self.dataset_path = dataset_path
26 | self.__set_set_type(set_type)
27 | self.chips = dict()
28 | self.chips_by_cam_id = None
29 | self.chips_by_car_id = None
30 |
31 | def __set_set_type(self, set_type):
32 | self.set_type = None
33 |
34 | # The Default ALL
35 | if set_type is None:
36 | self.set_type = utils.SetType.ALL
37 |
38 | # If passed a SetType
39 | if isinstance(set_type, utils.SetType):
40 | self.set_type = set_type
41 |
42 | # If passed a string
43 | if isinstance(set_type, str):
44 | set_type = set_type.lower()
45 | for st in utils.SetType:
46 | if set_type == st.value:
47 | self.set_type = st
48 |
49 | if self.set_type is None:
50 | raise ValueError("set_type is not a valid string or SetType enum")
51 |
52 |
53 | @classmethod
54 |     def check_dataset_type(cls, dataset_type):
55 |         return dataset_type == cls.__name__
56 |
57 | def get_all_chips_by_car_id(self, car_id):
58 | if self.chips_by_car_id is None:
59 | self.chips_by_car_id = collections.defaultdict(list)
60 | for chip_key, chip in self.chips.items():
61 | self.chips_by_car_id[chip.car_id].append(chip_key)
62 | return [self.chips[chip_key] for chip_key in self.chips_by_car_id[car_id]]
63 |
64 | def get_all_chips_by_car_id_camera_id(self, car_id, cam_id):
65 | output = []
66 | for chip in self.get_all_chips_by_car_id(car_id):
67 | if chip.cam_id == cam_id:
68 | output.append(chip)
69 | return output
70 |
71 | def get_all_chips_by_cam_id(self, cam_id):
72 | if self.chips_by_cam_id is None:
73 | self.chips_by_cam_id = collections.defaultdict(list)
74 | for chip_key, chip in self.chips.items():
75 | self.chips_by_cam_id[chip.cam_id].append(chip_key)
76 |
77 | return [self.chips[chip_key] for chip_key in self.chips_by_cam_id[cam_id]]
78 |
79 | def get_distinct_cams_by_car_id(self, car_id):
80 | # TODO: Look at performance
81 | return self.get_distinct_cams_per_car()[car_id]
82 |
83 | def get_distinct_cams_per_car(self):
84 | # TODO: Look at performance
85 | list_of_cameras_per_car = collections.defaultdict(set)
86 | for chip in self.chips.values():
87 | list_of_cameras_per_car[chip.car_id].add(chip.cam_id)
88 | return list_of_cameras_per_car
89 |
90 | def get_all_cam_ids(self):
91 | return list(set(chip.cam_id for chip in self.chips.values()))
92 |
93 | def get_all_car_ids(self):
94 | return list(set(chip.car_id for chip in self.chips.values()))
95 |
96 |     def __iter__(self):
97 |         # a generator simply ends when it is exhausted; raising StopIteration
98 |         # here is an error under PEP 479 (Python 3.7+)
99 |         yield from self.chips.values()
100 |
101 | def __len__(self):
102 | return len(self.chips)
103 |
104 | # ================================================================================
105 | # Chip Base
106 | # ================================================================================
107 |
108 |
109 | # chip_id is the filepath
110 | Chip = collections.namedtuple("Chip",
111 | ["filepath",
112 | "car_id",
113 | "cam_id",
114 | "time",
115 | "misc"])
116 |
--------------------------------------------------------------------------------
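DatasetFactory.create_dataset dispatches on the subclass name via ChipDataset.__subclasses__(), so a dataset class must be imported before the factory can find it. A usage sketch (the dataset path is hypothetical):

    from pelops.datasets.chip import DatasetFactory
    import pelops.datasets.veri  # importing registers VeriDataset as a subclass

    dataset = DatasetFactory.create_dataset('VeriDataset', '/path/to/VeRi')
    print(len(dataset))              # number of chips
    for chip in dataset:             # ChipDataset is iterable
        print(chip.filepath, chip.car_id, chip.cam_id)
        break
    cams_per_car = dataset.get_distinct_cams_per_car()
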
/pelops/datasets/compcar.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import os
3 | import scipy.io
4 |
5 | import pelops.datasets.chip as chip
6 | import pelops.utils as utils
7 |
8 |
9 | class CompcarDataset(chip.ChipDataset):
10 | filenames = collections.namedtuple(
11 | "filenames",
12 | [
13 | "image_dir",
14 | "name_train",
15 | "name_test",
16 | "model_mat",
17 | "color_mat",
18 | ]
19 | )
20 |     filepaths = filenames(
21 | "image",
22 | "train_surveillance.txt",
23 | "test_surveillance.txt",
24 | "sv_make_model_name.mat",
25 | "color_list.mat",
26 | )
27 |
28 | def __init__(self, dataset_path, set_type=None):
29 | super().__init__(dataset_path, set_type)
30 | self.__set_filepaths() # set self.__filepaths
31 | self.__extract_color_labels() # set self.__color_map
32 | self.__extract_model_labels() # set self.__model_map
33 | self.__set_chips()
34 |
35 | def __set_filepaths(self):
36 | self.__filepaths = self.filenames(
37 | os.path.join(self.dataset_path, CompcarDataset.filepaths.image_dir),
38 | os.path.join(self.dataset_path, CompcarDataset.filepaths.name_train),
39 | os.path.join(self.dataset_path, CompcarDataset.filepaths.name_test),
40 | os.path.join(self.dataset_path, CompcarDataset.filepaths.model_mat),
41 | os.path.join(self.dataset_path, CompcarDataset.filepaths.color_mat),
42 | )
43 |
44 | def __extract_color_labels(self):
45 | self.__color_map = {}
46 |
47 | # Map color_id to its respective name
48 | color_map = {
49 | -1: None,
50 | 0: "black",
51 | 1: "white",
52 | 2: "red",
53 | 3: "yellow",
54 | 4: "blue",
55 | 5: "green",
56 | 6: "purple",
57 | 7: "brown",
58 | 8: "champagne",
59 | 9: "silver",
60 | }
61 |
62 | # Load the matrix of colors
63 | color_matrix = scipy.io.loadmat(
64 | self.__filepaths.color_mat)["color_list"]
65 |
66 |         # file_array is a length-1 array; color_num_matrix is a 1x1 matrix
67 | for file_array, color_num_matrix in color_matrix:
68 | filepath = file_array[0]
69 | color_num = int(color_num_matrix[0][0])
70 | self.__color_map[filepath] = color_map[color_num]
71 |
72 | def __extract_model_labels(self):
73 | self.__model_map = {}
74 |
75 | model_matrix = scipy.io.loadmat(
76 | self.__filepaths.model_mat)["sv_make_model_name"]
77 |         for car_id, model_row in enumerate(model_matrix):
78 |             # car IDs in the .mat file are 1-indexed
79 |             car_id = int(car_id) + 1
80 |             # make holds only the make of the car, occasionally with trailing whitespace
81 |             make = model_row[0][0].strip()
82 |             # fix a misspelled make; the correct spelling is needed to strip it from the model below
83 |             if make == "Zoyte":
84 |                 make = "Zotye"
85 |             # model sometimes contains both make and model, so strip the make from it
86 |             make_and_model = model_row[1][0]
87 |             model = make_and_model.replace(make, "").strip()
88 |             # model_id is the model id used on the website
89 |             model_id = int(model_row[2][0][0])
90 |             # fix a misspelled make (after the strip above, which needs the original spelling)
91 |             if make == "BWM":
92 |                 make = "BMW"
93 |             self.__model_map[car_id] = [make, model, model_id]
94 |
95 | def __set_chips(self):
96 | # identify all the chips, default query to all
97 | all_names_filepaths = {
98 | utils.SetType.ALL: [self.__filepaths.name_test, self.__filepaths.name_train],
99 | utils.SetType.TEST: [self.__filepaths.name_test],
100 | utils.SetType.TRAIN: [self.__filepaths.name_train],
101 | }.get(self.set_type, [self.__filepaths.name_test, self.__filepaths.name_train])
102 | # create chip objects based on the names listed in the files
103 | for name_filepath in all_names_filepaths:
104 | for name in open(name_filepath):
105 | current_chip = self.__create_chip(self.__filepaths.image_dir, name.strip())
106 | self.chips[current_chip.filepath] = current_chip
107 |
108 | def __create_chip(self, img_dir, img_name):
109 | splitter = img_name.split("/")
110 | misc = dict()
111 |
112 | filepath = os.path.join(img_dir, img_name)
113 | car_id = int(splitter[0])
114 | cam_id = None
115 | time = None
116 | misc["color"] = self.__color_map[img_name]
117 | make, model, model_id = self.__model_map[car_id]
118 | misc["make"] = make
119 | misc["model"] = model
120 | misc["model_id"] = model_id
121 |
122 | return chip.Chip(filepath, car_id, cam_id, time, misc)
123 |
--------------------------------------------------------------------------------
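Once loaded, each CompCars chip carries its labels in misc, as assembled by __create_chip above. A short sketch (the dataset path is hypothetical):

    from pelops.datasets.compcar import CompcarDataset

    compcar = CompcarDataset('/path/to/CompCars_sv')
    chip = next(iter(compcar))
    chip.car_id                    # int, taken from the first path component
    chip.misc['color']             # e.g. 'black', or None when unlabeled (-1)
    chip.misc['make'], chip.misc['model'], chip.misc['model_id']
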
/pelops/datasets/dgcars.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import json
3 | import os.path
4 |
5 | import pelops.datasets.chip as chip
6 | import pelops.utils as utils
7 |
8 |
9 | class DGCarsDataset(chip.ChipDataset):
10 | filenames = collections.namedtuple(
11 | "filenames",
12 | [
13 | "all_list",
14 | "train_list",
15 | "test_list",
16 | ]
17 | )
18 | filepaths = filenames(
19 | "allFiles",
20 | "training",
21 | "testing",
22 | )
23 |
24 | def __init__(self, dataset_path, set_type=None):
25 | super().__init__(dataset_path, set_type)
26 | self.__set_filepaths() # set self.__filepaths
27 | self.__set_chips()
28 |
29 | def __set_filepaths(self):
30 | self.__filepaths = self.filenames(
31 | os.path.join(self.dataset_path, DGCarsDataset.filepaths.all_list),
32 | os.path.join(self.dataset_path, DGCarsDataset.filepaths.train_list),
33 | os.path.join(self.dataset_path, DGCarsDataset.filepaths.test_list),
34 | )
35 |
36 | def __set_chips(self):
37 | # identify all the chips, default query to all
38 | name_filepath = {
39 | utils.SetType.ALL: self.__filepaths.all_list,
40 | utils.SetType.TEST: self.__filepaths.test_list,
41 | utils.SetType.TRAIN: self.__filepaths.train_list,
42 | }.get(self.set_type, self.__filepaths.all_list)
43 |
44 | # create chip objects based on the names listed in the files
45 | for dg_chip in utils.read_json(name_filepath):
46 | filepath = os.path.normpath(os.path.join(self.dataset_path, dg_chip["filename"]))
47 | car_id = None
48 | cam_id = None
49 | time = None
50 | misc = dg_chip
51 | current_chip = chip.Chip(filepath, car_id, cam_id, time, misc)
52 |
53 | self.chips[filepath] = current_chip
54 |
--------------------------------------------------------------------------------
/pelops/datasets/featuredataset.py:
--------------------------------------------------------------------------------
1 | import json
2 | import datetime
3 | import h5py
4 | import numpy as np
5 | from pelops.datasets.chip import ChipDataset, Chip
6 |
7 | class FeatureDataset(ChipDataset):
8 | def __init__(self, filename):
9 | super().__init__(filename)
10 | self.chip_index_lookup, self.chips, self.feats = self.load(filename)
11 | self.filename_lookup = {}
12 | for chip_key, chip in self.chips.items():
13 | self.filename_lookup[chip.filepath] = chip_key
14 |
15 | def get_feats_for_chip(self, chip):
16 | chip_key = self.filename_lookup[chip.filepath]
17 | return self.feats[self.chip_index_lookup[chip_key]]
18 |
19 | @staticmethod
20 | def load(filename):
21 |         with h5py.File(filename, 'r') as fIn:
22 | feats = np.array(fIn['feats'])
23 |
24 | num_items = fIn['feats'].shape[0]
25 | # Hack to deal with performance of extracting single items
26 | local_hdf5 = {}
27 | local_hdf5['chip_keys'] = np.array(fIn['chip_keys'])
28 | local_hdf5['filepath'] = np.array(fIn['filepath'])
29 | local_hdf5['car_id'] = np.array(fIn['car_id'])
30 | local_hdf5['cam_id'] = np.array(fIn['cam_id'])
31 | local_hdf5['time'] = np.array(fIn['time'])
32 | local_hdf5['misc'] = np.array(fIn['misc'])
33 |
34 | chips = {}
35 | chip_index_lookup = {}
36 | for i in range(num_items):
37 | filepath = local_hdf5['filepath'][i].decode('utf-8')
38 | car_id = local_hdf5['car_id'][i]
39 | cam_id = local_hdf5['cam_id'][i]
40 | timestamp = local_hdf5['time'][i]
41 | if isinstance(timestamp, str) or isinstance(timestamp, bytes):
42 | # Catch the case where we have encoded time as a string timestamp
43 | timestamp = datetime.datetime.fromtimestamp(float(timestamp))
44 | misc = json.loads(local_hdf5['misc'][i].decode('utf-8'))
45 | chip_key = local_hdf5['chip_keys'][i]
46 | if isinstance(chip_key, bytes):
47 | chip_key = chip_key.decode('utf-8')
48 | chip_index_lookup[chip_key] = i
49 | chips[chip_key] = Chip(filepath, car_id, cam_id, timestamp, misc)
50 | return chip_index_lookup, chips, feats
51 |
52 | @staticmethod
53 | def _save_field(fOut, field_example, field_name, value_array):
54 | if isinstance(field_example, datetime.datetime):
55 | # Encode time as a string seconds since epoch
56 | times = np.array([str(val.timestamp()).encode('ascii', 'ignore') for val in value_array])
57 | fOut.create_dataset(field_name,
58 | data=times,
59 | dtype=h5py.special_dtype(vlen=bytes))
60 | elif isinstance(field_example, str):
61 | output_vals = [val.encode('ascii', 'ignore') for val in value_array]
62 | fOut.create_dataset(field_name,
63 |                                 data=output_vals,
64 | dtype=h5py.special_dtype(vlen=bytes))
65 | elif isinstance(field_example, dict):
66 | output_vals = [json.dumps(val).encode('ascii', 'ignore') for val in value_array]
67 | fOut.create_dataset(field_name,
68 | data=output_vals,
69 | dtype=h5py.special_dtype(vlen=bytes))
70 | else:
71 | fOut.create_dataset(field_name, data=value_array)
72 |
73 | @staticmethod
74 | def save(filename, chip_keys, chips, features):
75 | """ Save a feature dataset
76 | """
77 | with h5py.File(filename, 'w') as fOut:
78 | fOut.create_dataset('feats', data=features)
79 |
80 | FeatureDataset._save_field(fOut,
81 | chip_keys[0],
82 | 'chip_keys',
83 | chip_keys)
84 |
85 | first_chip = chips[0]
86 | fields = first_chip._fields
87 | for field in fields:
88 | field_example = getattr(first_chip, field)
89 | output_data = [getattr(chip, field) for chip in chips]
90 | FeatureDataset._save_field(fOut, field_example, field, output_data)
91 |
--------------------------------------------------------------------------------
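A save/load round trip sketch of FeatureDataset (file name, chips, and features are toy values; real feature vectors are typically 2048-d):

    import datetime
    import numpy as np
    from pelops.datasets.chip import Chip
    from pelops.datasets.featuredataset import FeatureDataset

    chips = [Chip('a.jpg', 1, 2, datetime.datetime(2016, 9, 5), {'color': 'red'}),
             Chip('b.jpg', 1, 3, datetime.datetime(2016, 9, 5), {'color': 'red'})]
    chip_keys = [c.filepath for c in chips]
    feats = np.random.rand(2, 4).astype(np.float32)

    FeatureDataset.save('toy.hdf5', chip_keys, chips, feats)

    fd = FeatureDataset('toy.hdf5')
    np.allclose(fd.get_feats_for_chip(fd.chips['a.jpg']), feats[0])  # True
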
/pelops/datasets/slice.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import io
3 | import itertools
4 | import os
5 | import re
6 | import sys
7 | from datetime import datetime
8 |
9 | import pelops.datasets.chip as chip
10 |
11 | # ================================================================================
12 | # SLiCE Test Dataset (labeled by STR)
13 | # ================================================================================
14 |
15 |
16 | class SliceDataset(chip.ChipDataset):
17 |
18 | def __init__(self, dataset_path, set_type=None, debug=False):
19 | super().__init__(dataset_path, set_type)
20 | self.__noise_seq = 0
21 | self.__debug = debug
22 | self.__set_chips()
23 |
24 | @staticmethod
25 | def __decode_truth_file(truth_file):
26 | """The labels for the STR processed SLiCE chips are in a 'truth.txt' file which this function parses."""
27 |
28 | with open(truth_file) as truth_hdl:
29 | truth_text = truth_hdl.read()
30 | for char in [' ', '%']:
31 | truth_text = truth_text.replace(char, '')
32 | truth_fobj = io.StringIO(truth_text)
33 | return {(int(dct['obSetIdx']), int(dct['chipIdx'])): int(dct['targetID'])
34 | for dct in csv.DictReader(truth_fobj)}
35 |
36 | @staticmethod
37 | def index_chip(file_path):
38 | """Parses an arbitrary file path and identifies paths of valid image chips.
39 | Returns None for non-chip file paths."""
40 |
41 | # We have to handle two cases:
42 | #
43 | # 1) The STR San Antonio DOT chips, which have the form:
44 | # ObSet009_1473015765_IH37_Jones/images/ObSet009-014.png
45 | #
46 | # 2) The SLICE chips, which have the form:
47 | # ObSet101_1473082429_day5_camera3/images/ObSet101-001-0-20160905_185543.375_1.jpg
48 | #
49 | # The epoch on the SLICE chips is per chip, whereas it is per
50 | # observation set for the STR chips. The SLICE chip file names have the
51 | # follow information after the ObSet and chip id:
52 | #
53 | # Obset-ChipID-label-time_unused
54 |
55 | # Split the file path into pieces to extract the information from it
56 | file_path = os.path.normpath(file_path)
57 | directory, img_type, file = file_path.split(os.sep)[-3:]
58 |
59 | # Sometimes we have masks, reject those
60 | if img_type != "images":
61 | return
62 |
63 | # Sometimes we get the truth.txt file, which we do not want
64 | if file == "truth.txt":
65 | return
66 |
67 | # Get the observation set, time, and name from the directory
68 | obset_str, epoch_str, *name = directory.split("_")
69 | name = "_".join(name)
70 |
71 | # We slice off the first part of the string that is non-numeric, where
72 | # 5 = len("ObSet")
73 | obset_int = int(obset_str[5:])
74 |
75 | # Get the chip ID, and perhaps more, from the name of the file
76 | _, chip_id_str, *misc = file.split("-")
77 |
78 | # SLICE chips have more information
79 | if misc:
80 | chip_id_int = int(chip_id_str)
81 | _, time = misc
82 | # Remove file extension
83 | time, _ = os.path.splitext(time)
84 | # Remove _1 at end of each time and convert to microseconds
85 | time = time[:-2] + "000"
86 | # Get milliseconds since the unix epoch
87 | epoch = datetime.utcfromtimestamp(0)
88 | dt = datetime.strptime(time, "%Y%m%d_%H%M%S.%f")
89 | epoch_str = str(int((dt - epoch).total_seconds()))
90 | else:
91 | chip_id, _ = os.path.splitext(chip_id_str)
92 | chip_id_int = int(chip_id)
93 |
94 | idx_key = (obset_int, chip_id_int)
95 | idx_val = {
96 | 'file': file_path,
97 | 'meta': {
98 | 'obSetName': name,
99 | 'epoch': epoch_str,
100 | },
101 | }
102 | return idx_key, idx_val
103 |
104 | def __create_chip(self, file_info, truth_value):
105 | """Converts parsing / indexing results into a pelops.datasets.chip.Chip object"""
106 | if truth_value == 0:
107 | self.__noise_seq += 1
108 | car_id = 'unk-{:09d}'.format(self.__noise_seq)
109 | else:
110 | car_id = 'tgt-{:09d}'.format(truth_value)
111 |
112 | chip_params = [
113 | file_info['file'],
114 | car_id,
115 | file_info['meta']['obSetName'],
116 | file_info['meta']['epoch'],
117 | file_info['meta']
118 | ]
119 | return chip.Chip(*chip_params)
120 |
121 | def __set_chips(self):
122 | """Sets the chips dict of the superclass to contain chip files for the dataset."""
123 |
124 | # Scan filesystem
125 | root_files = [root_file for root_file in os.walk(self.dataset_path)]
126 |
127 | # Decode truth.txt file
128 | truth_files = [os.path.join(walked[0], 'truth.txt') for walked in root_files if 'truth.txt' in walked[2]]
129 | if len(truth_files) == 0:
130 | raise IOError("No truth file found.")
131 | elif len(truth_files) > 1:
132 | raise IOError("Too many truth files available.")
133 |
134 | truth_data = self.__decode_truth_file(truth_files.pop())
135 | if len(truth_data) < 1:
136 | raise IOError("No truth loaded")
137 | if self.__debug:
138 | print("{} truth records loaded.".format(len(truth_data)))
139 |
140 | # Index all image chips
141 | file_paths = [[os.path.join(walked[0], wfile) for wfile in walked[2]] for walked in root_files]
142 | chip_idx = dict(filter(lambda t: t is not None, map(self.index_chip, itertools.chain(*file_paths))))
143 |
144 | if len(chip_idx) != len(truth_data):
145 | raise IOError("Number of truth records not equal to number of chips.")
146 | if self.__debug:
147 | print("{} image chips loaded.".format(len(chip_idx)))
148 |
149 | # Create and store chips
150 | self.chips = {meta['file']: self.__create_chip(meta, truth_data[idx]) for idx, meta in chip_idx.items()}
151 | if self.__debug:
152 | print("{} chip.Chips loaded.".format(len(self.chips)))
153 |
--------------------------------------------------------------------------------
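index_chip is a pure function, so its parsing rules are easy to check against the two documented path forms. Using the STR San Antonio example from the docstring:

    from pelops.datasets.slice import SliceDataset

    key, val = SliceDataset.index_chip(
        'ObSet009_1473015765_IH37_Jones/images/ObSet009-014.png')
    # key == (9, 14)
    # val == {'file': 'ObSet009_1473015765_IH37_Jones/images/ObSet009-014.png',
    #         'meta': {'obSetName': 'IH37_Jones', 'epoch': '1473015765'}}

    # mask directories and truth.txt are rejected:
    SliceDataset.index_chip('ObSet009_1473015765_IH37_Jones/masks/a.png')  # None
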
/pelops/datasets/str.py:
--------------------------------------------------------------------------------
1 |
2 | import collections
3 | import os
4 |
5 | import pelops.datasets.chip as chip
6 | import pelops.utils as utils
7 |
8 | # ================================================================================
9 | # STR_SA Dataset
10 | # ================================================================================
11 |
12 |
13 | class StrDataset(chip.ChipDataset):
14 | # define paths to files and directories
15 | filenames = collections.namedtuple(
16 | "filenames",
17 | [
18 | "dir_all"
19 | ]
20 | )
21 |     filepaths = filenames(
22 | "crossCameraMatches"
23 | )
24 |
25 | def __init__(self, dataset_path, set_type=None):
26 | super().__init__(dataset_path, set_type)
27 | self.__set_filepaths() # set self.__filepaths
28 | self.__set_chips()
29 | # STR does not differentiate the set type
30 |
31 | def __set_filepaths(self):
32 | self.__filepaths = StrDataset.filenames(
33 | os.path.join(self.dataset_path, StrDataset.filepaths.dir_all)
34 | )
35 |
36 | def __set_chips(self):
37 | directory = self.__filepaths.dir_all
38 | for file in os.listdir(directory):
39 | path = os.path.join(directory, file)
40 |
41 | # Only interested in certain files
42 | is_valid = os.path.isfile(path)
43 | is_png = path.endswith(".png")
44 | is_mask = "mask" in path
45 | if not is_valid or not is_png or is_mask:
46 | continue
47 |
48 | # Set all Chip variables
49 | car_id = get_sa_car_id(path)
50 | cam_id = get_sa_cam_id(path)
51 |
52 | time = None # No timestamp information
53 | misc = None # No miscellaneous information
54 |
55 | # Make chip
56 | current_chip = chip.Chip(
57 | path,
58 | car_id,
59 | cam_id,
60 | time,
61 | misc
62 | )
63 |
64 | self.chips[path] = current_chip
65 |
66 |
67 | def int_from_string(string, start_chars, int_len):
68 | # We only want to use the filename, not the directory names
69 | base_string = os.path.basename(string)
70 | loc = base_string.find(start_chars)
71 |
72 | # Not found
73 | if loc < 0:
74 | return None
75 |
76 | start = loc + len(start_chars)
77 | end = start + int_len
78 | str_num = base_string[start:end]
79 | return int(str_num)
80 |
81 |
82 | def get_sa_cam_id(string):
83 | return int_from_string(string, start_chars="_cam", int_len=2)
84 |
85 |
86 | def get_sa_car_id(string):
87 | return int_from_string(string, start_chars="match", int_len=5)
88 |
--------------------------------------------------------------------------------
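int_from_string scans the basename for a marker and parses the fixed-width integer that follows it. The filename below is hypothetical but follows the "match" + 5 digits, "_cam" + 2 digits convention the two helpers assume:

    from pelops.datasets.str import get_sa_car_id, get_sa_cam_id

    path = '/data/crossCameraMatches/match00123_cam02.png'
    get_sa_car_id(path)   # 123  ('match' followed by 5 digits)
    get_sa_cam_id(path)   # 2    ('_cam' followed by 2 digits)
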
/pelops/datasets/veri.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import datetime
3 | import os
4 | import xml.etree.ElementTree
5 |
6 | import pelops.datasets.chip as chip
7 | import pelops.utils as utils
8 |
9 | # ================================================================================
10 | # Veri Dataset
11 | # ================================================================================
12 |
13 |
14 | class VeriDataset(chip.ChipDataset):
15 | filenames = collections.namedtuple(
16 | "filenames",
17 | [
18 | "name_query",
19 | "name_test",
20 | "name_train",
21 | "dir_query",
22 | "dir_test",
23 | "dir_train",
24 | "list_color",
25 | "list_type",
26 | "ground_truths",
27 | "junk_images",
28 | "label_train"
29 | ]
30 | )
31 | filepaths = filenames(
32 | "name_query.txt",
33 | "name_test.txt",
34 | "name_train.txt",
35 | "image_query",
36 | "image_test",
37 | "image_train",
38 | "list_color.txt",
39 | "list_type.txt",
40 | "gt_image.txt",
41 | "jk_image.txt",
42 | "train_label.xml"
43 | )
44 |
45 | def __init__(self, dataset_path, set_type=None):
46 | super().__init__(dataset_path, set_type)
47 | self.__set_filepaths() # set self.__filepaths
48 | self.__color_type = {}
49 | if self.set_type is utils.SetType.ALL or self.set_type is utils.SetType.TRAIN:
50 | self.__build_metadata_dict()
51 | self.__set_chips()
52 |
53 | def __build_metadata_dict(self):
54 | """Extract car type and color from the label file."""
55 | try:
56 | root = xml.etree.ElementTree.parse(self.__filepaths.label_train).getroot()
57 | except ValueError as e:
58 | URL = "https://github.com/Lab41/pelops/issues/72"
59 | ERROR = (
60 | str(e) + "\n\n"
61 | "The label file 'train_label.xml' comes malformed from the\n"
62 | "source. The first line needs to be changed to:\n"
63 |                 "'<?xml version=\"1.0\" encoding=\"gb2312\"?>'\n"
64 | "if it is not already.\n"
65 | "See: " + URL
66 | )
67 | raise ValueError(ERROR)
68 |
69 | colors = {
70 | 1: "yellow", 2: "orange", 3: "green", 4: "gray", 5: "red",
71 | 6: "blue", 7: "white", 8: "golden", 9: "brown", 10: "black",
72 | }
73 | types = {
74 | 0: "unknown", 1: "sedan", 2: "suv", 3: "van", 4: "hatchback",
75 | 5: "mpv", 6: "pickup", 7: "bus", 8: "truck", 9: "estate",
76 | }
77 |
78 | # Version 1.0 of the VeRI data has a bug where several cars are labeled
79 | # as the illegal type 0:
80 | #
81 | # https://github.com/Lab41/pelops/issues/76
82 | #
83 |     # These cars are actually SUVs (or cross-overs) and hence should be
84 | # type 2.
85 | if root.attrib["Version"] == "1.0":
86 | types[0] = "suv"
87 | URL = "https://github.com/Lab41/pelops/issues/76"
88 | output = (
89 | "VeRI Version 1.0 found! Patching `typeID=0` to `typeID=2`.\n"
90 | "See: " + URL
91 | )
92 | print(output)
93 |
94 | self.__color_type = {}
95 | for child in root.iter("Item"):
96 | # Get the IDs from the XML node
97 | vehicle_id = child.attrib["vehicleID"]
98 | color = child.attrib["colorID"]
99 | body_type = child.attrib["typeID"]
100 |
101 | vehicle_id_int = int(vehicle_id)
102 | color_id = int(color)
103 | body_id = int(body_type)
104 | str_color = colors[color_id]
105 | str_body = types[body_id]
106 |
107 | self.__color_type[vehicle_id_int] = (str_color, str_body)
108 |
109 | def __set_filepaths(self):
110 | self.__filepaths = VeriDataset.filenames(
111 | os.path.join(self.dataset_path, VeriDataset.filepaths.name_query),
112 | os.path.join(self.dataset_path, VeriDataset.filepaths.name_test),
113 | os.path.join(self.dataset_path, VeriDataset.filepaths.name_train),
114 | os.path.join(self.dataset_path, VeriDataset.filepaths.dir_query),
115 | os.path.join(self.dataset_path, VeriDataset.filepaths.dir_test),
116 | os.path.join(self.dataset_path, VeriDataset.filepaths.dir_train),
117 | os.path.join(self.dataset_path, VeriDataset.filepaths.list_color),
118 | os.path.join(self.dataset_path, VeriDataset.filepaths.list_type),
119 | os.path.join(self.dataset_path, VeriDataset.filepaths.ground_truths),
120 | os.path.join(self.dataset_path, VeriDataset.filepaths.junk_images),
121 | os.path.join(self.dataset_path, VeriDataset.filepaths.label_train),
122 | )
123 |
124 | def __set_chips(self):
125 | # TODO: ignore images labeled as query, so we do not have to keep tabs for identical chips
126 | # identify all the chips
127 | all_names_filepaths = {
128 | utils.SetType.ALL: [self.__filepaths.name_query, self.__filepaths.name_test, self.__filepaths.name_train],
129 | utils.SetType.QUERY: [self.__filepaths.name_query],
130 | utils.SetType.TEST: [self.__filepaths.name_test],
131 | utils.SetType.TRAIN: [self.__filepaths.name_train],
132 | }.get(self.set_type)
133 | # create chip objects based on the names listed in the files
134 | for name_filepath in all_names_filepaths:
135 | if VeriDataset.filepaths.name_query in name_filepath:
136 | img_dir = self.__filepaths.dir_query
137 | elif VeriDataset.filepaths.name_test in name_filepath:
138 | img_dir = self.__filepaths.dir_test
139 | else: # VeriDataset.filepaths.name_train in filepath
140 | img_dir = self.__filepaths.dir_train
141 | for name in open(name_filepath):
142 | current_chip = self.__create_chip(img_dir, name.strip())
143 | self.chips[current_chip.filepath] = current_chip
144 |
145 | def __create_chip(self, img_dir, img_name):
146 | # information about the chip resides in the chip's name
147 | splitter = img_name.split("_")
148 | misc = {}
149 |
150 | filepath = os.path.join(img_dir, img_name)
151 | car_id = int(splitter[0])
152 | cam_id = int(utils.get_numeric(splitter[1]))
153 | time = datetime.datetime.fromtimestamp(int(splitter[2]))
154 | misc["binary"] = int(os.path.splitext(splitter[3])[0])
155 |
156 | color, vehicle_type = self.__color_type.get(car_id, (None, None))
157 | misc["color"] = color
158 | misc["vehicle_type"] = vehicle_type
159 |
160 | return chip.Chip(filepath, car_id, cam_id, time, misc)
161 |
--------------------------------------------------------------------------------
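VeRi chip names encode their own metadata, which __create_chip unpacks. For a name of the form "0002_c002_00030600_0.jpg" (illustrative), the fields work out as:

    # splitter = ['0002', 'c002', '00030600', '0.jpg']
    # car_id         = 2      int(splitter[0])
    # cam_id         = 2      numeric part of 'c002'
    # time           = datetime.datetime.fromtimestamp(30600)
    # misc['binary'] = 0      int part of '0.jpg'
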
/pelops/etl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/etl/__init__.py
--------------------------------------------------------------------------------
/pelops/etl/computeMatrixCMC.py:
--------------------------------------------------------------------------------
1 | import json
2 | import time
3 | from collections import defaultdict
4 |
5 | from matplotlib import pyplot
6 |
7 |
8 | def makeTransDicts(reindexFile):
9 | reindex = open(reindexFile, 'r')
10 | file2num = dict()
11 | num2file = dict()
12 | index = 0
13 | for line in reindex:
14 | line = line.strip()
15 | file2num[line] = index
16 | num2file[index] = line
17 | index += 1
18 | return (file2num, num2file)
19 |
20 |
21 | def makeMatrix(matrixFilename, num2file, file2num, measure='cosine'):
22 |
23 | a = open(matrixFilename, 'r')
24 | lines = 0
25 | for line in a:
26 | lines += 1
27 | a.close()
28 |
29 | Matrix = [[0 for x in range(lines)] for y in range(lines)]
30 | matrixFile = open(matrixFilename, 'r')
31 | for line in matrixFile:
32 |
33 | line = line.strip()
34 | line = json.loads(line)
35 | x = file2num[line['x']]
36 | y = file2num[line['y']]
37 | Matrix[x][y] = line[measure]
38 | Matrix[y][x] = line[measure]
39 |
40 | for index in range(0, lines):
41 |         Matrix[index][index] = 8675309  # large sentinel so a chip never matches itself
42 | return Matrix
43 |
44 |
45 | def getrank(car, s, maxval=-1):
46 | for sidx, work in enumerate(s):
47 | # sval = work[0]
48 | scar = work[1]
49 | if scar == car:
50 | return sidx
51 | return maxval
52 |
53 |
54 | def preCMC(Matrix, num2file, downto=50):
55 | retval = defaultdict(int)
56 | start = time.time()
57 | size = len(Matrix[0])
58 |
59 | for oindex in range(size):
60 | if oindex % 1000 == 0:
61 | print('index:{0} time:{1}'.format(oindex, time.time() - start))
62 | start = time.time()
63 |
64 | car = num2file[oindex].split('_')[0]
65 |
66 | current = list()
67 |
68 | for idx, val in enumerate(Matrix[oindex]):
69 | current.append((float(val), num2file[idx].split('_')[0]))
70 |
71 | s = sorted(current, key=lambda tup: tup[0])[:downto]
72 | maxSearch = downto + 1
73 | r = getrank(car, s, maxval=maxSearch)
74 | retval[r] += 1
75 | return retval
76 |
77 |
78 | def computeCMC(rawCounts, num):
79 |     idx = sorted(rawCounts)
80 |     total = 0
81 |     CMC = list()
82 |     for index in range(0, len(idx)):
83 |         total += rawCounts[index]
84 |         print(index, total)
85 |         CMC.append(total / float(num))
86 |     return CMC
87 |
88 |
89 | testFilesName = '/local_data/dgrossman/VeRi/test_uniqfiles'
90 | matrixFilename = '/local_data/dgrossman/VeRi/matrixFile.test_uniqfile'
91 | file2num, num2file = makeTransDicts(testFilesName)
92 | Matrix = makeMatrix(matrixFilename, num2file, file2num)
93 | rawCounts = preCMC(Matrix, num2file)
94 | CMC = computeCMC(rawCounts, len(Matrix[0]))
95 |
96 | # pyplot.ylim(0,1)
97 | pyplot.plot(CMC[:-1])
98 | pyplot.show()
99 |
--------------------------------------------------------------------------------
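preCMC returns a histogram: for each probe, the rank at which the first same-car match appears. computeCMC then accumulates that histogram into a cumulative match curve. Since this module runs its pipeline at import time (hard-coded paths above), the sketch below just restates the arithmetic on toy counts, assuming 100 probes:

    raw_counts = {0: 70, 1: 15, 2: 5}  # first correct match at rank 0 / 1 / 2
    num, running, cmc = 100, 0, []
    for rank in sorted(raw_counts):
        running += raw_counts[rank]
        cmc.append(running / float(num))
    # cmc == [0.70, 0.85, 0.90]: fraction of probes matched by rank k
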
/pelops/etl/json2h5.py:
--------------------------------------------------------------------------------
1 | '''transform the json files into h5py files
2 |
3 | Input:
4 | one json encoded dict / line
5 | dict should have the following keys:
6 | colorID - colorID of the vehicle
7 | vehicleID - vehicle ID
8 | resnet50 - feature vector of the vehicle
9 | imageName - name of the file in storage
10 | typeID - ??
11 | cameraID - which camera took the image
12 |
13 | Output:
14 | h5py file with the following datasets
15 | colorID - int colorID of the vehicle
16 | vehicleID - int vehicle ID
17 | resnet50 - [float] feature vector of the vehicle
18 | imageName - str name of the file in storage
19 | typeID - int ??
20 | cameraID - which camera took the image
21 |
22 | Usage:
23 | json2h5.py [-hv]
24 |     json2h5.py -i <INFILE> -o <OUTFILE>
25 |
26 | Arguments:
27 | INFILE - json infile name
28 | OUTFILE - h5py outfile name
29 |
30 | Options:
31 | -h, --help :show this message
32 | -v, --version :Version of the program
33 |     -i, --input=<INFILE> :input file for the program
34 |     -o, --output=<OUTFILE> :output file for the program
35 |
36 | '''
37 | import docopt
38 | import h5py
39 | import json
40 | import numpy as np
41 | import sys
42 |
43 |
44 | def makeJsonList(fileName):
45 | retval = list()
46 | with open(fileName, 'r') as f:
47 | for line in f:
48 | line = line.strip()
49 | line = json.loads(line)
50 | retval.append(line)
51 | return retval
52 |
53 |
54 | def extractColumn(colName, jsonList, t):
55 | retval = list()
56 | for line in jsonList:
57 | if t == str:
58 | retval.append(str(line[colName]).encode('ascii', 'ignore'))
59 | if t == int:
60 | retval.append(int(line[colName]))
61 | if t == float:
62 | vector = list()
63 | for element in line[colName]:
64 | vector.append(float(element))
65 | retval.append(vector)
66 | return retval
67 |
68 |
69 | def make5file(file5Name, names, jsonList):
70 | with h5py.File(file5Name, 'w') as f:
71 | for o, i, t, t2 in names:
72 | sys.stdout.write('converting column {0}'.format(o))
73 | temp = extractColumn(o, jsonList, t)
74 | sys.stdout.write('...Done\n')
75 | sys.stdout.write('making dataset {0}'.format(i))
76 | f.create_dataset(i, data=temp, dtype=t2)
77 | sys.stdout.write('...Done\n')
78 |
79 |
80 | def main(args):
81 | try:
82 | inFileName = args['--input']
83 | outFileName = args['--output']
84 | except docopt.DocoptExit as e:
85 | sys.exit('error: input invalid options: {0}'.format(e))
86 |
87 | f = np.dtype('float')
88 | c = h5py.special_dtype(vlen=bytes)
89 | names = [('colorID', 'colorID', int, int),
90 | ('vehicleID', 'vehicleID', int, int),
91 | ('resnet50', 'feats', float, f),
92 | ('imageName', 'ids', str, c),
93 | ('typeID', 'typeID', int, int),
94 | ('cameraID', 'cameraID', str, c)]
95 |
96 | sys.stdout.write('Reading {0}'.format(inFileName))
97 | jsonList = makeJsonList(inFileName)
98 | sys.stdout.write('...Done\n')
99 |
100 | make5file(outFileName, names, jsonList)
101 |
102 | if __name__ == '__main__':
103 | args = docopt.docopt(__doc__, version='json2h5.py 1.0')
104 | main(args)
105 |
--------------------------------------------------------------------------------
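One input line for json2h5.py, following the key list in the docstring (values are illustrative; real resnet50 vectors are 2048-d):

    import json

    line = json.dumps({
        'colorID': 5,
        'vehicleID': 2,
        'resnet50': [0.12, 0.08, 0.0],        # truncated for the example
        'imageName': '0002_c002_00030600_0.jpg',
        'typeID': -1,
        'cameraID': 'c002',
    })
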
/pelops/etl/makeDistMatrix.py:
--------------------------------------------------------------------------------
1 | """ make the data for comparing entity id
2 |
3 | Input:
4 |     processed json file containing the list of images to compare
5 | the file must contain the 'resnet50' feature vector
6 |
7 | Output:
8 | all pairs comparison between images using resnet50
9 | output file lines will have the form of json dict
10 | dict will contain the following keys
11 | x - image name of the first image in comparison
12 | y - image name of the second image in comparison
13 |             cosine - cosine distance between the images
14 |             euclidean - euclidean distance between the images
15 |
16 | Usage:
17 | makeDistMatrix [-hv]
18 |     makeDistMatrix -i <INPUT_FILE> [-w <WORKERS>]
19 |
20 | Arguments:
21 | INPUT_FILE :file of the json description of the VeRi files
22 | WORKERS :number of threads in the pool
23 |
24 | Options:
25 |     -i, --inputFile=<INPUT_FILE> :file location of the input
26 |     -w, --workers=<WORKERS> :num of workers in threadpool [default: 10]
27 | """
28 | import docopt
29 | import json
30 | import sys
31 | from multiprocessing import Pool
32 | import scipy.spatial.distance
33 | import itertools
34 | import numpy as np
35 | import time
36 |
37 |
38 | # read the list of things to compare
39 | def makeWork(vectorFileName):
40 | vfile = open(vectorFileName, 'r')
41 | retval = list()
42 | for line in vfile:
43 | line = line.strip()
44 | line = json.loads(line)
45 | retval.append(line)
46 | vfile.close()
47 | return retval
48 |
49 |
50 | # help by chopping work into chunks
51 | def grouper(n, iterable):
52 | it = iter(iterable)
53 | while True:
54 | chunk = tuple(itertools.islice(it, n))
55 | if not chunk:
56 | return
57 | yield chunk
58 |
59 |
60 | # my distance measures
61 | def my_dist(workList):
62 | retval = list()
63 |
64 | for pair in workList:
65 | x = pair[0]
66 | y = pair[1]
67 | fx = np.asarray(x['resnet50'])
68 | fy = np.asarray(y['resnet50'])
69 | workItem = dict()
70 | dc = str(float(scipy.spatial.distance.cosine(fx, fy)))
71 | de = str(float(scipy.spatial.distance.euclidean(fx, fy)))
72 | workItem['x'] = x['imageName']
73 | workItem['y'] = y['imageName']
74 | workItem['cosine'] = dc
75 | workItem['euclidean'] = de
76 | retval.append(workItem)
77 |
78 | return (retval)
79 |
80 |
81 | # takes in a json file with vectors and creates all the pairwise
82 | # distance calculations, saves output to file
83 | def main(args, atOnceOuter=100000, atOnceInner=10000):
84 | try:
85 |         pworkers = int(args['--workers'])  # docopt returns strings
86 | inFileName = args['--inputFile']
87 | except docopt.DocoptExit as e:
88 | sys.exit('ERROR: input invalid options {0}'.format(e))
89 |
90 |
91 | work = makeWork(inFileName)
92 | p = Pool(pworkers)
93 |
94 | outFileName = 'matrixFile.{0}'.format(inFileName)
95 | matrixFile = open(outFileName, 'w')
96 |
97 | total = 0
98 | for batch in grouper(atOnceOuter, itertools.combinations(work, 2)):
99 | start = time.time()
100 | batched = list()
101 |
102 | for workbatch in grouper(atOnceInner, batch):
103 | batched.append(workbatch)
104 |
105 | retval = p.map(my_dist, batched)
106 | end = time.time()
107 | start2 = time.time()
108 | for listLine in retval:
109 | for line in listLine:
110 | total = total + 1
111 | matrixFile.write(json.dumps(line)+'\n')
112 | end2 = time.time()
113 |
114 | fstr = 'proc elapsed:{0} sec proc:{1} total{2}'
115 | print(fstr.format(end-start, atOnceOuter, total))
116 | print('IO elapsed:{0}\n'.format(end2-start2))
117 | matrixFile.close()
118 |
119 | if __name__ == '__main__':
120 |     args = docopt.docopt(__doc__, version='makeDistMatrix 1.0')
121 |     main(args)
122 |
--------------------------------------------------------------------------------
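grouper chops an iterator into fixed-size chunks, with a short final chunk; it is what lets the all-pairs work above be farmed out to the pool in batches. For example:

    from pelops.etl.makeDistMatrix import grouper

    list(grouper(2, range(5)))   # [(0, 1), (2, 3), (4,)]
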
/pelops/etl/makeFeaturesResNet50.py:
--------------------------------------------------------------------------------
1 | """ Generate resnet50 features
2 |
3 | Input:
4 |     infile should be a list of JSON lines, one JSON object per line
5 |
6 | Output:
7 | appending of resnet50 features to each json line
8 |
9 | Usage:
10 | makeFeaturesResNet50 [-hv]
11 |     makeFeaturesResNet50 -i <INPUT_FILENAME> -p <IMAGE_DIR>
12 |
13 | Arguments:
14 | INPUT_FILENAME : location of the file to enrich with resnet features
15 | IMAGE_DIR : full path to where the images live
16 |
17 | Options:
18 | -h, --help : Show this help message.
19 | -v, --version : Show the version number.
20 |     -i, --inFile : input file to enrich with resnet features
21 | -p, --path : Path to the directory holding the images
22 |
23 |
24 | """
25 |
26 | import docopt
27 | import numpy as np
28 | from keras.applications.resnet50 import ResNet50
29 | from keras.preprocessing import image
30 | from keras.applications.imagenet_utils import preprocess_input
31 | from keras.models import Model
32 | import os
33 | import time
34 | import json
35 | import sys
36 |
37 |
38 | # return an image from a file, default resize to 224,224
39 | def load_image(img_path, resizex=224, resizey=224):
40 | data = image.load_img(img_path, target_size=(resizex, resizey))
41 | x = image.img_to_array(data)
42 | x = np.expand_dims(x, axis=0)
43 | x = preprocess_input(x)
44 | return x
45 |
46 |
47 | # load the imagenet networks
48 | def get_models():
49 | # include_top needs to be True for this to work
50 | base_model = ResNet50(weights='imagenet', include_top=True)
51 | model = Model(input=base_model.input,
52 | output=base_model.get_layer('flatten_1').output)
53 | return (model, base_model)
54 |
55 |
56 | # return feature vector for a given img, and model
57 | def image_features(img, model):
58 | features = model.predict(img)
59 | return features
60 |
61 |
62 | # read the files to process
63 | def getList(name):
64 | retval = list()
65 |     f = open(name, 'r')
66 | for line in f:
67 | line = line.strip()
68 | line = json.loads(line)
69 | retval.append(line)
70 | f.close()
71 | return retval
72 |
73 |
74 | # perform the file by file processing
75 | def process(trainingList, prefix, model, outFilename, batchSize=1000):
76 | outFile = open(outFilename, 'w')
77 | start = time.time()
78 | for idx, line in enumerate(trainingList):
79 | tempd = dict()
80 | if idx % batchSize == 0:
81 | end = time.time() - start
82 | start = time.time()
83 | fstring = 'total {0} batch {1} images in {2} seconds'
84 |             print(fstring.format(idx, batchSize, end))
85 | path = os.path.join(prefix, line['imageName'])
86 | img = load_image(path)
87 | feature = image_features(img, model)
88 | tempd['resnet50'] = feature.tolist()[0]
89 | tempd.update(line)
90 | outFile.write(json.dumps(tempd)+'\n')
91 | outFile.close()
92 |
93 |
94 | # read json file append feature vector to each line dict
95 | def main(args):
96 | try:
97 | lineFileName = args['--inFile']
98 | prefix = args['--path']
99 |
100 | except docopt.DocoptExit as e:
101 | sys.exit('Error: input invalid options {0}'.format(e))
102 |
103 | outFilename = '{0}.resnet50.json'.format(lineFileName)
104 | model, base_model = get_models()
105 |
106 | print('loading...')
107 | trainingList = getList(lineFileName)
108 |
109 | print('processing...')
110 | process(trainingList, prefix, model, outFilename)
111 |
112 | print('done.')
113 |
114 |
115 | if __name__ == '__main__':
116 | args = docopt.docopt(__doc__, version='1.0')
117 | main(args)
118 |
--------------------------------------------------------------------------------
/pelops/etl/veriFileList2Json.py:
--------------------------------------------------------------------------------
1 | """ turn the list of files into a list of json dicts about the files
2 |
3 | Input:
4 |     Take the VeRi dataset that contains the following information:
5 | * 49358 images (1679 query images, 11580 test images, 37779 train images)
6 | * 776 vehicles
7 | * 20 cameras
8 | * covering 1.0 km^2 area in 24 hours
9 |
10 | convert the name_* files into json files for processing
11 |
12 | Output:
13 | json file with the following attributes in a dict per line:
14 | imageName
15 | vehicleID
16 | cameraID
17 | colorID
18 | typeID
19 |
20 | Usage:
21 | veriFileList2Json [-hv]
22 |     veriFileList2Json -i <INFILE_NAME>
23 |
24 | Arguments:
25 | INFILE_NAME :file path to the VeRI name_ file
26 |
27 | Options:
28 | -h, --help :Show this message
29 | -v, --version :Version of the prog
30 | -i, --inputFile :location of the VeRi name_ file to process
31 |
32 |
33 |
34 | """
35 | import docopt
36 | import json
37 | import sys
38 |
39 |
40 | # turn the list of files into json for working with
41 | def main(args):
42 | try:
43 | inFileName = args['--inputFile']
44 | except docopt.DocoptExit as e:
45 | sys.exit('error: input invalid options: {0}'.format(e))
46 |
47 | outFileName = '{0}.json'.format(inFileName)
48 |
49 | inFile = open(inFileName, 'r')
50 | outFile = open(outFileName, 'w')
51 |
52 | for line in inFile:
53 | d = dict()
54 | line = line.strip()
55 | attrs = line.split('_')
56 | d['imageName'] = line
57 | d['vehicleID'] = attrs[0]
58 | d['cameraID'] = attrs[1]
59 | d['colorID'] = str(-1)
60 | d['typeID'] = str(-1)
61 | outFile.write(json.dumps(d)+'\n')
62 | inFile.close()
63 |     outFile.close()
64 |
65 | if __name__ == '__main__':
66 |     args = docopt.docopt(__doc__, version='veriFileList2Json 1.0')
67 | main(args)
68 |
--------------------------------------------------------------------------------
/pelops/experiment_api/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/experiment_api/__init__.py
--------------------------------------------------------------------------------
/pelops/experiment_api/run_metric.sh:
--------------------------------------------------------------------------------
1 | python3 metric.py -c -w VeriDataset -y 2 -r 5 "/path/to/veri/dataset" "/path/to/image/feature/json/file"
2 |
--------------------------------------------------------------------------------
/pelops/features/feature_producer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from PIL import Image
3 |
4 | from pelops.datasets.chipper import Chipper
5 | from pelops.datasets.featuredataset import FeatureDataset
6 |
7 |
8 | class FeatureProducer(object):
9 | def __init__(self, chip_producer):
10 | self.chip_producer = chip_producer
11 | self.set_variables()
12 |
13 | def return_features(self):
14 | if isinstance(self.chip_producer, Chipper):
15 | chips = []
16 | chip_keys = []
17 | for chip_list in self.chip_producer:
18 | chips.extend(chip_list)
19 | for i, chip in enumerate(chip_list):
20 | chip_keys.append('{}_{}'.format(chip.frame_number, i))
21 |
22 | else:
23 | chips = []
24 | chip_keys = []
25 | for chip_key, chip in self.chip_producer.chips.items():
26 | chips.append(chip)
27 | chip_keys.append(chip_key)
28 |
29 | feats = np.zeros((len(chips), self.feat_size), dtype=np.float32)
30 | for i, chip in enumerate(chips):
31 | feats[i] = self.produce_features(chip)
32 | return chip_keys, chips, feats
33 |
34 | @staticmethod
35 | def get_image(chip):
36 | if hasattr(chip, 'img_data'):
37 | img = Image.fromarray(chip.img_data)
38 | return img.convert('RGB')
39 | else:
40 | return Image.open(chip.filepath)
41 |
42 | def produce_features(self, chip):
43 | """Takes a chip object and returns a feature vector of size
44 | self.feat_size. """
45 | raise NotImplementedError("produce_features() not implemented")
46 |
47 | def save_features(self, output_filename):
48 |         """Calculate features and save them as a FeatureDataset.
49 | 
50 |         Args:
51 |             output_filename: path of the HDF5 file to write
52 | 
53 |         Returns:
54 |             None
55 |         """
56 | # TODO: See if this function should save the features in memory
57 | if isinstance(self.chip_producer, Chipper):
58 | raise NotImplementedError("Only ChipDatasets are supported at this time")
59 | chip_keys, chips, features = self.return_features()
60 | FeatureDataset.save(output_filename, chip_keys, chips, features)
61 |
62 | def set_variables(self):
63 | """Child classes should use this to set self.feat_size, and any other
64 | needed variables. """
65 | self.feat_size = None # Set this in your inherited class
66 | raise NotImplementedError("set_variables() is not implemented")
67 |
--------------------------------------------------------------------------------
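A minimal FeatureProducer subclass (illustrative, not part of the library): set_variables must define self.feat_size, and produce_features must return a vector of that length for each chip.

    import numpy as np
    from pelops.features.feature_producer import FeatureProducer

    class MeanRGBFeatureProducer(FeatureProducer):
        """3-d feature vector: the mean of each RGB channel."""

        def produce_features(self, chip):
            img = self.get_image(chip)              # PIL image via the base class
            arr = np.array(img.convert('RGB'), dtype=np.float32)
            return arr.reshape(-1, 3).mean(axis=0)  # shape (3,)

        def set_variables(self):
            self.feat_size = 3

    # MeanRGBFeatureProducer(chip_dataset).save_features('mean_rgb.hdf5')
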
/pelops/features/hog.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from PIL import Image
3 | from skimage import color
4 | from skimage.feature import hog
5 |
6 | from pelops.features.feature_producer import FeatureProducer
7 |
8 |
9 | class HOGFeatureProducer(FeatureProducer):
10 |
11 | def __init__(self, chip_producer, image_size=(224,224), cells=(16, 16), orientations=8, histogram_bins_per_channel=256):
12 | self.image_size = image_size
13 | self.cells = cells
14 | self.orientations = orientations
15 | self.histogram_bins_per_channel = histogram_bins_per_channel
16 | super().__init__(chip_producer)
17 |
18 | def produce_features(self, chip):
19 | """Takes a chip object and returns a feature vector of size
20 | self.feat_size. """
21 | img = self.get_image(chip)
22 | img = img.resize(self.image_size, Image.BICUBIC)
23 | img_x, img_y = img.size
24 |
25 | # Calculate histogram of each channel
26 | channels = img.split()
27 | hist_features = np.full(shape=3 * self.histogram_bins_per_channel, fill_value=-1)
28 |
29 | # We expect RGB images. If something else is passed warn the user and
30 | # continue.
31 | if len(channels) < 3:
32 | print("Non-RBG image! Vector will be padded with -1!")
33 | if len(channels) > 3:
34 | print("Non-RBG image! Channels beyond the first three will be ignored!")
35 | channels = channel[:3]
36 |
37 | for i, channel in enumerate(channels):
38 | channel_array = np.array(channel)
39 | values, _ = np.histogram(channel_array.flat, bins=self.histogram_bins_per_channel)
40 | start = i * self.histogram_bins_per_channel
41 | end = (i+1) * self.histogram_bins_per_channel
42 | hist_features[start:end] = values
43 |
44 | # Calculate HOG features, which require a grayscale image
45 | img = color.rgb2gray(np.array(img))
46 | features = hog(
47 | img,
48 | orientations=self.orientations,
49 |             pixels_per_cell=(img_x // self.cells[0], img_y // self.cells[1]),
50 | cells_per_block=self.cells, # Normalize over the whole image
51 | )
52 |
53 | return np.concatenate((features, hist_features))
54 |
55 | def set_variables(self):
56 | hog_size = self.cells[0] * self.cells[1] * self.orientations
57 | hist_size = 3 * self.histogram_bins_per_channel
58 | self.feat_size = hog_size + hist_size
59 |
--------------------------------------------------------------------------------
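With the default arguments above, the feature length fixed by set_variables works out as follows (one normalization block covering the whole image, so the HOG part is cells_x * cells_y * orientations):

    hog_size  = 16 * 16 * 8           # = 2048
    hist_size = 3 * 256               # three channels, 256 bins each = 768
    feat_size = hog_size + hist_size  # = 2816
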
/pelops/features/keras_model.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from keras.preprocessing import image
3 | from keras.applications.resnet50 import preprocess_input
4 | from keras.models import load_model, Model, model_from_json
5 |
6 | from PIL import Image as PIL_Image
7 | from pelops.features.feature_producer import FeatureProducer
8 |
9 |
10 | class KerasModelFeatureProducer(FeatureProducer):
11 | def __init__(self, chip_producer, model_filename, layer_name, weight_filename=None):
12 |         super().__init__(chip_producer)
13 | 
14 |
15 | if weight_filename is None:
16 | self.original_model = load_model(model_filename)
17 | else:
18 |             self.original_model = self.load_model_workaround(model_filename, weight_filename)
19 |
20 | self.keras_model = Model(input=self.original_model.input,
21 | output=self.original_model.get_layer(layer_name).output)
22 |
23 | @staticmethod
24 |     def load_model_workaround(model_filename, weight_filename):
25 | # load json and create model
26 | json_file = open(model_filename, 'r')
27 | loaded_model_json = json_file.read()
28 | json_file.close()
29 |
30 | loaded_model = model_from_json(loaded_model_json)
31 |
32 | # load weights into new model
33 | loaded_model.load_weights(weight_filename)
34 | return loaded_model
35 |
36 | @staticmethod
37 | def preprocess_image(img, x_dim=224, y_dim=224):
38 | img = img.resize((x_dim,y_dim), PIL_Image.BICUBIC)
39 | x = image.img_to_array(img)
40 | x = np.expand_dims(x, axis=0)
41 | x = preprocess_input(x)
42 | return x
43 |
44 | def produce_features(self, chip):
45 | pil_image = self.get_image(chip)
46 | preprocessed_image = self.preprocess_image(pil_image)
47 | image_features = self.keras_model.predict(preprocessed_image)
48 | return image_features
49 |
50 | def set_variables(self):
51 | self.feat_size = 2048
52 |
--------------------------------------------------------------------------------
/pelops/features/resnet50.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from keras.applications.resnet50 import ResNet50
3 | from keras.preprocessing import image
4 | from keras.applications.resnet50 import preprocess_input
5 | from keras.models import Model
6 |
7 | from PIL import Image as PIL_Image
8 | from pelops.features.feature_producer import FeatureProducer
9 |
10 | # Use global so we only load the resnet model once
11 | # TODO: find a better way to do this
12 | resnet_model = None
13 |
14 |
15 | class ResNet50FeatureProducer(FeatureProducer):
16 | def __init__(self, chip_producer):
17 | global resnet_model
18 | super().__init__(chip_producer)
19 |
20 | if resnet_model is None:
21 | # include_top needs to be True for this to work
22 | base_model = ResNet50(weights='imagenet', include_top=True)
23 | resnet_model = Model(input=base_model.input,
24 | output=base_model.get_layer('flatten_1').output)
25 |
26 | self.resnet_model = resnet_model
27 |
28 | @staticmethod
29 | def preprocess_image(img, x_dim=224, y_dim=224):
30 | if img.size != (x_dim, y_dim):
31 | img = img.resize((x_dim,y_dim), PIL_Image.BICUBIC)
32 | x = image.img_to_array(img)
33 | x = np.expand_dims(x, axis=0)
34 | x = preprocess_input(x)
35 | return x
36 |
37 | def produce_features(self, chip):
38 | pil_image = self.get_image(chip)
39 | preprocessed_image = self.preprocess_image(pil_image)
40 | image_features = self.resnet_model.predict(preprocessed_image)
41 | return image_features
42 |
43 | def set_variables(self):
44 | self.feat_size = 2048
45 |
--------------------------------------------------------------------------------
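End-to-end sketch (the dataset path and output name are hypothetical): compute ResNet50 features for every chip in a dataset and persist them as a FeatureDataset HDF5 file.

    from pelops.datasets.veri import VeriDataset
    from pelops.features.resnet50 import ResNet50FeatureProducer

    chips = VeriDataset('/path/to/VeRi')
    producer = ResNet50FeatureProducer(chips)
    producer.save_features('veri_resnet50.hdf5')
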
/pelops/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/models/__init__.py
--------------------------------------------------------------------------------
/pelops/models/makesvm.py:
--------------------------------------------------------------------------------
1 | """ work with SVM and chips """
2 | import time
3 |
4 | import sklearn
5 | from scipy.stats import uniform as sp_rand
6 | from sklearn import svm
7 | from sklearn.externals import joblib
8 | from sklearn.model_selection import RandomizedSearchCV
9 | from tqdm import tnrange
10 |
11 | from pelops.analysis.camerautil import get_match_id, make_good_bad
12 | from pelops.analysis.comparecameras import make_work
13 |
14 |
15 | def train_svm(examples, fd_train, eg_train):
16 | """
17 | train a support vector machine
18 |
19 | examples(int): number of examples to generate
20 | fd_train(featureDataset): where to join features to chips
21 | eg_train(experimentGenerator): makes experiments
22 |
23 |     clf(SVM): svm classifier trained on the input examples
24 | """
25 | lessons_train = list()
26 | outcomes_train = list()
27 | for _ in tnrange(examples):
28 | cameras_train = eg_train.generate()
29 | match_id = get_match_id(cameras_train)
30 | goods, bads = make_good_bad(cameras_train, match_id)
31 | make_work(fd_train, lessons_train, outcomes_train, goods, 1)
32 | make_work(fd_train, lessons_train, outcomes_train, bads, 0)
33 |
34 | clf = svm.SVC()
35 |
36 | print('fitting')
37 | start = time.time()
38 | clf.fit(lessons_train, outcomes_train)
39 | end = time.time()
40 | print('fitting took {} seconds'.format(end - start))
41 | return clf
42 |
43 |
44 | def search(examples, fd_train, eg_train, iterations):
45 | """
46 |     beginnings of hyperparameter search for svm
47 | """
48 | param_grid = {'C': sp_rand()}
49 | lessons_train = list()
50 | outcomes_train = list()
51 | for _ in tnrange(examples):
52 | cameras_train = eg_train.generate()
53 | match_id = get_match_id(cameras_train)
54 | goods, bads = make_good_bad(cameras_train, match_id)
55 | make_work(fd_train, lessons_train, outcomes_train, goods, 1)
56 | make_work(fd_train, lessons_train, outcomes_train, bads, 0)
57 | clf = svm.SVC()
58 | print('searching')
59 | start = time.time()
60 | rsearch = RandomizedSearchCV(
61 | estimator=clf, param_distributions=param_grid, n_iter=iterations)
62 | rsearch.fit(lessons_train, outcomes_train)
63 | end = time.time()
64 | print('searching took {} seconds'.format(end - start))
65 | print(rsearch.best_score_)
66 | print(rsearch.best_estimator_.C)
67 |
68 |
69 | def save_model(model, filename):
70 | """
71 | save a model to disk
72 |
73 | model(somemodel): trained model to save
74 |     filename(str): location to save the model
75 | """
76 | joblib.dump(model, filename)
77 |
78 |
79 | def load_model(filename):
80 | """
81 |     load a model from disk. only load models saved with
82 |     sklearn version 0.18.1, as pickles from other versions
83 |     may not load correctly
84 |
85 | filename(str): name of file to load
86 | """
87 | if sklearn.__version__ == '0.18.1':
88 | model = joblib.load(filename)
89 | return model
90 | else:
91 | print('upgrade sklearn to version 0.18.1')
92 |
93 |
94 | def test_svm(examples, clf_train, fd_test, eg_test):
95 | """
96 | score the trained SVM against test features
97 |
98 | examples(int): number of examples to run
99 |     clf_train(model): model for evaluating testing data
100 | fd_test(featureDataset): testing dataset
101 | eg_test(experimentGenerator): generated experiments from testing dataset
102 |
103 | out(int): score from the model
104 | """
105 | lessons_test = list()
106 | outcomes_test = list()
107 |
108 | for _ in tnrange(examples):
109 | cameras_test = eg_test.generate()
110 | match_id = get_match_id(cameras_test)
111 | goods, bads = make_good_bad(cameras_test, match_id)
112 | make_work(fd_test, lessons_test, outcomes_test, goods, 1)
113 | make_work(fd_test, lessons_test, outcomes_test, bads, 0)
114 |
115 | print('scoring')
116 | start = time.time()
117 | out = clf_train.score(lessons_test, outcomes_test)
118 | end = time.time()
119 | print('scoring took {} seconds'.format(end - start))
120 | return out
121 |
--------------------------------------------------------------------------------
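A minimal end-to-end sketch of how the makesvm.py helpers compose. The feature
datasets (fd_train, fd_test), experiment generators (eg_train, eg_test),
example counts, and file name below are assumptions standing in for objects
built elsewhere in the pipeline, not a script shipped with the repository:

    from pelops.models.makesvm import train_svm, test_svm, save_model, load_model

    # fd_* are FeatureDataset instances and eg_* are experiment generators,
    # both assumed to have been constructed already.
    clf = train_svm(1000, fd_train, eg_train)      # fit on 1000 generated examples
    score = test_svm(200, clf, fd_test, eg_test)   # score on 200 held-out examples
    print('held-out score: {}'.format(score))

    save_model(clf, 'svm.pkl')
    clf = load_model('svm.pkl')                    # only loads under sklearn 0.18.1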
/pelops/transform_img/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/pelops/transform_img/__init__.py
--------------------------------------------------------------------------------
/pelops/transform_img/run.sh:
--------------------------------------------------------------------------------
1 | python3 transform.py
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | matplotlib
4 | logger
5 | python-resize-image
6 | h5py
7 | imageio
8 | scikit-image
9 | keras
10 | tensorflow
11 | tqdm
12 | pytest
13 | hdfs3
14 | opencv-python
15 | docopt
16 | scikit-learn
--------------------------------------------------------------------------------
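The list above is a plain pip requirements file; the usual one-step install
(standard pip usage, run from the repository root) is:

    pip install -r requirements.txt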
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from setuptools import find_packages
3 | from setuptools import setup
4 |
5 | setup(
6 | name="Pelops",
7 | version="0.1.1",
8 | description="Car re-identification via deep learning",
9 | url="https://github.com/Lab41/pelops",
10 | author="Lab41",
11 | author_email="lab41@iqt.org",
12 | license="Apache Software License",
13 | packages=find_packages(),
14 | classifiers=[
15 | "Development Status :: 3 - Alpha",
16 | "Environment :: Console",
17 | "Intended Audience :: Science/Research",
18 | "License :: OSI Approved :: Apache Software License",
19 | "Natural Language :: English",
20 | "Operating System :: OS Independent",
21 | "Programming Language :: Python :: 3",
22 | "Programming Language :: Python :: 3.5",
23 | "Topic :: Scientific/Engineering :: Artificial Intelligence",
24 | ],
25 | keywords=[
26 | "computer vision",
27 | "deep learning",
28 | "resnet",
29 | "vehicle re-identification",
30 | ],
31 | )
32 |
--------------------------------------------------------------------------------
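With this setup.py the package installs like any setuptools project; an
editable install is convenient during development (a standard pip invocation,
not a command defined by the repository):

    pip install -e .    # run from the repository root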
/testci/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # This script is meant to be called by the "install" step defined in
3 | # .travis.yml. See http://docs.travis-ci.com/ for more details.
4 | # The behavior of the script is controlled by environment variables defined
5 | # in the .travis.yml in the top level folder of the project.
6 | #
7 | # This script is adapted from a similar script from the scikit-learn repository.
8 | # Taken from: https://github.com/aabadie/nilearn/blob/circle_ci_anaconda/continuous_integration/install.sh
9 | #
10 | # License: 3-clause BSD
11 |
12 | set -e
13 |
14 | # Fix the compilers to work around the Python 3.4 build
15 | # unexpectedly looking up g++44.
16 | export CC=gcc
17 | export CXX=g++
18 |
19 | create_new_venv() {
20 | # At the time of writing numpy 1.9.1 is included in the travis
21 | # virtualenv but we want to be in control of the numpy version
22 | # we are using for example through apt-get install
23 | deactivate
24 | virtualenv --system-site-packages testvenv
25 | source testvenv/bin/activate
26 | pip install nose
27 | }
28 |
29 | print_conda_requirements() {
30 | # Echo a conda requirement string, for example
31 | # "pip nose python=2.7.3 scikit-learn=*". It has a hardcoded
32 | # list of possible packages to install and looks at _VERSION
33 | # environment variables to know whether to install a given package and
34 | # if yes which version to install. For example:
35 | # - for numpy, NUMPY_VERSION is used
36 | # - for scikit-learn, SCIKIT_LEARN_VERSION is used
37 | TO_INSTALL_ALWAYS="pip nose"
38 | REQUIREMENTS="$TO_INSTALL_ALWAYS"
39 | TO_INSTALL_MAYBE="python numpy scipy matplotlib scikit-learn flake8"
40 | for PACKAGE in $TO_INSTALL_MAYBE; do
41 | # Capitalize package name and add _VERSION
42 | PACKAGE_VERSION_VARNAME="${PACKAGE^^}_VERSION"
43 | # replace - by _, needed for scikit-learn for example
44 | PACKAGE_VERSION_VARNAME="${PACKAGE_VERSION_VARNAME//-/_}"
45 | # dereference $PACKAGE_VERSION_VARNAME to figure out the
46 | # version to install
47 | PACKAGE_VERSION="${!PACKAGE_VERSION_VARNAME}"
48 | if [ -n "$PACKAGE_VERSION" ]; then
49 | REQUIREMENTS="$REQUIREMENTS $PACKAGE=$PACKAGE_VERSION"
50 | fi
51 | done
52 | echo $REQUIREMENTS
53 | }
54 |
55 | create_new_conda_env() {
56 | # Skip Travis related code on circle ci.
57 | if [ -z $CIRCLECI ]; then
58 | # Deactivate the travis-provided virtual environment and setup a
59 | # conda-based environment instead
60 | deactivate
61 | fi
62 |
63 | # Use the miniconda installer for faster download / install of conda
64 | # itself
65 | wget https://repo.continuum.io/miniconda/Miniconda3-4.2.11-Linux-x86_64.sh \
66 | -O ~/miniconda.sh
67 | chmod +x ~/miniconda.sh && ~/miniconda.sh -b
68 | export PATH=$HOME/miniconda3/bin:$PATH
69 | echo $PATH
70 | conda update --quiet --yes conda
71 |
72 | # Configure the conda environment and put it in the path using the
73 | # provided versions
74 | REQUIREMENTS=$(print_conda_requirements)
75 | echo "conda requirements string: $REQUIREMENTS"
76 | conda create -n testenv --quiet --yes $REQUIREMENTS
77 | source activate testenv
78 |
79 | if [[ "$INSTALL_MKL" == "true" ]]; then
80 | # Make sure that MKL is used
81 | conda install --quiet --yes mkl
82 | elif [[ -z $CIRCLECI ]]; then
83 | # Travis doesn't use MKL but circle ci does for speeding up examples
84 | # generation in the html documentation.
85 | # Make sure that MKL is not used
86 | conda remove --yes --features mkl || echo "MKL not installed"
87 | fi
88 | }
89 |
90 | if [[ "$DISTRIB" == "conda" ]]; then
91 | create_new_conda_env
92 | pip install nose-timer
93 | # Note: this nibabel block is inherited from the upstream nilearn
94 | # script (pelops' setup.py does not require nibabel); it is a no-op
95 | # unless NIBABEL_VERSION is set, in which case that specific
96 | # nibabel version is installed.
97 | if [ -n "$NIBABEL_VERSION" ]; then
98 | pip install nibabel=="$NIBABEL_VERSION"
99 | fi
100 |
101 | else
102 | echo "Unrecognized distribution ($DISTRIB); cannot setup CI environment."
103 | exit 1
104 | fi
105 |
106 | pip install psutil memory_profiler
107 |
108 | if [[ "$COVERAGE" == "true" ]]; then
109 | pip install coverage coveralls
110 | fi
111 |
--------------------------------------------------------------------------------
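The script is driven entirely by environment variables: DISTRIB must be
"conda", per-package <NAME>_VERSION variables are picked up by
print_conda_requirements(), and INSTALL_MKL / COVERAGE toggle optional
installs. A hypothetical CI invocation (the version pins are illustrative,
not values mandated by the repository):

    DISTRIB=conda PYTHON_VERSION=3.5 NUMPY_VERSION=1.11.2 \
    SCIKIT_LEARN_VERSION=0.18.1 COVERAGE=true \
    bash testci/install.sh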
/testci/small.hdf5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Lab41/pelops/292af80dba190f9506519c8e13432fef648a2291/testci/small.hdf5
--------------------------------------------------------------------------------
/testci/small.json:
--------------------------------------------------------------------------------
1 | {"class_name": "Sequential", "keras_version": "1.2.0", "config": [{"class_name": "Dense", "config": {"bias": true, "trainable": true, "W_regularizer": null, "input_dtype": "float32", "output_dim": 12, "name": "dense_8", "activation": "relu", "batch_input_shape": [null, 8], "init": "uniform", "activity_regularizer": null, "input_dim": 8, "b_constraint": null, "b_regularizer": null, "W_constraint": null}}]}
--------------------------------------------------------------------------------
/testci/test_chip.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from pelops.datasets.chip import ChipDataset, Chip
4 |
5 |
6 | @pytest.fixture
7 | def chips():
8 | CHIPS = (
9 | # filepath, car_id, cam_id, time, misc
10 | ("car1_cam1.png", 1, 1, 100, None),
11 | ("car1_cam2.png", 1, 2, 105, None),
12 | ("car1_cam3.png", 1, 3, 110, None),
13 | ("car2_cam1.png", 2, 1, 100, None),
14 | ("car2_cam2.png", 2, 1, 102, None),
15 | ("car2_cam3.png", 2, 1, 104, None),
16 | )
17 |
18 | chips = {}
19 | for filepath, car_id, cam_id, time, misc in CHIPS:
20 | chip = Chip(filepath, car_id, cam_id, time, misc)
21 | chips[filepath] = chip
22 |
23 | return chips
24 |
25 |
26 | @pytest.fixture
27 | def chip_dataset(chips):
28 | """ Set up a instance of ChipDataset(). """
29 | # Setup the class
30 | instantiated_class = ChipDataset(dataset_path="Test")
31 |
32 | # Monkey Patch in a fake chips dictionary
33 | instantiated_class.chips = chips
34 |
35 | return instantiated_class
36 |
37 |
38 | def test_chips_len(chip_dataset, chips):
39 | """ Test that ChipDataset.chips is the correct length """
40 | assert len(chips) == len(chip_dataset)
41 |
42 |
43 | def get_all_function_tester(in_chips, in_chipbase, index, test_function):
44 | """ Check that a chip getting function gets all the correct chips.
45 |
46 | This function tests a chip getting function, such as
47 | `get_all_chips_by_carid()` by creating a list of every correct chip from
48 | the true list of chips, and comparing it to the list returned by the
49 | function.
50 |
51 | Args:
52 | in_chips: The output of chips()
53 | in_chipbase: The output of chipbase()
54 | index: The location of the id in the chips object to use to compare.
55 | 0 is the filepath (aka chip_id), 1 is the car_id, 2 is the cam_id.
56 | test_function: The function to test, it should return a list of chips
57 | selected by some id value.
58 |
59 | Returns:
60 | None
61 | """
62 | seen_ids = []
63 | for tup in in_chips.values():
64 | test_id = tup[index]
65 | # Generate all the chips by hand, and compare
66 | if test_id in seen_ids:
67 | continue
68 | seen_ids.append(test_id)
69 | chips_list = []
70 | for _, val in in_chipbase.chips.items():
71 | if val[index] == test_id:
72 | chips_list.append(val)
73 |
74 | chips_list.sort()
75 | test_chips = sorted(test_function(test_id))
76 | assert chips_list == test_chips
77 |
78 |
79 | def test_get_all_chips_by_car_id(chip_dataset, chips):
80 | """ Test ChipDataset.get_all_chips_by_carid() """
81 | CAR_ID_INDEX = 1
82 | get_all_function_tester(chips, chip_dataset, CAR_ID_INDEX,
83 | chip_dataset.get_all_chips_by_car_id)
84 |
85 |
86 | def test_get_all_chips_by_cam_id(chip_dataset, chips):
87 | """ Test ChipDataset.get_all_chips_by_camid() """
88 | CAM_ID_INDEX = 2
89 | get_all_function_tester(chips, chip_dataset, CAM_ID_INDEX,
90 | chip_dataset.get_all_chips_by_cam_id)
91 |
92 |
93 | def test_get_distinct_cams_by_car_id(chip_dataset):
94 | """ Test ChipDataset.get_distinct_cams_by_car_id() and get_distinct_cams_per_car() """
95 | CAR_ID = 1
96 | TEST_CAMS = [1, 2, 3]
97 | for test_cam, cam in zip(TEST_CAMS, sorted(chip_dataset.get_distinct_cams_by_car_id(CAR_ID))):
98 | assert test_cam == cam
99 |
100 |
101 | def test_get_all_cam_ids(chip_dataset):
102 | """ Test ChipDataset.get_all_cam_ids() """
103 | TEST_CAMS = [1, 2, 3]
104 | for test_cam, cam in zip(TEST_CAMS, sorted(chip_dataset.get_all_cam_ids())):
105 | assert test_cam == cam
106 |
107 |
108 | def test_get_all_car_ids(chip_dataset):
109 | TEST_CARS = [1, 2]
110 | for test_car, car in zip(TEST_CARS, sorted(chip_dataset.get_all_car_ids())):
111 | assert test_car == car
112 |
113 |
114 | def test_chipdataset_iter(chip_dataset, chips):
115 | """ Test iteration over ChipDataset() """
116 | for chip in chip_dataset:
117 | assert chip in chips.values()
118 |
--------------------------------------------------------------------------------
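For orientation, the getters exercised above follow directly from the fixture
data: car 1 appears on cameras 1-3, while car 2 appears only on camera 1. A
condensed sketch reusing the same monkey-patching trick as the chip_dataset
fixture (the values are illustrative):

    from pelops.datasets.chip import Chip, ChipDataset

    ds = ChipDataset(dataset_path="Test")
    ds.chips = {
        "car1_cam1.png": Chip("car1_cam1.png", 1, 1, 100, None),
        "car2_cam1.png": Chip("car2_cam1.png", 2, 1, 100, None),
    }

    len(ds)                               # 2
    ds.get_all_chips_by_cam_id(1)         # both chips share camera 1
    ds.get_distinct_cams_by_car_id(2)     # {1}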
/testci/test_chipper.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import datetime as dt
4 |
5 | # OpenCV is *VERY* hard to install in CircleCI, so if we don't have it, skip these tests
6 | cv2 = pytest.importorskip("cv2") # Skip all tests if not found
7 | from pelops.datasets.chipper import FrameProducer
8 |
9 |
10 | @pytest.fixture
11 | def frame_time_fp(tmpdir):
12 | # Define a FrameProducer with just enough information to run __get_frame_time()
13 | ifp = FrameProducer(
14 | file_list=[],
15 | )
16 | ifp.vid_metadata = {"fps": 30}
17 |
18 | return ifp
19 |
20 |
21 | @pytest.fixture
22 | def frame_time_fp_data(tmpdir):
23 | # Data to test __get_frame_time()
24 | DATA = (
25 | # (filename, frame number), (answer)
26 | (("/foo/bar/baz_20000101T000000-00000-006000.mp4", 0), dt.datetime(2000, 1, 1)),
27 | (("/foo/bar/baz_20000101T000000-00600-012000.mp4", 0), dt.datetime(2000, 1, 1, 0, 10)),
28 | (("/foo/bar/baz_20000101T000000-00000-006000.mp4", 1), dt.datetime(2000, 1, 1, 0, 0, 0, 33333)),
29 | (("/foo/bar/baz_20000101T000000-00600-012000.mp4", 10), dt.datetime(2000, 1, 1, 0, 10, 0, 333333)),
30 | )
31 | return DATA
32 |
33 |
34 | def test_get_frame_time(frame_time_fp, frame_time_fp_data):
35 | for input, answer in frame_time_fp_data:
36 | output = frame_time_fp._FrameProducer__get_frame_time(input[0], input[1])
37 | assert output == answer
38 |
--------------------------------------------------------------------------------
/testci/test_dgcars.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import os.path
4 | import json
5 |
6 | import pelops.utils as utils
7 | from pelops.datasets.dgcars import DGCarsDataset
8 | from pelops.datasets.chip import Chip
9 | from pelops.utils import SetType
10 |
11 |
12 | @pytest.fixture
13 | def dgcars(tmpdir):
14 | # Define some test and training data; the ALL set is the combination of the two
15 | TRAIN = [
16 | {"url": "http://example.com/img.jpg", "hash": "2a8cedfa145b4345aed3fd9e82796c3e", "resnet50": "minivan", "model": "ZX2", "filename": "black/Ford/2a8cedfa145b4345aed3fd9e82796c3e.jpg", "make": "Ford", "color": "black"},
17 | {"url": "http://example.com/img.jpg", "hash": "8241daf452ace679162c69386f26ddc7", "resnet50": "sports_car", "model": "Mazda6 Sport", "filename": "red/Mazda/8241daf452ace679162c69386f26ddc7.jpg", "make": "Mazda", "color": "red"},
18 | {"url": "http://example.com/img.jpg", "hash": "e8dc3fb78206b14fe3568c1b28e5e5a1", "resnet50": "cab", "model": "XJ Series", "filename": "yellow/Jaguar/e8dc3fb78206b14fe3568c1b28e5e5a1.jpg", "make": "Jaguar", "color": "yellow"},
19 | ]
20 | TEST = [
21 | {"url": "http://example.com/img.jpg", "hash": "8881e7b561393f1d778a70dd449433e9", "resnet50": "racer", "model": "IS F", "filename": "yellow/Lexus/8881e7b561393f1d778a70dd449433e9.jpg", "make": "Lexus", "color": "yellow"},
22 | {"url": "http://example.com/img.jpg", "hash": "38e857d5235afda4315676c0b7756832", "resnet50": "pickup", "model": "Mark VII", "filename": "silver/Lincoln/38e857d5235afda4315676c0b7756832.jpg", "make": "Lincoln", "color": "silver"},
23 | {"url": "http://example.com/img.jpg", "hash": "6eb2b407cc398e70604bfd336bb2efad", "resnet50": "pickup", "model": "Lightning", "filename": "orange/Ford/6eb2b407cc398e70604bfd336bb2efad.jpg", "make": "Ford", "color": "orange"},
24 | {"url": "http://example.com/img.jpg", "hash": "eb3811772ec012545c8952d88906d355", "resnet50": "racer", "model": "Rockette", "filename": "green/Fairthorpe/eb3811772ec012545c8952d88906d355.jpg", "make": "Fairthorpe", "color": "green"},
25 | {"url": "http://example.com/img.jpg", "hash": "8dbbc1d930c7f2e4558efcc596728945", "resnet50": "minivan", "model": "S70", "filename": "white/Volvo/8dbbc1d930c7f2e4558efcc596728945.jpg", "make": "Volvo", "color": "white"},
26 | {"url": "http://example.com/img.jpg", "hash": "ed45784812d1281bcb61f217f4422ab5", "resnet50": "convertible", "model": "A8", "filename": "green/Audi/ed45784812d1281bcb61f217f4422ab5.jpg", "make": "Audi", "color": "green"},
27 | {"url": "http://example.com/img.jpg", "hash": "763ca4abbbb9b042b21f19fd80986179", "resnet50": "pickup", "model": "W126", "filename": "green/Mercedes-Benz/763ca4abbbb9b042b21f19fd80986179.jpg", "make": "Mercedes-Benz", "color": "green"},
28 | ]
29 |
30 | WRITE_LIST = (
31 | # filename, data list, settype
32 | ("allFiles", TRAIN + TEST, SetType.ALL),
33 | ("training", TRAIN, SetType.TRAIN),
34 | ("testing", TEST, SetType.TEST),
35 | )
36 |
37 | output_chips = {
38 | SetType.ALL: [],
39 | SetType.TRAIN: [],
40 | SetType.TEST: [],
41 | }
42 | for filename, data_list, settype in WRITE_LIST:
43 | fn = tmpdir.join(filename)
44 | with open(fn.strpath, "w") as f:
45 | for d in data_list:
46 | # Write the data list files
47 | line = json.dumps(d)
48 | f.write(line + "\n")
49 |
50 | # Make a chip
51 | fp = os.path.join(tmpdir.strpath, d["filename"])
52 | chip = Chip(fp, None, None, None, d)
53 | output_chips[settype].append(chip)
54 |
55 | # Instantiate a DGCarsDataset() class
56 | output_classes = {
57 | SetType.ALL: DGCarsDataset(tmpdir.strpath, SetType.ALL),
58 | SetType.TRAIN: DGCarsDataset(tmpdir.strpath, SetType.TRAIN),
59 | SetType.TEST: DGCarsDataset(tmpdir.strpath, SetType.TEST),
60 | }
61 |
62 | return (output_classes, output_chips)
63 |
64 |
65 | def test_dgcars_chips_len(dgcars):
66 | classes = dgcars[0]
67 | answer_chips = dgcars[1]
68 | # check that self.chips has been created, is not empty, and has the right
69 | # number of entries
70 | for key, cls in classes.items():
71 | ans = answer_chips[key]
72 | assert len(cls.chips) == len(ans)
73 |
74 | def test_dgcars_chips_vals(dgcars):
75 | classes = dgcars[0]
76 | answer_chips = dgcars[1]
77 |
78 | for key, cls in classes.items():
79 | ans = answer_chips[key]
80 | for chip in cls:
81 | # The chip must match one of our hand built chips
82 | assert chip in ans
83 | # Various values are None
84 | assert chip.car_id is None
85 | assert chip.cam_id is None
86 | assert chip.time is None
87 | # Misc and filepath should exist
88 | assert chip.filepath
89 | assert chip.misc
90 | # Misc is a dictionary like object
91 | assert hasattr(chip.misc, "get")
92 |
93 |
94 | def test_get_all_chips_by_car_id(dgcars):
95 | classes = dgcars[0]
96 | answer_chips = dgcars[1]
97 |
98 | for key, cls in classes.items():
99 | ans = answer_chips[key]
100 |
101 | # All car_id values are None in DG Cars
102 | all_chips = sorted(cls.get_all_chips_by_car_id(None))
103 | assert all_chips == sorted(ans)
104 |
105 |
106 | def test_get_all_chips_by_cam_id(dgcars):
107 | classes = dgcars[0]
108 | answer_chips = dgcars[1]
109 |
110 | for key, cls in classes.items():
111 | ans = answer_chips[key]
112 |
113 | # All cam_id values are None in DG Cars
114 | all_chips = sorted(cls.get_all_chips_by_cam_id(None))
115 | assert all_chips == sorted(ans)
116 |
117 |
118 | def test_get_distinct_cams_by_car_id(dgcars):
119 | classes = dgcars[0]
120 | answer_chips = dgcars[1]
121 |
122 | for key, cls in classes.items():
123 | ans = answer_chips[key]
124 |
125 | # All car_id values are None in DG Cars
126 | assert cls.get_distinct_cams_by_car_id(None) == {None}
127 |
128 |
129 | def test_get_all_cam_ids(dgcars):
130 | classes = dgcars[0]
131 | answer_chips = dgcars[1]
132 |
133 | for key, cls in classes.items():
134 | ans = answer_chips[key]
135 |
136 | # All cam_id values are None in DG Cars
137 | assert cls.get_all_cam_ids() == [None]
138 |
139 |
140 | def test_get_all_car_ids(dgcars):
141 | classes = dgcars[0]
142 | answer_chips = dgcars[1]
143 |
144 | for key, cls in classes.items():
145 | ans = answer_chips[key]
146 |
147 | # All car_id values are None in DG Cars
148 | assert cls.get_all_car_ids() == [None]
149 |
150 |
151 | def test_dgcars_iter(dgcars):
152 | classes = dgcars[0]
153 | answer_chips = dgcars[1]
154 |
155 | for key, cls in classes.items():
156 | ans = answer_chips[key]
157 |
158 | # Ensure that we can iterate and get all of the items
159 | for chip in cls:
160 | assert chip in ans
161 |
162 | # Ensure list can access the iterator, and that there are no extra
163 | # chips
164 | cls_chips = list(cls)
165 | for chip in ans:
166 | assert chip in cls_chips
167 |
--------------------------------------------------------------------------------
/testci/test_experiment_utils.py:
--------------------------------------------------------------------------------
1 | import datetime
2 |
3 | import pelops.utils as utils
4 |
5 |
6 | def test_SetType():
7 | vals = utils.SetType.__members__
8 | assert 'ALL' in vals
9 | assert 'QUERY' in vals
10 | assert 'TEST' in vals
11 | assert 'TRAIN' in vals
12 |
13 |
14 | def test_get_index_of_tuple():
15 | TEST_LIST = [
16 | (0, 'Who', 'John'),
17 | (1, 'What', 'Pizza'),
18 | (2, 'Where', 'Little Caesar'),
19 | (3, 'When', 'Noon'),
20 | (4, 'How', 'Eat'),
21 | (5, None, None),
22 | ]
23 |
24 | # Test that we can find ints, strings, and Nones (returned positions are 1-based)
25 | assert 1 == utils.get_index_of_tuple(TEST_LIST, 0, 0)
26 | assert 2 == utils.get_index_of_tuple(TEST_LIST, 1, 'What')
27 | assert 6 == utils.get_index_of_tuple(TEST_LIST, 1, None)
28 |
29 | # Test that we report the last position if we don't find an answer
30 | assert len(TEST_LIST) == utils.get_index_of_tuple(
31 | TEST_LIST, 0, 'NOT THERE')
32 |
33 | def test_get_index_of_pairs():
34 | TEST_LIST = [
35 | (0, 0, 'Mozart'),
36 | (1, 'Twinkle', 'Twinkle'),
37 | (2, 'Where', 'Little Caesar'),
38 | (3, 'When', 'Noon'),
39 | (4, 'How', 'Eat'),
40 | (5, None, None),
41 | ]
42 |
43 | # Test that we can find ints, strings, and Nones (returned positions are 1-based)
44 | assert 1 == utils.get_index_of_pairs(TEST_LIST, 0, 1, 0)
45 | assert 2 == utils.get_index_of_pairs(TEST_LIST, 1, 2, 'Twinkle')
46 | assert 6 == utils.get_index_of_pairs(TEST_LIST, 1, 2, None)
47 |
48 | # Test that we report the last position if we don't find an answer
49 | assert len(TEST_LIST) == utils.get_index_of_pairs(
50 | TEST_LIST, 0, 1, 'NOT THERE')
51 |
52 |
53 | def test_get_basename():
54 | TEST_FILEPATHS = (
55 | ("/path/to/file/hello.py", "hello.py"),
56 | ("hello.py", "hello.py")
57 | )
58 |
59 | for test_input, answer in TEST_FILEPATHS:
60 | assert answer == utils.get_basename(test_input)
61 |
62 |
63 | def test_get_numeric():
64 | TEST_STRINGS = (
65 | ('c002.jpg', '002'),
66 | ('_012_', '012'),
67 | )
68 |
69 | for test_input, answer in TEST_STRINGS:
70 | assert answer == utils.get_numeric(test_input)
71 |
72 |
73 | def test_get_timestamp():
74 | assert "2012-09-16 12:03:04" == str(utils.get_timestamp(datetime.datetime(2012, 9, 16, 12, 3, 4)))
75 | assert 1 == utils.get_timestamp(1)
76 | assert "Saturday" == utils.get_timestamp("Saturday")
77 |
78 |
79 | def test_should_drop():
80 | # Always drop (drop probability 1.0)
81 | assert utils.should_drop(1.) is True
82 | # Never drop (drop probability 0.0)
83 | assert utils.should_drop(0.) is False
84 |
--------------------------------------------------------------------------------
/testci/test_featuredataset.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import pytest
3 |
4 | import numpy as np
5 | from pelops.datasets.chip import ChipDataset, Chip
6 | from pelops.datasets.featuredataset import FeatureDataset
7 |
8 | FEAT_LENGTH = 2048
9 |
10 | @pytest.fixture
11 | def chips():
12 | CHIPS = (
13 | # filepath, car_id, cam_id, time, misc
14 | ("car1_cam1.png", 1, 1, datetime.datetime(2016, 10,1, 0, 1, 2, microsecond=100), {}),
15 | ("car1_cam2.png", 1, 2, datetime.datetime(2016, 10,1, 0, 1, 2, microsecond=105), {}),
16 | ("car1_cam3.png", 1, 3, datetime.datetime(2016, 10,1, 0, 1, 2, microsecond=110), {}),
17 | ("car2_cam1.png", 2, 1, datetime.datetime(2016, 10,1, 0, 1, 2, microsecond=100), {}),
18 | ("car2_cam2.png", 2, 1, datetime.datetime(2016, 10,1, 0, 1, 2, microsecond=102), {}),
19 | ("car2_cam3.png", 2, 1, datetime.datetime(2016, 10,1, 0, 1, 2, microsecond=104), {}),
20 | )
21 |
22 | chips = {}
23 | for filepath, car_id, cam_id, time, misc in CHIPS:
24 | chip = Chip(filepath, car_id, cam_id, time, misc)
25 | chips[filepath] = chip
26 |
27 | return chips
28 |
29 | @pytest.fixture
30 | def feature_dataset(chips, tmpdir):
31 | OUTPUT_FNAME = tmpdir.join("test_feature_dataset.hdf5").strpath
32 | feat_data = np.random.random((len(chips), FEAT_LENGTH))
33 | FeatureDataset.save(OUTPUT_FNAME, list(chips.keys()), list(chips.values()), feat_data)
34 | return FeatureDataset(OUTPUT_FNAME)
35 |
36 | def test_get_feats(chips, feature_dataset):
37 | chip_key = next(iter(chips))
38 | chip = chips[chip_key]
39 | assert len(feature_dataset.get_feats_for_chip(chip)) == FEAT_LENGTH
40 |
41 | def test_load_save(chips, feature_dataset):
42 | chip_key = next(iter(chips))
43 | assert feature_dataset.chips[chip_key] == chips[chip_key]
44 |
--------------------------------------------------------------------------------
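The round trip above is the core FeatureDataset contract: one feature row per
chip, keyed by the chip's filepath. A condensed sketch reusing the chips
fixture shape (the file name and random features are arbitrary):

    import numpy as np
    from pelops.datasets.featuredataset import FeatureDataset

    keys = list(chips.keys())                    # chip filepaths
    feats = np.random.random((len(keys), 2048))  # one 2048-d row per chip
    FeatureDataset.save("feats.hdf5", keys, list(chips.values()), feats)

    fd = FeatureDataset("feats.hdf5")
    vec = fd.get_feats_for_chip(chips[keys[0]])  # length-2048 feature vector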
/testci/test_featureproducer.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import datetime
3 | import pytest
4 | import numpy as np
5 | from PIL import Image
6 |
7 | from pelops.features.feature_producer import FeatureProducer
8 |
9 |
10 | @pytest.fixture
11 | def img_data():
12 | DATA = [[[ 0, 0, 0],
13 | [255, 255, 255],
14 | [ 0, 0, 0]],
15 | [[255, 255, 255],
16 | [ 0, 0, 0],
17 | [255, 255, 255]],
18 | [[ 0, 0, 0],
19 | [255, 255, 255],
20 | [ 0, 0, 0]]]
21 | return np.array(DATA, dtype=np.uint8)
22 |
23 |
24 | @pytest.fixture
25 | def chip_producer(img_data):
26 | Chip = collections.namedtuple("Chip", ["filepath", "car_id", "cam_id", "time", "img_data", "misc"])
27 | ChipProducer = collections.namedtuple("ChipProducer", ["chips"])
28 | CHIPS = (
29 | # filepath, car_id, cam_id, time, img_data, misc
30 | ("car1_cam1.png", 1, 1, datetime.datetime(2016, 10, 1, 0, 1, 2, microsecond=100), img_data, {}),
31 | )
32 |
33 | chip_producer = ChipProducer({})
34 | for filepath, car_id, cam_id, time, img_data, misc in CHIPS:
35 | print(img_data.shape)
36 | chip = Chip(filepath, car_id, cam_id, time, img_data, misc)
37 | chip_producer.chips[filepath] = chip
38 |
39 | return chip_producer
40 |
41 |
42 | @pytest.fixture
43 | def monkey_feature_producer(chip_producer):
44 | # Monkey patch the __init__() function so that it will succeed
45 | def new_init(self, chip_producer):
46 | self.chip_producer = chip_producer
47 | self.feat_size = 1
48 |
49 | FeatureProducer.__init__ = new_init
50 |
51 | return FeatureProducer(chip_producer)
52 |
53 |
54 | def test_set_variables_raises():
55 | with pytest.raises(NotImplementedError):
56 | fp = FeatureProducer(None)
57 |
58 |
59 | def test_produce_features_raises(monkey_feature_producer):
60 | with pytest.raises(NotImplementedError):
61 | monkey_feature_producer.produce_features(None)
62 |
63 |
64 | def test_get_image_img_data(monkey_feature_producer, chip_producer, img_data):
65 | for key, chip in chip_producer.chips.items():
66 | image = monkey_feature_producer.get_image(chip)
67 | image_array = np.array(image)
68 | assert np.array_equal(img_data, image_array)
69 |
70 |
71 | def test_return_features_raises(monkey_feature_producer):
72 | with pytest.raises(NotImplementedError):
73 | monkey_feature_producer.return_features()
74 |
--------------------------------------------------------------------------------
/testci/test_hog_feature.py:
--------------------------------------------------------------------------------
1 | from PIL import Image
2 | from skimage import color
3 | from skimage.feature import hog
4 | import collections
5 | import datetime
6 | import numpy as np
7 | import pytest
8 | from itertools import product
9 |
10 | from pelops.features.hog import HOGFeatureProducer
11 |
12 | def hog_features(img):
13 | img = color.rgb2gray(np.array(img))
14 | features = hog(img, orientations=8, pixels_per_cell=(14, 14), cells_per_block=(16, 16))
15 | return features
16 |
17 |
18 | def hist_features(img):
19 | MAX_CHANNELS = 3
20 | BINS = 256
21 |
22 | channels = img.split()
23 |
24 | # Remove alpha channels
25 | if len(channels) > MAX_CHANNELS:
26 | channels = channels[:MAX_CHANNELS]
27 |
28 | # Calculate features
29 | hist_features = np.zeros(MAX_CHANNELS * BINS)
30 | for i, channel in enumerate(channels):
31 | channel_array = np.array(channel)
32 | values, _ = np.histogram(channel_array.flat, bins=BINS)
33 | start = i * BINS
34 | end = (i+1) * BINS
35 | hist_features[start:end] = values
36 |
37 | return hist_features
38 |
39 |
40 | @pytest.fixture(scope="module")
41 | def img_data():
42 | data = {
43 | "DATA_1":{},
44 | "DATA_3":{},
45 | "DATA_4":{},
46 | }
47 |
48 | # Raw data
49 | data["DATA_1"]["array"] = np.array([
50 | [[ 0, 0, 0],
51 | [255, 255, 255],
52 | [ 0, 0, 0]],
53 | ], dtype=np.uint8)
54 |
55 | data["DATA_3"]["array"] = np.array([
56 | [[ 0, 0, 0],
57 | [255, 255, 255],
58 | [ 0, 0, 0]],
59 | [[255, 255, 255],
60 | [ 0, 0, 0],
61 | [255, 255, 255]],
62 | [[ 0, 0, 0],
63 | [255, 255, 255],
64 | [ 0, 0, 0]],
65 | ], dtype=np.uint8)
66 |
67 | data["DATA_4"]["array"] = np.array([
68 | [[ 0, 0, 0],
69 | [255, 255, 255],
70 | [ 0, 0, 0]],
71 | [[255, 255, 255],
72 | [ 0, 0, 0],
73 | [255, 255, 255]],
74 | [[ 0, 0, 0],
75 | [255, 255, 255],
76 | [ 0, 0, 0]],
77 | [[ 0, 0, 0],
78 | [ 0, 0, 0],
79 | [ 0, 0, 0]],
80 | ], dtype=np.uint8)
81 |
82 | # PIL images
83 | for data_id in data:
84 | arr = data[data_id]["array"]
85 | img = Image.fromarray(arr)
86 | img = img.convert("RGB")
87 | img = img.resize((224, 224), Image.BICUBIC)
88 | data[data_id]["image"] = img
89 |
90 | # Calculate HOG features
91 | for data_id in data:
92 | img = data[data_id]["image"]
93 | hog = hog_features(img)
94 | data[data_id]["hog_features"] = hog
95 |
96 | # Calculate Histogram features
97 | for data_id in data:
98 | img = data[data_id]["image"]
99 | hist = hist_features(img)
100 | data[data_id]["hist_features"] = hist
101 |
102 | return data
103 |
104 |
105 | @pytest.fixture
106 | def chip_producer(img_data):
107 | Chip = collections.namedtuple("Chip", ["filepath", "car_id", "cam_id", "time", "img_data", "misc"])
108 | CHIPS = []
109 | for i, data_id in enumerate(img_data):
110 | data = img_data[data_id]
111 | arr = data["array"]
112 | # We use the data_id as the filepath since we do not actually open the
113 | # file and it only needs to be unique
114 | #
115 | # filepath, car_id, cam_id, time, img_data, misc
116 | chip = (data_id, i, 1, datetime.datetime(2016, 10, 1, 0, 1, 2, microsecond=100+i), arr, {})
117 | CHIPS.append(chip)
118 |
119 | chip_producer = {"chips": {}}
120 | for filepath, car_id, cam_id, time, data, misc in CHIPS:
121 | chip = Chip(filepath, car_id, cam_id, time, data, misc)
122 | chip_producer["chips"][filepath] = chip
123 |
124 | return chip_producer
125 |
126 |
127 | @pytest.fixture
128 | def feature_producer(chip_producer):
129 | hog = HOGFeatureProducer(chip_producer)
130 |
131 | return hog
132 |
133 |
134 | def test_features(feature_producer, chip_producer, img_data):
135 | fp = feature_producer
136 |
137 | for _, chip in chip_producer["chips"].items():
138 | data_id = chip.filepath
139 | data = img_data[data_id]
140 | hog_features = data["hog_features"]
141 | hist_features = data["hist_features"]
142 | hog_len = len(hog_features)
143 | hist_len = len(hist_features)
144 |
145 | features = feature_producer.produce_features(chip)
146 | assert len(features) == hog_len + hist_len
147 |
148 | total_features = np.concatenate((hog_features, hist_features))
149 | assert np.array_equal(features, total_features)
150 |
151 |
152 | def test_inputs(chip_producer):
153 | pix_sizes = (32, 64, 128, 256, 512)
154 | cell_counts = (1, 2, 4, 16)
155 | orientation_counts = (2, 4, 8, 16)
156 | histogram_bins = (32, 64, 128, 256)
157 | for pix, cell, orientation, histogram_bin in product(pix_sizes, cell_counts, orientation_counts, histogram_bins):
158 | hog = HOGFeatureProducer(
159 | chip_producer,
160 | image_size=(pix, pix),
161 | cells=(cell, cell),
162 | orientations=orientation,
163 | histogram_bins_per_channel=histogram_bin,
164 | )
165 | for _, chip in chip_producer["chips"].items():
166 | features = hog.produce_features(chip)
167 | assert len(features) == ((cell**2) * orientation) + (3 * histogram_bin)
168 |
--------------------------------------------------------------------------------
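The closing assertion in test_inputs() encodes the expected feature length:
cells**2 HOG blocks times the orientation count, plus three color channels
times the histogram bin count. For example, cells=(2, 2), orientations=8,
and histogram_bins_per_channel=256 give 2*2*8 + 3*256 = 32 + 768 = 800
features.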
/testci/test_keras_load_model.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytest
4 |
5 | from pelops.features.keras_model import KerasModelFeatureProducer
6 |
7 |
8 | def test_load_model_workaround():
9 | # @TODO get some environment variable set when in CI environment
10 | # test to see, modify path...
11 | if os.getenv('CIRCLECI', None) is not None:
12 | model_filename = '/home/ubuntu/pelops/testci/small.json'
13 | weight_filename = '/home/ubuntu/pelops/testci/small.hdf5'
14 | if os.getenv('INDOCKERCONTAINER', None) is not None:
15 | model_filename = '/pelops_root/testci/small.json'
16 | weight_filename = '/pelops_root/testci/small.hdf5'
17 |
18 | model = KerasModelFeatureProducer.load_model_workaround(
19 | model_filename, weight_filename)
20 | assert model.layers[0].name == 'dense_8'
21 |
--------------------------------------------------------------------------------
/testci/test_keras_model_feature.py:
--------------------------------------------------------------------------------
1 | from PIL import Image
2 | import collections
3 | import datetime
4 | import numpy as np
5 | import pytest
6 |
7 | from pelops.features.keras_model import KerasModelFeatureProducer
8 |
9 |
10 | @pytest.fixture
11 | def img_data():
12 | DATA = [[[ 0, 0, 0],
13 | [255, 255, 255],
14 | [ 0, 0, 0]],
15 | [[255, 255, 255],
16 | [ 0, 0, 0],
17 | [255, 255, 255]],
18 | [[ 0, 0, 0],
19 | [255, 255, 255],
20 | [ 0, 0, 0]]]
21 | return np.array(DATA, dtype=np.uint8)
22 |
23 |
24 | def test_preprocess_image(img_data):
25 | img = Image.fromarray(img_data)
26 | img_resized = KerasModelFeatureProducer.preprocess_image(img, 224, 224)
27 | assert img_resized.shape == (1, 224, 224, 3)
28 |
--------------------------------------------------------------------------------
/testci/test_resnet50_feature.py:
--------------------------------------------------------------------------------
1 | from PIL import Image
2 | import collections
3 | import datetime
4 | import numpy as np
5 | import pytest
6 |
7 | from pelops.features.resnet50 import ResNet50FeatureProducer
8 |
9 |
10 | @pytest.fixture
11 | def img_data():
12 | DATA = [[[ 0, 0, 0],
13 | [255, 255, 255],
14 | [ 0, 0, 0]],
15 | [[255, 255, 255],
16 | [ 0, 0, 0],
17 | [255, 255, 255]],
18 | [[ 0, 0, 0],
19 | [255, 255, 255],
20 | [ 0, 0, 0]]]
21 | return np.array(DATA, dtype=np.uint8)
22 |
23 |
24 | @pytest.fixture
25 | def chip_producer(img_data):
26 | Chip = collections.namedtuple("Chip", ["filepath", "car_id", "cam_id", "time", "img_data", "misc"])
27 | CHIPS = (
28 | # filepath, car_id, cam_id, time, img_data, misc
29 | ("car1_cam1.png", 1, 1, datetime.datetime(2016, 10, 1, 0, 1, 2, microsecond=100), img_data, {}),
30 | )
31 |
32 | chip_producer = {"chips": {}}
33 | for filepath, car_id, cam_id, time, img_data, misc in CHIPS:
34 | chip = Chip(filepath, car_id, cam_id, time, img_data, misc)
35 | chip_producer["chips"][filepath] = chip
36 |
37 | return chip_producer
38 |
39 |
40 | @pytest.fixture
41 | def feature_producer(chip_producer):
42 | res = ResNet50FeatureProducer(chip_producer)
43 | return res
44 |
45 |
46 | def test_features(feature_producer, chip_producer):
47 | for _, chip in chip_producer["chips"].items():
48 | features = feature_producer.produce_features(chip)
49 | assert features.shape == (1, 2048)
50 | assert np.sum(features) != 0
51 |
52 |
53 | def test_preprocess_image(feature_producer, img_data):
54 | img = Image.fromarray(img_data)
55 | img_resized = feature_producer.preprocess_image(img, 224, 224)
56 | assert img_resized.shape == (1, 224, 224, 3)
57 |
--------------------------------------------------------------------------------
/testci/test_slice.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import datetime
3 | import io
4 |
5 | import pytest
6 |
7 | import pelops.datasets.slice as slice
8 |
9 |
10 | @pytest.fixture
11 | def slice_env(tmpdir):
12 | """Setup mock STR SLiCE dataset"""
13 | work_dir = tmpdir.mkdir('pelops_testing')
14 | truth = [
15 | ['% obSetIdx', ' chipIdx', ' targetID'],
16 | ['1', ' 1', '0'],
17 | ['1', ' 2', '1'],
18 | ['1', ' 3', '0'],
19 | ['2', ' 1', '1'],
20 | ['100', ' 1', '2'],
21 | ]
22 |
23 | truth_file = work_dir.join('truth.txt')
24 | with io.StringIO(newline='') as truth_hdl:
25 | csv.writer(truth_hdl).writerows(truth)
26 | truth_hdl.seek(0)
27 | truth_file.write(truth_hdl.read())
28 |
29 | for obset, chipid in {(row[0], row[1].strip()) for row in truth[1:]}:
30 | obset_dir = work_dir.join('ObSet00{}_1492560663_TestDir'.format(obset))
31 | obset_dir.ensure(dir=True)
32 | img_dir = obset_dir.join('images')
33 | img_dir.ensure(dir=True)
34 | img_file = img_dir.join('ObSet001-00{}.png'.format(chipid))
35 | img_file.ensure(dir=False)
36 |
37 | yield work_dir.strpath
38 |
39 |
40 | def test_slice_chip_load(slice_env):
41 | """Test that SLiCE chips load without error"""
42 | slice_dataset = slice.SliceDataset(slice_env)
43 | assert len(slice_dataset.chips) == 5
44 |
45 |
46 | def test_slice_chip_tgt_car_id(slice_env):
47 | """Test that SLiCE chips for target vehicles are processed properly."""
48 | slice_dataset = slice.SliceDataset(slice_env)
49 | target_ids = [chip.car_id for chip in slice_dataset.chips.values() if chip.car_id.startswith('tgt-')]
50 | assert 'tgt-000000001' in target_ids
51 | assert len(target_ids) == 3
52 | assert len(set(target_ids)) == 2
53 |
54 |
55 | def test_slice_chip_unk_car_id(slice_env):
56 | """Test that SLiCE chips for non-target vehicles are processed properly."""
57 | slice_dataset = slice.SliceDataset(slice_env)
58 | unk_ids = [chip.car_id for chip in slice_dataset.chips.values() if chip.car_id.startswith('unk-')]
59 | assert 'unk-000000001' in unk_ids
60 | assert len(unk_ids) == 2
61 |
62 |
63 | def test_slice_chip_dtg(slice_env):
64 | """Test that date/times encoded in filenames are processed properly."""
65 | slice_dataset = slice.SliceDataset(slice_env)
66 | dtgs = {datetime.datetime.fromtimestamp(float(chip.time)).isoformat() for chip in slice_dataset.chips.values()}
67 | assert len(dtgs) == 1
68 |
69 |
70 | def test_slice_index_chip():
71 | TRUTH = (
72 | # STR like chip
73 | (
74 | "ObSet009_1473015765_IH37_Jones/images/ObSet009-014.png",
75 | (
76 | (9, 14),
77 | {
78 | 'file': "ObSet009_1473015765_IH37_Jones/images/ObSet009-014.png",
79 | 'meta': {
80 | 'obSetName': "IH37_Jones",
81 | 'epoch': "1473015765",
82 | },
83 | },
84 | ),
85 | ),
86 | # STR like chip
87 | (
88 | "/root/data/stuff/ObSet009_1473015765_IH37_Jones/images/ObSet009-014.png",
89 | (
90 | (9, 14),
91 | {
92 | 'file': "/root/data/stuff/ObSet009_1473015765_IH37_Jones/images/ObSet009-014.png",
93 | 'meta': {
94 | 'obSetName': "IH37_Jones",
95 | 'epoch': "1473015765",
96 | },
97 | },
98 | ),
99 | ),
100 | # SLICE like chip
101 | (
102 | "ObSet101_1473082429_day5_camera3/images/ObSet101-001-0-20160905_185543.375_1.jpg",
103 | (
104 | (101, 1),
105 | {
106 | 'file': "ObSet101_1473082429_day5_camera3/images/ObSet101-001-0-20160905_185543.375_1.jpg",
107 | 'meta': {
108 | 'obSetName': "day5_camera3",
109 | 'epoch': "1473101743",
110 | },
111 | },
112 | ),
113 | ),
114 | # SLICE like chip
115 | (
116 | "/test/test/data/ObSet101_1473082429_day5_camera3/images/ObSet101-001-0-20160905_185543.375_1.jpg",
117 | (
118 | (101, 1),
119 | {
120 | 'file': "/test/test/data/ObSet101_1473082429_day5_camera3/images/ObSet101-001-0-20160905_185543.375_1.jpg",
121 | 'meta': {
122 | 'obSetName': "day5_camera3",
123 | 'epoch': "1473101743",
124 | },
125 | },
126 | ),
127 | ),
128 | # Special cases
129 | ("/test/test/truth.txt", None),
130 | ("/test/masks/image_mask.png", None),
131 | )
132 |
133 | for file_path, answer in TRUTH:
134 | assert answer == slice.SliceDataset.index_chip(file_path)
135 |
--------------------------------------------------------------------------------
/testci/test_str.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import os
3 |
4 | from pelops.datasets.str import get_sa_cam_id
5 | from pelops.datasets.str import get_sa_car_id
6 | from pelops.datasets.str import int_from_string
7 | from pelops.datasets.str import StrDataset
8 |
9 |
10 | @pytest.fixture
11 | def str_sa(tmpdir):
12 | """ Set up some test files and an instance of StrDataset(). """
13 | # Write a file to read back
14 | FILE_NAMES = (
15 | # filepath, car_id, cam_id, time, misc
16 | ("match00001_cam02.png", 1, 2, None, None),
17 | ("match00001_cam01_mask.png", None, None, None, None),
18 | ("match00010_cam01.png", 10, 1, None, None),
19 | ("match00011_cam02_mask.png", None, None, None, None)
20 | )
21 | # The contents of the files do not matter, the name is enough
22 | internal_dir = tmpdir.mkdir("crossCameraMatches")
23 | for name, _, _, _, _ in FILE_NAMES:
24 | out_file = internal_dir.join(name)
25 | out_file.write("TEST")
26 |
27 | # Setup the class
28 | instantiated_class = StrDataset(os.path.dirname(out_file.dirname))
29 |
30 | # Rename filepath
31 | FILE_NAMES = (
32 | (os.path.join(out_file.dirname, "match00001_cam02.png"), 1, 2, None, None),
33 | (os.path.join(out_file.dirname, "match00001_cam01_mask.png"), None, None, None, None),
34 | (os.path.join(out_file.dirname, "match00010_cam01.png"), 10, 1, None, None),
35 | (os.path.join(out_file.dirname, "match00011_cam02_mask.png"), None, None, None, None)
36 | )
37 |
38 | # Filter out the files that were not read
39 | RET_FILE_NAMES = tuple(t for t in FILE_NAMES if t[1] is not None)
40 | return (instantiated_class, RET_FILE_NAMES)
41 |
42 |
43 | def test_str_sa_chips_len(str_sa):
44 | """ Test that StrDataset.chips is the correct length """
45 | instantiated_class = str_sa[0]
46 | FILE_NAMES = str_sa[1]
47 | # check that self.chips has been created, is not empty, and has the right
48 | # number of entries
49 | assert len(FILE_NAMES)
50 | assert len(FILE_NAMES) == len(instantiated_class.chips)
51 |
52 |
53 | def test_str_sa_chips_vals(str_sa):
54 | """ Test that StrDataset chips have the correct values. """
55 | instantiated_class = str_sa[0]
56 | FILE_NAMES = str_sa[1]
57 |
58 | # Check that the correct chips exist
59 | for filepath, car_id, cam_id, time, misc in FILE_NAMES:
60 | chip = instantiated_class.chips[filepath]
61 | assert car_id == chip.car_id
62 | assert cam_id == chip.cam_id
63 | # No time data
64 | assert chip.time is None
65 | # No misc data
66 | assert chip.misc is None
67 | # Filepath should be filled
68 | assert chip.filepath
69 |
70 |
71 | def test_get_all_chips_by_car_id(str_sa):
72 | """ Test StrDataset.get_all_chips_by_car_id() """
73 | instantiated_class = str_sa[0]
74 | FILE_NAMES = str_sa[1]
75 |
76 | seen_ids = []
77 | for filepath, car_id, cam_id, time, misc in FILE_NAMES:
78 | # Generate all the chips by hand, and compare
79 | if car_id in seen_ids:
80 | continue
81 | seen_ids.append(car_id)
82 | chips = []
83 | for key, val in instantiated_class.chips.items():
84 | if val.car_id == car_id:
85 | chips.append(val)
86 |
87 | chips.sort()
88 | test_chips = sorted(instantiated_class.get_all_chips_by_car_id(car_id))
89 | assert chips == test_chips
90 |
91 |
92 | def test_get_all_chips_by_cam_id(str_sa):
93 | """ Test StrDataset.get_all_chips_by_cam_id() """
94 | instantiated_class = str_sa[0]
95 | FILE_NAMES = str_sa[1]
96 |
97 | seen_ids = []
98 | for filepath, car_id, cam_id, time, misc in FILE_NAMES:
99 | # Generate all the chips by hand, and compare
100 | if cam_id in seen_ids:
101 | continue
102 | seen_ids.append(cam_id)
103 | chips = []
104 | for key, val in instantiated_class.chips.items():
105 | if val.cam_id == cam_id:
106 | chips.append(val)
107 |
108 | chips.sort()
109 | test_chips = sorted(instantiated_class.get_all_chips_by_cam_id(cam_id))
110 | assert chips == test_chips
111 |
112 |
113 | def test_get_distinct_cams_by_car_id(str_sa):
114 | """ Test StrDataset.get_distinct_cams_by_car_id() and get_distinct_cams_per_car """
115 | instantiated_class = str_sa[0]
116 | CAR_ID = 1
117 | TEST_CAMS = [2]
118 | for test_cam, cam in zip(TEST_CAMS, sorted(instantiated_class.get_distinct_cams_by_car_id(CAR_ID))):
119 | assert test_cam == cam
120 |
121 | def test_get_all_cam_ids(str_sa):
122 | """ Test StrDataset.get_distinct_cams_by_car_id() """
123 | instantiated_class = str_sa[0]
124 | TEST_CAMS = [1, 2]
125 | for test_cam, cam in zip(TEST_CAMS, sorted(instantiated_class.get_all_cam_ids())):
126 | assert test_cam == cam
127 |
128 | def test_get_all_car_ids(str_sa):
129 | """ Test StrDataset.get_distinct_cams_by_car_id() """
130 | instantiated_class = str_sa[0]
131 | TEST_CARS = [1, 10]
132 | for test_car, car in zip(TEST_CARS, sorted(instantiated_class.get_all_car_ids())):
133 | assert test_car == car
134 |
135 |
136 | def test_str_sa_iter(str_sa):
137 | """ Test StrDataset.__iter__() """
138 | instantiated_class = str_sa[0]
139 | FILE_NAMES = str_sa[1]
140 | chip_ids = tuple(i for i, _, _, _, _ in FILE_NAMES)
141 |
142 | for chip in instantiated_class:
143 | assert chip.filepath in chip_ids
144 |
145 |
146 | def test_int_from_string():
147 | """ Test int_from_string() """
148 | TEST_STRINGS = (
149 | # String, Args, Answer
150 | ("test_010_test", ("test_", 3), 10),
151 | ("test_010_test", ("FAIL_", 3), None),
152 | ("test_010", ("test_", 3), 10),
153 | ("test_11_test", ("test_", 2), 11),
154 | ("010_test", ("", 3), 10),
155 | ("/foo/bar/bass/test_/test_010_test", ("test_", 3), 10),
156 | )
157 |
158 | for test_string, args, answer in TEST_STRINGS:
159 | assert answer == int_from_string(test_string, args[0], args[1])
160 |
161 |
162 | def test_get_sa_cam_id():
163 | """ Test get_sa_cam_id() """
164 | TEST_STRINGS = (
165 | # String, Answer
166 | ("match00001_cam02.png", 2),
167 | ("match00001_cam01_mask.png", 1),
168 | ("match00010_cam01.png", 1),
169 | ("match00011_cam02_mask.png", 2),
170 | )
171 |
172 | for test_string, answer in TEST_STRINGS:
173 | assert answer == get_sa_cam_id(test_string)
174 |
175 |
176 | def test_get_sa_car_id():
177 | """ Test get_sa_car_id() """
178 | TEST_STRINGS = (
179 | # String, Answer
180 | ("match00001_cam02.png", 1),
181 | ("match00001_cam01_mask.png", 1),
182 | ("match00010_cam01.png", 10),
183 | ("match00011_cam02_mask.png", 11),
184 | )
185 |
186 | for test_string, answer in TEST_STRINGS:
187 | assert answer == get_sa_car_id(test_string)
188 |
--------------------------------------------------------------------------------