├── .dockerignore ├── .flake8 ├── .github ├── CODE_OF_CONDUCT.md ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ └── questions-about-datasetinsights.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── linting-and-unittests.yaml │ ├── publish-docker-hub.yaml │ ├── publish-pypi.yaml │ └── synk-scan.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CONTRIBUTING.md ├── Dockerfile ├── LICENCE ├── Makefile ├── README.md ├── datasetinsights ├── __init__.py ├── __main__.py ├── commands │ ├── __init__.py │ ├── convert.py │ └── download.py ├── constants.py ├── dashboard.py ├── datasets │ ├── __init__.py │ ├── exceptions.py │ ├── synthetic.py │ ├── transformers │ │ ├── __init__.py │ │ ├── base.py │ │ └── coco.py │ └── unity_perception │ │ ├── __init__.py │ │ ├── captures.py │ │ ├── exceptions.py │ │ ├── metrics.py │ │ ├── references.py │ │ ├── tables.py │ │ └── validation.py ├── io │ ├── __init__.py │ ├── bbox.py │ ├── download.py │ ├── downloader │ │ ├── __init__.py │ │ ├── base.py │ │ ├── gcs_downloader.py │ │ └── http_downloader.py │ ├── exceptions.py │ └── gcs.py └── stats │ ├── __init__.py │ ├── constants.py │ ├── image_analysis │ ├── __init__.py │ ├── laplacian.py │ ├── spectral_analysis.py │ └── wavelet.py │ ├── keypoints_stats.py │ ├── object_detection_stats.py │ ├── statistics.py │ └── visualization │ ├── __init__.py │ ├── app.py │ ├── bbox2d_plot.py │ ├── bbox3d_plot.py │ ├── constants.py │ ├── font │ ├── DroidSansFallback.ttf │ └── LICENSE-2.0.txt │ ├── keypoints_plot.py │ ├── object_detection.py │ ├── overview.py │ ├── plots.py │ └── stylesheet.css ├── docs ├── Makefile ├── README.md ├── requirements.txt └── source │ ├── Synthetic_Dataset_Schema.md │ ├── _images │ ├── captures_steps_timestamps.png │ ├── image_0.png │ ├── image_2.png │ ├── image_3.png │ ├── image_4.png │ ├── kubeflow │ │ ├── evaluate_pipeline_graph.png │ │ ├── evaluate_the_model.png │ │ ├── notebook.png │ │ ├── notebook_docker_cpu_memory.png │ │ ├── notebook_gpu_volume.png │ │ ├── train_on_real_world_dataset.png │ │ ├── train_on_synthdet_sample.png │ │ ├── train_on_synthetic_and_real_world_dataset.png │ │ ├── train_on_synthetic_dataset_unity_simulation.png │ │ ├── train_pipeline_graph.jpg │ │ └── upload_pipeline.png │ └── synthetic_data_pipeline_dataset_evaluation.png │ ├── _templates │ ├── module.rst_t │ ├── package.rst_t │ └── toc.rst_t │ ├── conf.py │ ├── datasetinsights.datasets.rst │ ├── datasetinsights.datasets.transformers.rst │ ├── datasetinsights.datasets.unity_perception.rst │ ├── datasetinsights.io.downloader.rst │ ├── datasetinsights.io.rst │ ├── datasetinsights.rst │ ├── datasetinsights.stats.rst │ ├── datasetinsights.stats.visualization.rst │ ├── index.rst │ └── modules.rst ├── notebooks ├── Human_Keypoint_Pose.ipynb ├── Image_Analysis.ipynb ├── Object_Detection_Stats.ipynb └── Perception_Statistics.ipynb ├── poetry.lock ├── pyproject.toml └── tests ├── datasets ├── test_coco_transformers.py ├── test_statistics.py └── test_synthetic.py ├── mock_data ├── calib000000.txt ├── coco │ ├── annotations │ │ ├── instances.json │ │ └── keypoints.json │ └── images │ │ ├── camera_001.png │ │ ├── camera_125709864006893838062514269195103918838.png │ │ └── camera_61855733451949387398181790757513827492.png ├── no_annotations_or_metrics │ └── Dataset │ │ ├── annotation_definitions.json │ │ ├── captures_000.json │ │ ├── captures_001.json │ │ ├── egos.json │ │ ├── metric_definitions.json │ │ ├── metrics_000.json │ │ └── sensors.json ├── simrun │ ├── Dataset │ │ ├── 
annotation_definitions.json │ │ ├── captures_000.json │ │ ├── captures_001.json │ │ ├── egos.json │ │ ├── metric_definitions.json │ │ ├── metrics_000.json │ │ └── sensors.json │ ├── README.md │ ├── annotations │ │ ├── instance_segmantation_000.png │ │ ├── lidar_semantic_segmentation_000.pcd │ │ ├── sementic_segmantation_000.png │ │ └── sementic_segmantation_001.png │ └── captures │ │ ├── camera_000.png │ │ ├── camera_001.png │ │ └── lidar_000.pcd ├── simrun_keypoint_dataset │ ├── annotation_definitions.json │ ├── annotations │ │ ├── keypoint_000.png │ │ └── keypoint_001.png │ ├── captures_000.json │ ├── egos.json │ ├── metric_definitions.json │ ├── metrics_000.json │ └── sensors.json └── simrun_manifest.csv ├── test_bbox.py ├── test_create_downloader.py ├── test_dashboard.py ├── test_download_command.py ├── test_gcs.py ├── test_http_downloader.py ├── test_image_analysis.py ├── test_keypoints_stats.py ├── test_main_entrypoint.py ├── test_object_detection_stats.py ├── test_visual.py └── unity_perception ├── conftest.py ├── test_captures.py ├── test_metrics.py └── test_references.py /.dockerignore: -------------------------------------------------------------------------------- 1 | # Git 2 | .git 3 | .gitignore 4 | 5 | # CI 6 | .codeclimate.yml 7 | .travis.yml 8 | .taskcluster.yml 9 | 10 | # Docker 11 | docker-compose.yml 12 | .docker 13 | 14 | # Byte-compiled / optimized / DLL files 15 | **/__pycache__/ 16 | **/*.py[cod] 17 | 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | env/ 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .coverage 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | .pytest_cache 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Virtual environment 71 | .env/ 72 | .venv/ 73 | venv/ 74 | 75 | # PyCharm 76 | .idea 77 | 78 | # IDE 79 | **/.ropeproject 80 | **/.swp 81 | .vscode 82 | .ipynb_checkpoints 83 | 84 | # Place project specific ignores here 85 | runs 86 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 80 3 | ignore = 4 | E133, 5 | E203, 6 | W503, 7 | W504, 8 | W605, 9 | F541 10 | exclude = 11 | .git, 12 | __pycache__, 13 | datasetinsights/data/datasets/protos/ 14 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributor Covenant Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | We as members, contributors, and leaders pledge to make participation in our 7 | community a harassment-free experience for everyone, regardless of age, body 8 | size, visible or invisible disability, ethnicity, sex characteristics, gender 9 | identity and expression, level of experience, education, socio-economic status, 10 | nationality, personal appearance, race, religion, or sexual identity 11 | and orientation. 12 | 13 | We pledge to act and interact in ways that contribute to an open, welcoming, 14 | diverse, inclusive, and healthy community. 15 | 16 | ## Our Standards 17 | 18 | Examples of behavior that contributes to a positive environment for our 19 | community include: 20 | 21 | * Demonstrating empathy and kindness toward other people 22 | * Being respectful of differing opinions, viewpoints, and experiences 23 | * Giving and gracefully accepting constructive feedback 24 | * Accepting responsibility and apologizing to those affected by our mistakes, 25 | and learning from the experience 26 | * Focusing on what is best not just for us as individuals, but for the 27 | overall community 28 | 29 | Examples of unacceptable behavior include: 30 | 31 | * The use of sexualized language or imagery, and sexual attention or 32 | advances of any kind 33 | * Trolling, insulting or derogatory comments, and personal or political attacks 34 | * Public or private harassment 35 | * Publishing others' private information, such as a physical or email 36 | address, without their explicit permission 37 | * Other conduct which could reasonably be considered inappropriate in a 38 | professional setting 39 | 40 | ## Enforcement Responsibilities 41 | 42 | Community leaders are responsible for clarifying and enforcing our standards of 43 | acceptable behavior and will take appropriate and fair corrective action in 44 | response to any behavior that they deem inappropriate, threatening, offensive, 45 | or harmful. 46 | 47 | Community leaders have the right and responsibility to remove, edit, or reject 48 | comments, commits, code, wiki edits, issues, and other contributions that are 49 | not aligned to this Code of Conduct, and will communicate reasons for moderation 50 | decisions when appropriate. 
51 | 52 | ## Scope 53 | 54 | This Code of Conduct applies within all community spaces, and also applies when 55 | an individual is officially representing the community in public spaces. 56 | Examples of representing our community include using an official e-mail address, 57 | posting via an official social media account, or acting as an appointed 58 | representative at an online or offline event. 59 | 60 | ## Enforcement 61 | 62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 63 | reported to the community leaders responsible for enforcement at 64 | . 65 | All complaints will be reviewed and investigated promptly and fairly. 66 | 67 | All community leaders are obligated to respect the privacy and security of the 68 | reporter of any incident. 69 | 70 | ## Enforcement Guidelines 71 | 72 | Community leaders will follow these Community Impact Guidelines in determining 73 | the consequences for any action they deem in violation of this Code of Conduct: 74 | 75 | ### 1. Correction 76 | 77 | **Community Impact**: Use of inappropriate language or other behavior deemed 78 | unprofessional or unwelcome in the community. 79 | 80 | **Consequence**: A private, written warning from community leaders, providing 81 | clarity around the nature of the violation and an explanation of why the 82 | behavior was inappropriate. A public apology may be requested. 83 | 84 | ### 2. Warning 85 | 86 | **Community Impact**: A violation through a single incident or series 87 | of actions. 88 | 89 | **Consequence**: A warning with consequences for continued behavior. No 90 | interaction with the people involved, including unsolicited interaction with 91 | those enforcing the Code of Conduct, for a specified period of time. This 92 | includes avoiding interactions in community spaces as well as external channels 93 | like social media. Violating these terms may lead to a temporary or 94 | permanent ban. 95 | 96 | ### 3. Temporary Ban 97 | 98 | **Community Impact**: A serious violation of community standards, including 99 | sustained inappropriate behavior. 100 | 101 | **Consequence**: A temporary ban from any sort of interaction or public 102 | communication with the community for a specified period of time. No public or 103 | private interaction with the people involved, including unsolicited interaction 104 | with those enforcing the Code of Conduct, is allowed during this period. 105 | Violating these terms may lead to a permanent ban. 106 | 107 | ### 4. Permanent Ban 108 | 109 | **Community Impact**: Demonstrating a pattern of violation of community 110 | standards, including sustained inappropriate behavior, harassment of an 111 | individual, or aggression toward or disparagement of classes of individuals. 112 | 113 | **Consequence**: A permanent ban from any sort of public interaction within 114 | the community. 115 | 116 | ## Attribution 117 | 118 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 119 | version 2.0, available at 120 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 121 | 122 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 123 | enforcement ladder](https://github.com/mozilla/diversity). 124 | 125 | [homepage]: https://www.contributor-covenant.org 126 | 127 | For answers to common questions about this code of conduct, see the FAQ at 128 | https://www.contributor-covenant.org/faq. Translations are available at 129 | https://www.contributor-covenant.org/translations. 
130 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Report a bug with datasetinsights 4 | labels: bug 5 | 6 | --- 7 | 8 | **Describe the Bug:** 9 | [A clear and concise description of what the bug is.] 10 | 11 | **How to Reproduce?** 12 | [What are the steps that would reproduce the bug that you encountered.] 13 | 14 | **What did you expect to happen:** 15 | 16 | **Console logs / stack traces** 17 | Please wrap in [triple backticks (```)](https://help.github.com/en/articles/creating-and-highlighting-code-blocks) to make it easier to read. 18 | 19 | **Screenshots** 20 | [If applicable, add screenshots to help explain your problem.] 21 | 22 | **Anything else you would like to add:** 23 | [Miscellaneous information that will assist in solving the issue.] 24 | 25 | **Environment:** 26 | 27 | - OS + version: [e.g. Ubuntu 20.04.1 LTS] 28 | - datasetinsights version 29 | - _Environment_: (which example environment you used to reproduce the error) 30 | - Other environment settings 31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | labels: enhancement 5 | 6 | --- 7 | 8 | **Why you need this feature:** 9 | [Is your feature request related to a problem? Please describe in details] 10 | 11 | 12 | **Describe the solution you'd like:** 13 | [A clear and concise description of what you want to happen.] 14 | 15 | 16 | **Anything else you would like to add:** 17 | [Miscellaneous information that will assist in solving the issue.] 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/questions-about-datasetinsights.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Questions about datasetinsights 3 | about: Ask your question or about any confusion that you have about this project 4 | labels: question 5 | 6 | --- 7 | 8 | **Question:** 9 | [You can ask any question about this project.] 10 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Peer Review Information 2 | 3 | Add information on any code, feature, documentation changes here. 4 | 5 | # Pull Request Check List 6 | 7 | 9 | 10 | - [ ] Added **tests** for changed code. 11 | - [ ] Updated **documentation** for changed code. 
12 | -------------------------------------------------------------------------------- /.github/workflows/linting-and-unittests.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | linting: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python 3.8 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: "3.8" 21 | - name: Linting 22 | run: | 23 | pip install pre-commit 24 | pre-commit run --all-files 25 | tests: 26 | # reference from https://github.com/python-poetry/poetry/blob/master/.github/workflows/main.yml 27 | runs-on: ubuntu-latest 28 | strategy: 29 | matrix: 30 | python-version: ["3.8", "3.9", "3.10"] 31 | 32 | steps: 33 | - uses: actions/checkout@v2 34 | - name: Set up Python ${{ matrix.python-version }} 35 | uses: actions/setup-python@v2 36 | with: 37 | python-version: ${{ matrix.python-version }} 38 | - name: Get full Python version 39 | id: full-python-version 40 | shell: bash 41 | run: echo ::set-output name=version::$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") 42 | - name: Install poetry 43 | shell: bash 44 | run: | 45 | curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | python - 46 | echo "$HOME/.poetry/bin" >> $GITHUB_PATH 47 | - name: Configure poetry 48 | shell: bash 49 | run: poetry config virtualenvs.in-project true 50 | - name: Set up cache 51 | uses: actions/cache@v2 52 | id: cache 53 | with: 54 | path: .venv 55 | key: venv-${{ steps.full-python-version.outputs.version }}-${{ hashFiles('**/poetry.lock') }} 56 | - name: Ensure cache is healthy 57 | if: steps.cache.outputs.cache-hit == 'true' 58 | shell: bash 59 | run: poetry run pip --version >/dev/null 2>&1 || rm -rf .venv 60 | - name: Install dependencies 61 | run: poetry install 62 | shell: bash 63 | - name: Run pytest 64 | run: poetry run pytest 65 | -------------------------------------------------------------------------------- /.github/workflows/publish-docker-hub.yaml: -------------------------------------------------------------------------------- 1 | name: Publish Docker image 2 | on: 3 | release: 4 | types: [published] 5 | jobs: 6 | push_to_registry: 7 | name: Push Docker image to Docker Hub 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Check out the repo 11 | uses: actions/checkout@v2 12 | - name: Push to Docker Hub 13 | uses: docker/build-push-action@v1 14 | with: 15 | username: ${{ secrets.DOCKERHUB_USERNAME }} 16 | password: ${{ secrets.DOCKERHUB_ACCESS_TOKEN }} 17 | repository: unitytechnologies/datasetinsights 18 | tags: latest 19 | tag_with_ref: true 20 | -------------------------------------------------------------------------------- /.github/workflows/publish-pypi.yaml: -------------------------------------------------------------------------------- 1 | name: Publish to pypi 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | env: 8 | PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} 9 | 10 | jobs: 11 | 12 | build-and-publish: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | 17 | - uses: actions/checkout@v2 18 | - name: Set up Python 3.8 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: "3.8" 22 | - name: Get full Python version 23 | id: full-python-version 24 | shell: bash 25 | run: echo ::set-output name=version::$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") 
26 | - name: Install poetry 27 | shell: bash 28 | run: | 29 | curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | python - 30 | echo "$HOME/.poetry/bin" >> $GITHUB_PATH 31 | - name: Set env 32 | run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV 33 | - name : Configure poetry 34 | shell: bash 35 | run: poetry config pypi-token.pypi $PYPI_TOKEN 36 | - name: Set poetry version 37 | shell: bash 38 | run: poetry version $RELEASE_VERSION 39 | - name: build 40 | shell: bash 41 | run: poetry build 42 | - name: publish 43 | shell: bash 44 | run: poetry publish 45 | -------------------------------------------------------------------------------- /.github/workflows/synk-scan.yaml: -------------------------------------------------------------------------------- 1 | name: Scan Python project using Snyk 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | security: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@master 16 | - name: Run Snyk to check for vulnerabilities 17 | uses: snyk/actions/python@master 18 | env: 19 | SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} 20 | with: 21 | command: monitor 22 | args: --all-projects --exclude=docs --command=python3 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled source # 2 | ################### 3 | *.com 4 | *.class 5 | *.dll 6 | *.exe 7 | *.o 8 | *.so 9 | 10 | # Compressed files # 11 | #################### 12 | # it's better to unpack these files and commit the raw source 13 | # git has its own built in compression methods 14 | *.7z 15 | *.dmg 16 | *.gz 17 | *.iso 18 | *.jar 19 | *.rar 20 | *.tar 21 | *.zip 22 | 23 | # Logs and databases # 24 | ###################### 25 | *.log 26 | *.sql 27 | *.sqlite 28 | 29 | # OS generated files # 30 | ###################### 31 | .DS_Store* 32 | ehthumbs.db 33 | Icon? 
34 | Thumbs.db 35 | *.bak* 36 | 37 | # IDE Project files # 38 | ###################### 39 | *.sublime-* 40 | *.Rproj 41 | .Rproj.user 42 | .Rhistory 43 | *.xcodeproj 44 | *.idea 45 | 46 | # Python # 47 | ########### 48 | # Byte-compiled / optimized / DLL files 49 | __pycache__/ 50 | *.py[cod] 51 | *$py.class 52 | 53 | # C extensions 54 | *.so 55 | 56 | # Distribution / packaging 57 | .Python 58 | build/ 59 | develop-eggs/ 60 | dist/ 61 | downloads/ 62 | eggs/ 63 | .eggs/ 64 | lib/ 65 | lib64/ 66 | parts/ 67 | sdist/ 68 | var/ 69 | wheels/ 70 | pip-wheel-metadata/ 71 | share/python-wheels/ 72 | *.egg-info/ 73 | .installed.cfg 74 | *.egg 75 | MANIFEST 76 | 77 | # Unit test / coverage reports 78 | htmlcov/ 79 | .tox/ 80 | .nox/ 81 | .coverage 82 | .coverage.* 83 | .cache 84 | nosetests.xml 85 | coverage.xml 86 | *.cover 87 | .hypothesis/ 88 | .pytest_cache/ 89 | 90 | # Jupyter Notebook 91 | .ipynb_checkpoints 92 | 93 | # IPython 94 | profile_default/ 95 | ipython_config.py 96 | 97 | # pyenv 98 | .python-version 99 | 100 | # Environments 101 | .env 102 | .venv 103 | env/ 104 | venv/ 105 | ENV/ 106 | env.bak/ 107 | venv.bak/ 108 | 109 | # Editor 110 | .vscode 111 | 112 | # For this Project # 113 | ###################### 114 | runs/ 115 | checkpoints/ 116 | metrics/ 117 | coco_data 118 | perception_data 119 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | exclude: > 4 | (?x)^( 5 | .*_pb2.py| 6 | .*_pb2_grpc.py 7 | )$ 8 | repos: 9 | - repo: https://github.com/pre-commit/pre-commit-hooks 10 | rev: v2.4.0 11 | hooks: 12 | - id: trailing-whitespace 13 | - id: end-of-file-fixer 14 | - id: check-yaml 15 | - id: check-added-large-files 16 | - id: check-merge-conflict 17 | - repo: https://github.com/psf/black 18 | rev: 22.3.0 19 | hooks: 20 | - id: black 21 | - repo: https://gitlab.com/pycqa/flake8 22 | rev: 3.8.1 23 | hooks: 24 | - id: flake8 25 | - repo: https://github.com/timothycrosley/isort 26 | rev: 5.1.0 27 | hooks: 28 | - id: isort 29 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | formats: all 3 | build: 4 | image: stable 5 | python: 6 | version: 3.8 7 | install: 8 | - requirements: docs/requirements.txt 9 | - method: pip 10 | path: . 11 | sphinx: 12 | builder: html 13 | configuration: docs/source/conf.py 14 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Table of contents 2 | 3 | - [Table of contents](#table-of-contents) 4 | - [Contributing to datasetinsights](#contributing-to-datasetinsights) 5 | - [Developing datasetinsights](#developing-datasetinsights) 6 | - [Add new dependencies](#add-new-dependencies) 7 | - [Codebase structure](#codebase-structure) 8 | - [Unit testing](#unit-testing) 9 | - [Style Guide](#style-guide) 10 | - [Writing documentation](#writing-documentation) 11 | - [Building documentation](#building-documentation) 12 | 13 | ## Contributing to datasetinsights 14 | 15 | We encourage contributions to the datasetinsights repo, including but not limited to the following categories: 16 | 17 | 1. 
You want to improve the documentation of an existing module. 18 | 2. You want to provide a bug fix for an outstanding issue. 19 | 3. You want to implement a new feature to support a new type of Perception package output. 20 | 21 | ## Developing datasetinsights 22 | 23 | Here are the steps to set up a datasetinsights virtual environment on your machine: 24 | 25 | 1. Install [poetry](https://python-poetry.org/), [git](https://git-scm.com/) and [pre-commit](https://pre-commit.com/) 26 | 2. Create a virtual environment. We recommend using [miniconda](https://docs.conda.io/en/latest/miniconda.html) 27 | 28 | ```bash 29 | conda create -n dins-dev python=3.8 30 | conda activate dins-dev 31 | ``` 32 | 33 | 3. Clone a copy of datasetinsights from source: 34 | 35 | ```bash 36 | git clone https://github.com/Unity-Technologies/datasetinsights.git 37 | cd datasetinsights 38 | ``` 39 | 40 | 4. Install datasetinsights in `develop` mode: 41 | 42 | ```bash 43 | poetry install 44 | ``` 45 | 46 | This will symlink the Python files from the current local source tree into the installed virtual environment. 47 | The `develop` mode also includes Python packages such as [pytest](https://docs.pytest.org/en/latest/) and [black](https://black.readthedocs.io/en/stable/). 48 | 49 | 5. Install the pre-commit [hook](https://pre-commit.com/#3-install-the-git-hook-scripts) into the `.git` folder. 50 | 51 | ```bash 52 | pre-commit install 53 | # pre-commit installed at .git/hooks/pre-commit 54 | ``` 55 | 56 | ### Add new dependencies 57 | 58 | Add new Python dependencies to the datasetinsights environment using poetry, for example: 59 | 60 | ```bash 61 | poetry add numpy@^1.18.4 62 | ``` 63 | 64 | Make sure you only add the packages you need instead of adding all dependencies. 65 | Let the package management system resolve transitive dependencies. 66 | See [poetry add](https://python-poetry.org/docs/cli/#add) for detailed instructions. 67 | 68 | ## Codebase structure 69 | 70 | The datasetinsights package contains the following modules: 71 | 72 | - [commands](datasetinsights/commands) This module contains the CLI commands. 73 | - [datasets](datasetinsights/datasets) This module contains different datasets. The dataset classes contain knowledge on how a dataset should be loaded into memory. 74 | - [io](datasetinsights/io) This module contains functionality related to writing/downloading/uploading to/from different sources. 75 | - [stats](datasetinsights/stats) This module contains code for visualizing and gathering statistics on the dataset. 76 | 77 | ## Unit testing 78 | 79 | We use [pytest](https://docs.pytest.org/en/latest/) to run tests located under `tests/`. Run the entire test suite with 80 | 81 | ```bash 82 | pytest 83 | ``` 84 | 85 | or run an individual test suite, like: 86 | 87 | ```bash 88 | pytest tests/test_visual.py 89 | ``` 90 | 91 | 92 | 93 | ## Style Guide 94 | 95 | We follow the Black code [style](https://black.readthedocs.io/en/stable/the_black_code_style.html) for this repository. 96 | The max line length is set at 80 characters. 97 | We enforce this code style using [Black](https://black.readthedocs.io/en/stable/) to format Python code. 98 | In addition to Black, we use [isort](https://github.com/timothycrosley/isort) to sort Python imports. 99 | 100 | Before submitting a pull request, run: 101 | 102 | ```bash
103 | pre-commit run --all-files 104 | ``` 105 | 106 | Fix all issues highlighted by flake8. If you want to skip exceptions such as long URL lines in docstrings, add `# noqa: E501` to the specific violating line. See [this](https://flake8.pycqa.org/en/3.1.1/user/ignoring-errors.html) to learn more about how to ignore flake8 errors.
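For illustration, a minimal sketch of such a per-line suppression (the URL is invented):

```python
# See https://example.com/a/very/long/reference/url/that/cannot/be/wrapped/in/a/docstring  # noqa: E501
```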
107 | 108 | Some editors support automatic formatting on save; see, for example, [vscode](https://code.visualstudio.com/docs/python/editing#_formatting). 109 | 110 | ## Writing documentation 111 | 112 | Datasetinsights uses [Google style](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) for formatting docstrings. 113 | Line length inside docstring blocks must be limited to 80 characters, with exceptions such as long URLs or tables. 114 | 115 | ### Building documentation 116 | 117 | Follow the instructions [here](docs/README.md). 118 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.0-cudnn7-runtime-ubuntu18.04 2 | 3 | RUN apt-get update \ 4 | && apt-get install -y \ 5 | build-essential \ 6 | curl \ 7 | libsm6 \ 8 | libxext6 \ 9 | libxrender-dev \ 10 | libgl1-mesa-dev \ 11 | libffi-dev \ 12 | libzmq3-dev \ 13 | python3.8-dev \ 14 | python3-pip \ 15 | && ln -s /usr/bin/python3.8 /usr/local/bin/python 16 | 17 | RUN python -m pip install --upgrade pip 18 | RUN python -m pip install setuptools==60.2.0 cryptography==36.0.1 poetry==1.1.12 notebook==6.4.8 19 | 20 | # Add Tini 21 | ENV TINI_VERSION v0.18.0 22 | ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /usr/local/bin/tini 23 | RUN chmod +x /usr/local/bin/tini 24 | 25 | WORKDIR /datasetinsights 26 | VOLUME /data /root/.config 27 | 28 | COPY poetry.lock pyproject.toml ./ 29 | RUN poetry config virtualenvs.create false \ 30 | && poetry install --no-root 31 | 32 | COPY . ./ 33 | # Run poetry install again to install datasetinsights 34 | RUN poetry config virtualenvs.create false \ 35 | && poetry install 36 | 37 | # Use -g to ensure all child processes receive SIGKILL 38 | ENTRYPOINT ["tini", "-g", "--"] 39 | 40 | CMD sh -c "jupyter notebook --notebook-dir=/ --ip=0.0.0.0 --no-browser --allow-root --port=8888 --NotebookApp.token='' --NotebookApp.password='' --NotebookApp.allow_origin='*' --NotebookApp.base_url=${NB_PREFIX}" 41 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help 2 | 3 | help: 4 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 5 | 6 | .DEFAULT_GOAL := help 7 | 8 | GCP_PROJECT_ID := unity-ai-thea-test 9 | TAG ?= latest 10 | 11 | build: ## Build datasetinsights docker image 12 | @echo "Building docker image for datasetinsights with tag: $(TAG)" 13 | @docker build -t datasetinsights:$(TAG) .
14 | 15 | push: ## Push datasetinsights docker image to registry 16 | @echo "Uploading docker image to GCS registry with tag: $(TAG)" 17 | @docker tag datasetinsights:$(TAG) gcr.io/$(GCP_PROJECT_ID)/datasetinsights:$(TAG) && \ 18 | docker push gcr.io/$(GCP_PROJECT_ID)/datasetinsights:$(TAG) 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dataset Insights 2 | 3 | [![PyPI python](https://img.shields.io/pypi/pyversions/datasetinsights)](https://pypi.org/project/datasetinsights) 4 | [![PyPI version](https://badge.fury.io/py/datasetinsights.svg)](https://pypi.org/project/datasetinsights) 5 | [![Downloads](https://pepy.tech/badge/datasetinsights)](https://pepy.tech/project/datasetinsights) 6 | [![Tests](https://github.com/Unity-Technologies/datasetinsights/actions/workflows/linting-and-unittests.yaml/badge.svg?branch=master&event=push)](https://github.com/Unity-Technologies/datasetinsights/actions/workflows/linting-and-unittests.yaml?query=branch%3Amaster+event%3Apush) 7 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) 8 | 9 | Unity Dataset Insights is a Python package for downloading, parsing and analyzing synthetic datasets generated using the Unity [Perception package](https://github.com/Unity-Technologies/com.unity.perception). 10 | 11 | ## Installation 12 | 13 | Datasetinsights is published to PyPI. You can install it by running the following command in a supported Python environment:
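```bash
pip install datasetinsights
```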
14 | 15 | ## Getting Started 16 | 17 | ### Dataset Statistics 18 | 19 | We provide a sample [notebook](notebooks/Perception_Statistics.ipynb) to help you load synthetic datasets generated using the [Perception package](https://github.com/Unity-Technologies/com.unity.perception) and visualize dataset statistics. We plan to support other sample Unity projects in the future. 20 | 21 | ### Load Datasets 22 | 23 | The [Unity Perception](https://datasetinsights.readthedocs.io/en/latest/datasetinsights.datasets.unity_perception.html#datasetinsights-datasets-unity-perception) package provides datasets under this [schema](https://datasetinsights.readthedocs.io/en/latest/Synthetic_Dataset_Schema.html#synthetic-dataset-schema). The datasetinsights package also provides convenient Python modules to parse datasets. 24 | 25 | For example, you can load `AnnotationDefinitions` into a Python dictionary by providing the corresponding annotation definition ID: 26 | 27 | ```python 28 | from datasetinsights.datasets.unity_perception import AnnotationDefinitions 29 | 30 | annotation_def = AnnotationDefinitions(data_root=dest, version="my_schema_version") 31 | definition_dict = annotation_def.get_definition(def_id="my_definition_id") 32 | ``` 33 | 34 | Similarly, for `MetricDefinitions`: 35 | ```python 36 | from datasetinsights.datasets.unity_perception import MetricDefinitions 37 | 38 | metric_def = MetricDefinitions(data_root=dest, version="my_schema_version") 39 | definition_dict = metric_def.get_definition(def_id="my_definition_id") 40 | ``` 41 | 42 | The `Captures` table provides the collection of simulation captures and annotations. You can load these records directly as a Pandas `DataFrame`: 43 | 44 | ```python 45 | from datasetinsights.datasets.unity_perception import Captures 46 | 47 | captures = Captures(data_root=dest, version="my_schema_version") 48 | captures_df = captures.filter(def_id="my_definition_id") 49 | ``` 50 | 51 | 52 | The `Metrics` table can store simulation metrics for a capture or annotation. You can also load these records as a Pandas `DataFrame`: 53 | 54 | ```python 55 | from datasetinsights.datasets.unity_perception import Metrics 56 | 57 | metrics = Metrics(data_root=dest, version="my_schema_version") 58 | metrics_df = metrics.filter_metrics(def_id="my_definition_id") 59 | ``` 60 | 61 | ### Download Datasets 62 | 63 | You can download the datasets using the [download](https://datasetinsights.readthedocs.io/en/latest/datasetinsights.commands.html#datasetinsights-commands-download) command: 64 | 65 | ```bash 66 | datasetinsights download --source-uri=<source-uri> --output=$HOME/data 67 | ``` 68 | 69 | The download command supports HTTP(S) and GCS sources. 70 | 71 | Alternatively, you can download datasets directly from the Python [interface](https://datasetinsights.readthedocs.io/en/latest/datasetinsights.io.downloader.html#module-datasetinsights.io.downloader). 72 | 73 | `GCSDatasetDownloader` can download a dataset from GCS locations. 74 | ```python 75 | from datasetinsights.io.downloader import GCSDatasetDownloader 76 | 77 | source_uri = "gs://url/to/file.zip"  # or "gs://url/to/folder" 78 | dest = "~/data" 79 | downloader = GCSDatasetDownloader() 80 | downloader.download(source_uri=source_uri, output=dest) 81 | ``` 82 | 83 | `HTTPDatasetDownloader` can download a dataset from any HTTP(S) URL. 84 | ```python 85 | from datasetinsights.io.downloader import HTTPDatasetDownloader 86 | 87 | source_uri = "http://url.to.file.zip" 88 | dest = "~/data" 89 | downloader = HTTPDatasetDownloader() 90 | downloader.download(source_uri=source_uri, output=dest) 91 | ``` 92 | 93 | ### Convert Datasets 94 | 95 | If you are interested in converting the synthetic dataset to COCO format for 96 | the annotation types that COCO supports, you can run the `convert` command: 97 | 98 | ```bash 99 | datasetinsights convert -i <input> -o <output> -f COCO-Instances 100 | ``` 101 | or 102 | ```bash 103 | datasetinsights convert -i <input> -o <output> -f COCO-Keypoints 104 | ``` 105 | 106 | You will need to provide the 2D bounding box definition ID in the synthetic dataset. We currently only support 2D bounding box and human keypoint annotations for the COCO format. 107 | 108 | ## Docker 109 | 110 | You can use the pre-built docker image [unitytechnologies/datasetinsights](https://hub.docker.com/r/unitytechnologies/datasetinsights) to interact with datasets; an illustrative `docker run` command is sketched below, after the License section. 111 | 112 | ## Documentation 113 | 114 | You can find the API documentation on [readthedocs](https://datasetinsights.readthedocs.io/en/latest/). 115 | 116 | ## Contributing 117 | 118 | Please let us know if you encounter a bug by filing an issue. To learn more about making a contribution to Dataset Insights, please see our Contribution [page](CONTRIBUTING.md). 119 | 120 | ## License 121 | 122 | Dataset Insights is licensed under the Apache License, Version 2.0. See [LICENSE](LICENCE) for the full license text.
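For illustration, one way to run the image mentioned in the Docker section above — the port and volume mapping are assumptions based on this repository's Dockerfile, which starts a Jupyter notebook server on port 8888 and declares a `/data` volume:

```bash
# Hypothetical invocation; adjust host paths and ports to your setup.
docker run -p 8888:8888 -v $HOME/data:/data unitytechnologies/datasetinsights:latest
```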
123 | 124 | ## Citation 125 | If you find this package useful, consider citing it using: 126 | ``` 127 | @misc{datasetinsights2020, 128 | title={Unity {D}ataset {I}nsights Package}, 129 | author={{Unity Technologies}}, 130 | howpublished={\url{https://github.com/Unity-Technologies/datasetinsights}}, 131 | year={2020} 132 | } 133 | ``` 134 | -------------------------------------------------------------------------------- /datasetinsights/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/datasetinsights/__init__.py -------------------------------------------------------------------------------- /datasetinsights/__main__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import click 4 | 5 | from datasetinsights.commands import Entrypoint 6 | from datasetinsights.constants import CONTEXT_SETTINGS 7 | 8 | logging.basicConfig( 9 | level=logging.INFO, 10 | format=( 11 | "%(levelname)s | %(asctime)s | %(name)s | %(threadName)s | " 12 | "%(message)s" 13 | ), 14 | datefmt="%Y-%m-%d %H:%M:%S", 15 | ) 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | @click.command( 20 | cls=Entrypoint, 21 | help="Dataset Insights.", 22 | context_settings=CONTEXT_SETTINGS, 23 | ) 24 | @click.option( 25 | "-v", 26 | "--verbose", 27 | is_flag=True, 28 | default=False, 29 | help="Enables verbose mode.", 30 | ) 31 | def entrypoint(verbose): 32 | if verbose: 33 | root_logger = logging.getLogger() 34 | root_logger.setLevel(logging.DEBUG) 35 | 36 | 37 | if __name__ == "__main__": 38 | entrypoint() 39 | -------------------------------------------------------------------------------- /datasetinsights/commands/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import click 4 | 5 | 6 | class Entrypoint(click.MultiCommand): 7 | """Click MultiCommand Entrypoint For Datasetinsights CLI""" 8 | 9 | def list_commands(self, ctx): 10 | """Dynamically get the list of commands.""" 11 | rv = [] 12 | for filename in os.listdir(os.path.dirname(__file__)): 13 | if filename.endswith(".py") and not filename.startswith("__init__"): 14 | rv.append(filename[:-3]) 15 | rv.sort() 16 | 17 | return rv 18 | 19 | def get_command(self, ctx, name): 20 | """Dynamically get the command.""" 21 | ns = {} 22 | fn = os.path.join(os.path.dirname(__file__), name + ".py") 23 | if not os.path.exists(fn): 24 | return None 25 | with open(fn) as f: 26 | code = compile(f.read(), fn, "exec") 27 | eval(code, ns, ns) 28 | 29 | return ns["cli"] 30 | -------------------------------------------------------------------------------- /datasetinsights/commands/convert.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import click 4 | 5 | import datasetinsights.constants as const 6 | from datasetinsights.datasets.transformers import get_dataset_transformer 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | @click.command(context_settings=const.CONTEXT_SETTINGS) 12 | @click.option( 13 | "-i", 14 | "--input", 15 | type=click.Path(exists=True, file_okay=False), 16 | required=True, 17 | help="Directory of the Synthetic dataset.", 18 | ) 19 | @click.option( 20 | "-o", 21 | "--output", 22 | type=click.Path(file_okay=False, writable=True), 23 | required=True, 24 | help="Directory of the converted dataset.", 25 | ) 26 | @click.option( 
27 | "-f", 28 | "--format", 29 | required=True, 30 | help=( 31 | "The output dataset format. " 32 | "Currently only 'COCO-Instances' and 'COCO-Keypoints' is supported." 33 | ), 34 | ) 35 | def cli(input, output, format): 36 | """Convert dataset from Perception format to target format.""" 37 | ctx = click.get_current_context() 38 | logger.debug(f"Called convert command with parameters: {ctx.params}") 39 | 40 | transformer = get_dataset_transformer(format=format, data_root=input) 41 | transformer.execute(output=output) 42 | -------------------------------------------------------------------------------- /datasetinsights/commands/download.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import re 3 | 4 | import click 5 | 6 | import datasetinsights.constants as const 7 | from datasetinsights.io.downloader.base import create_dataset_downloader 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class SourceURI(click.ParamType): 13 | """Represents the Source URI Parameter type. 14 | 15 | This extends click.ParamType that allows click framework to validates 16 | supported source URI according to the prefix pattern. 17 | 18 | Raises: 19 | click.BadParameter: if the validation failed. 20 | """ 21 | 22 | name = "source_uri" 23 | PREFIX_PATTERN = r"^gs://|^http(s)?://|^usim://" 24 | 25 | def convert(self, value, param, ctx): 26 | """Validate source URI and Converts the value.""" 27 | match = re.search(self.PREFIX_PATTERN, value) 28 | if not match: 29 | message = ( 30 | f"The source uri {value} is not supported. " 31 | f"Pattern: {self.PREFIX_PATTERN}" 32 | ) 33 | self.fail(message, param, ctx) 34 | 35 | return value 36 | 37 | 38 | @click.command( 39 | context_settings=const.CONTEXT_SETTINGS, 40 | ) 41 | @click.option( 42 | "-s", 43 | "--source-uri", 44 | type=SourceURI(), 45 | required=True, 46 | help=( 47 | "URI of where this data should be downloaded. " 48 | f"Supported source uri patterns {SourceURI.PREFIX_PATTERN}" 49 | ), 50 | ) 51 | @click.option( 52 | "-o", 53 | "--output", 54 | type=click.Path(exists=True, file_okay=False, writable=True), 55 | default=const.DEFAULT_DATA_ROOT, 56 | help="Directory on localhost where datasets should be downloaded.", 57 | ) 58 | @click.option( 59 | "-b", 60 | "--include-binary", 61 | is_flag=True, 62 | default=False, 63 | help=( 64 | "Whether to download binary files such as images or LIDAR point " 65 | "clouds. This flag applies to Datasets where metadata " 66 | "(e.g. annotation json, dataset catalog, ...) can be separated from " 67 | "binary files." 68 | ), 69 | ) 70 | @click.option( 71 | "--access-token", 72 | type=str, 73 | default=None, 74 | help="Unity Simulation access token. " 75 | "This will override synthetic datasets source-uri for Unity Simulation", 76 | ) 77 | @click.option( 78 | "--checksum-file", 79 | type=str, 80 | default=None, 81 | help="Dataset checksum text file path. " 82 | "Path can be a HTTP(S) url or a local file path. This will help check the " 83 | "integrity of the downloaded dataset.", 84 | ) 85 | def cli( 86 | source_uri, 87 | output, 88 | include_binary, 89 | access_token, 90 | checksum_file, 91 | ): 92 | """Download datasets to localhost from known locations. 93 | 94 | The download command can support downloading from 3 types of sources 95 | 96 | 1. 
97 | 98 | You can specify project_id, run_execution_id, access_token in source-uri: 99 | 100 | \b 101 | datasetinsights download \\ 102 | --source-uri=usim://<access_token>@<project_id>/<run_execution_id> \\ 103 | --output=$HOME/data 104 | 105 | Alternatively, you can also override access_token such as: 106 | 107 | \b 108 | datasetinsights download \\ 109 | --source-uri=usim://<project_id>/<run_execution_id> \\ 110 | --output=$HOME/data \\ 111 | --access-token=<access_token> 112 | 113 | 2. Downloading from a public HTTP(S) URL: 114 | 115 | \b 116 | datasetinsights download \\ 117 | --source-uri=http://url/to/file.zip \\ 118 | --output=$HOME/data 119 | 120 | 3. Downloading from a GCS URL: 121 | 122 | \b 123 | datasetinsights download \\ 124 | --source-uri=gs://url/to/file.zip \\ 125 | --output=$HOME/data 126 | 127 | or download all objects under the same directory: 128 | 129 | \b 130 | datasetinsights download \\ 131 | --source-uri=gs://url/to/directory \\ 132 | --output=$HOME/data 133 | """ 134 | ctx = click.get_current_context() 135 | logger.debug(f"Called download command with parameters: {ctx.params}") 136 | 137 | downloader = create_dataset_downloader( 138 | source_uri=source_uri, access_token=access_token 139 | ) 140 | downloader.download( 141 | source_uri=source_uri, 142 | output=output, 143 | include_binary=include_binary, 144 | checksum_file=checksum_file, 145 | ) 146 | -------------------------------------------------------------------------------- /datasetinsights/constants.py: -------------------------------------------------------------------------------- 1 | DEFAULT_DATA_ROOT = "/data" 2 | 3 | # Default perception schema version 4 | DEFAULT_PERCEPTION_VERSION = "0.0.1" 5 | 6 | 7 | # Default Timing text for codetiming.Timer decorator 8 | TIMING_TEXT = "[{name}] elapsed time: {:0.4f} seconds." 9 | 10 | # Click CLI context settings 11 | CONTEXT_SETTINGS = { 12 | "help_option_names": ["-h", "--help"], 13 | "show_default": True, 14 | "ignore_unknown_options": True, 15 | "allow_extra_args": True, 16 | } 17 | -------------------------------------------------------------------------------- /datasetinsights/dashboard.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | 5 | import dash_core_components as dcc 6 | import dash_html_components as html 7 | from dash.dependencies import Input, Output 8 | 9 | import datasetinsights.stats.visualization.overview as overview 10 | from datasetinsights.stats.visualization.app import get_app 11 | from datasetinsights.stats.visualization.object_detection import ( 12 | render_object_detection_layout, 13 | ) 14 | 15 | app = get_app() 16 | 17 | 18 | def main_layout(): 19 | """Method for generating main app layout. 20 | 21 | Returns: 22 | html layout: main layout design with tabs for overview statistics 23 | and object detection. 24 | """ 25 | app_layout = html.Div( 26 | [ 27 | html.H1( 28 | children="Dataset Insights", 29 | style={ 30 | "textAlign": "center", 31 | "padding": 20, 32 | "background": "lightgrey", 33 | }, 34 | ), 35 | html.Div( 36 | [ 37 | dcc.Tabs( 38 | id="page_tabs", 39 | value="dataset_overview", 40 | children=[ 41 | dcc.Tab( 42 | label="Overview", 43 | value="dataset_overview", 44 | ), 45 | dcc.Tab( 46 | label="Object Detection", 47 | value="object_detection", 48 | ), 49 | ], 50 | ), 51 | html.Div(id="main_page_tabs"), 52 | ] 53 | ), 54 | # Sharing data between callbacks using a hidden division. 55 | # These hidden dcc and html components are for storing data-root 56 | # into the division.
This is further used in callbacks made in the 57 | # object_detection module. This is a temporary hack, based on example 1 58 | # of the Dash tutorial on sharing data between callbacks. 59 | # ref: https://dash.plotly.com/sharing-data-between-callbacks 60 | # TODO: Fix this using a better solution to share data. 61 | dcc.Dropdown(id="dropdown", style={"display": "none"}), 62 | html.Div(id="data_root_value", style={"display": "none"}), 63 | ] 64 | ) 65 | return app_layout 66 | 67 | 68 | @app.callback( 69 | Output("data_root_value", "children"), [Input("dropdown", "value")] 70 | ) 71 | def store_data_root(value): 72 | """Method for storing the data-root value in a hidden division. 73 | 74 | Returns: 75 | json : data-root encoded in json to be stored in the data_root_value div. 76 | """ 77 | json_data_root = json.dumps(data_root) 78 | 79 | return json_data_root 80 | 81 | 82 | @app.callback( 83 | Output("main_page_tabs", "children"), 84 | [Input("page_tabs", "value"), Input("data_root_value", "children")], 85 | ) 86 | def render_content(value, json_data_root): 87 | """Method for rendering the dashboard layout based 88 | on the selected tab value. 89 | 90 | Args: 91 | value (str): selected tab value 92 | json_data_root: data root stored in the hidden div in json format. 93 | 94 | Returns: 95 | html layout: layout for the selected tab. 96 | """ 97 | # read the data root value from the data_root_value division 98 | data_root = json.loads(json_data_root) 99 | if value == "dataset_overview": 100 | return overview.html_overview(data_root) 101 | elif value == "object_detection": 102 | return render_object_detection_layout(data_root) 103 | 104 | 105 | def check_path(path): 106 | """Method for checking if the given data-root path is valid or not.""" 107 | if os.path.isdir(path): 108 | return path 109 | else: 110 | raise ValueError(f"Path {path} not found") 111 | 112 | 113 | if __name__ == "__main__": 114 | parser = argparse.ArgumentParser() 115 | parser.add_argument("--data-root", help="Path to the data root") 116 | args = parser.parse_args() 117 | data_root = check_path(args.data_root) 118 | app.layout = main_layout() 119 | app.run_server(debug=True) 120 |
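For orientation, a sketch of launching this dashboard — the module invocation is an assumption based on this file's location in the package; `--data-root` is the only flag defined above:

```bash
# Hypothetical launch command; point --data-root at a Perception dataset.
python -m datasetinsights.dashboard --data-root=$HOME/data
```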
-------------------------------------------------------------------------------- /datasetinsights/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/datasetinsights/datasets/__init__.py -------------------------------------------------------------------------------- /datasetinsights/datasets/exceptions.py: -------------------------------------------------------------------------------- 1 | class DatasetNotFoundError(Exception): 2 | """Raise when a dataset file can't be found.""" 3 | -------------------------------------------------------------------------------- /datasetinsights/datasets/synthetic.py: -------------------------------------------------------------------------------- 1 | """ Simulation Dataset Catalog 2 | """ 3 | 4 | 5 | import logging 6 | 7 | from pyquaternion import Quaternion 8 | 9 | from datasetinsights.io.bbox import BBox2D, BBox3D 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | def read_bounding_box_3d(annotation, label_mappings=None): 15 | """Convert dictionary representations of 3d bounding boxes into objects 16 | of the BBox3D class 17 | 18 | Args: 19 | annotation (List[dict]): 3D bounding box annotation 20 | label_mappings (dict): a dict of {label_id: label_name} mapping 21 | 22 | Returns: 23 | A list of 3d bounding box objects 24 | """ 25 | 26 | bboxes = [] 27 | 28 | for b in annotation: 29 | label_id = b["label_id"] 30 | translation = ( 31 | b["translation"]["x"], 32 | b["translation"]["y"], 33 | b["translation"]["z"], 34 | ) 35 | size = (b["size"]["x"], b["size"]["y"], b["size"]["z"]) 36 | rotation = b["rotation"] 37 | rotation = Quaternion( 38 | x=rotation["x"], y=rotation["y"], z=rotation["z"], w=rotation["w"] 39 | ) 40 | 41 | if label_mappings and label_id not in label_mappings: 42 | continue 43 | box = BBox3D( 44 | translation=translation, 45 | size=size, 46 | label=label_id, 47 | sample_token=0, 48 | score=1, 49 | rotation=rotation, 50 | ) 51 | bboxes.append(box) 52 | 53 | return bboxes 54 | 55 | 56 | def read_bounding_box_2d(annotation, label_mappings=None): 57 | """Convert dictionary representations of 2d bounding boxes into objects 58 | of the BBox2D class 59 | 60 | Args: 61 | annotation (List[dict]): 2D bounding box annotation 62 | label_mappings (dict): a dict of {label_id: label_name} mapping 63 | 64 | Returns: 65 | A list of 2D bounding box objects 66 | """ 67 | bboxes = [] 68 | for b in annotation: 69 | label_id = b["label_id"] 70 | x = b["x"] 71 | y = b["y"] 72 | w = b["width"] 73 | h = b["height"] 74 | if label_mappings and label_id not in label_mappings: 75 | continue 76 | box = BBox2D(label=label_id, x=x, y=y, w=w, h=h) 77 | bboxes.append(box) 78 | 79 | return bboxes 80 |
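A hypothetical call to `read_bounding_box_2d` above — the dictionary keys mirror what the parser reads, while the values and label mapping are invented for illustration:

```python
from datasetinsights.datasets.synthetic import read_bounding_box_2d

# Two fake annotation records, shaped like the keys parsed above.
annotation = [
    {"label_id": 1, "x": 10.0, "y": 20.0, "width": 30.0, "height": 40.0},
    {"label_id": 99, "x": 0.0, "y": 0.0, "width": 5.0, "height": 5.0},
]
# Records whose label_id is absent from label_mappings are skipped,
# so only the first box is returned here.
bboxes = read_bounding_box_2d(annotation, label_mappings={1: "car"})
```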
-------------------------------------------------------------------------------- /datasetinsights/datasets/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import get_dataset_transformer 2 | from .coco import COCOInstancesTransformer, COCOKeypointsTransformer 3 | 4 | __all__ = [ 5 | "COCOInstancesTransformer", 6 | "COCOKeypointsTransformer", 7 | "get_dataset_transformer", 8 | ] 9 | -------------------------------------------------------------------------------- /datasetinsights/datasets/transformers/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | def get_dataset_transformer(format, **kwargs): 5 | """ 6 | Returns an instantiated transformer object from the registry, based on 7 | the provided conversion format. 8 | 9 | Args: 10 | format (str): Conversion format to be used for dataset transformation. 11 | 12 | Returns: Transformer object instance. 13 | 14 | """ 15 | if format in DatasetTransformer.REGISTRY.keys(): 16 | transformer = DatasetTransformer.REGISTRY[format] 17 | else: 18 | raise ValueError( 19 | f"Transformer not found for conversion format '{format}'" 20 | ) 21 | 22 | return transformer(**kwargs) 23 | 24 | 25 | class DatasetTransformer(ABC): 26 | """Base class for all dataset transformers.""" 27 | 28 | REGISTRY = {} 29 | 30 | @classmethod 31 | def __init_subclass__(cls, format=None, **kwargs): 32 | if format: 33 | cls.REGISTRY[format] = cls 34 | else: 35 | raise NotImplementedError( 36 | f"Subclass needs to have a class keyword argument named " 37 | f"'format'." 38 | ) 39 | super().__init_subclass__(**kwargs) 40 | 41 | @abstractmethod 42 | def execute(self, output, **kwargs): 43 | raise NotImplementedError("Subclass needs to implement this method")
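To make the registry mechanism above concrete, here is a hypothetical subclass — the class name and format string are invented; registration happens through the `format` class keyword consumed by `__init_subclass__`:

```python
# Illustrative only: a custom transformer registering itself in REGISTRY.
class MyTransformer(DatasetTransformer, format="My-Format"):
    def __init__(self, data_root=None, **kwargs):
        self._data_root = data_root

    def execute(self, output, **kwargs):
        # Write the converted dataset under `output`.
        ...

# get_dataset_transformer(format="My-Format", data_root="/data")
# would now return a MyTransformer instance.
```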
-------------------------------------------------------------------------------- /datasetinsights/datasets/unity_perception/__init__.py: -------------------------------------------------------------------------------- 1 | from .captures import Captures 2 | from .metrics import Metrics 3 | from .references import AnnotationDefinitions, Egos, MetricDefinitions, Sensors 4 | 5 | __all__ = [ 6 | "AnnotationDefinitions", 7 | "Captures", 8 | "Egos", 9 | "Metrics", 10 | "MetricDefinitions", 11 | "Sensors", 12 | ] 13 | -------------------------------------------------------------------------------- /datasetinsights/datasets/unity_perception/exceptions.py: -------------------------------------------------------------------------------- 1 | class DefinitionIDError(Exception): 2 | """Raise when a given definition id can't be found.""" 3 | -------------------------------------------------------------------------------- /datasetinsights/datasets/unity_perception/metrics.py: -------------------------------------------------------------------------------- 1 | """Load Synthetic dataset Metrics 2 | """ 3 | import json 4 | 5 | import dask.bag as db 6 | 7 | from datasetinsights.constants import DEFAULT_DATA_ROOT 8 | 9 | from .exceptions import DefinitionIDError 10 | from .tables import DATASET_TABLES, SCHEMA_VERSION, glob 11 | from .validation import verify_version 12 | 13 | 14 | class Metrics: 15 | """Load metrics table 16 | 17 | Metrics store extra metadata that can be used to describe a particular 18 | sequence, capture or annotation. Metric records are stored as an arbitrary 19 | number (M) of key-value pairs. 20 | For more detail, see the schema design doc: 21 | :ref:`metrics` 22 | 23 | Attributes: 24 | metrics (dask.bag.core.Bag): a collection of metrics records 25 | Examples: 26 | >>> metrics = Metrics(data_root="/data") 27 | >>> metrics_df = metrics.filter_metrics(def_id="my_definition_id") 28 | # metrics_df now contains all the metrics data corresponding to 29 | "my_definition_id" 30 | 31 | One example of metrics_df (first row shown below): 32 | 33 | +---------------+------------------+---------------------+ 34 | | label_id(int) | instance_id(int) | visible_pixels(int) | 35 | +===============+==================+=====================+ 36 | | 2 | 2 | 2231 | 37 | +---------------+------------------+---------------------+ 38 | 39 | """ 40 | 41 | TABLE_NAME = "metrics" 42 | FILE_PATTERN = DATASET_TABLES[TABLE_NAME].file 43 | 44 | def __init__(self, data_root=DEFAULT_DATA_ROOT, version=SCHEMA_VERSION): 45 | """Initialize Metrics 46 | 47 | Args: 48 | data_root (str): the root directory of the dataset containing 49 | metrics 50 | version (str): desired schema version 51 | """ 52 | self.metrics = self._load_metrics(data_root, version) 53 | 54 | def _load_metrics(self, data_root, version): 55 | """Load metrics records from json files into a dask bag. 56 | See :ref:`metrics`. 57 | 58 | 59 | Args: 60 | data_root (str): the root directory of the dataset containing 61 | metrics 62 | version (str): desired schema version 63 | 64 | Returns: 65 | dask.bag.core.Bag 66 | """ 67 | metrics_files = db.from_sequence(glob(data_root, self.FILE_PATTERN)) 68 | metrics = metrics_files.map( 69 | lambda path: Metrics._load_json(path, self.TABLE_NAME, version) 70 | ).flatten() 71 | 72 | return metrics 73 | 74 | @staticmethod 75 | def _normalize_values(metric): 76 | """Filter unnecessary info from a metric record. 77 | One-level flatten of the metrics.values column. 78 | """ 79 | values = metric["values"] 80 | for value in values: 81 | value["capture_id"] = metric["capture_id"] 82 | value["annotation_id"] = metric["annotation_id"] 83 | value["sequence_id"] = metric["sequence_id"] 84 | value["step"] = metric["step"] 85 | 86 | return values 87 | 88 | def filter_metrics(self, def_id): 89 | """Get all metrics filtered by a given metric definition id 90 | 91 | Args: 92 | def_id (str): metric definition id used to filter results 93 | Raises: 94 | DefinitionIDError: raised if no metrics records match the given 95 | def_id 96 | Returns (pd.DataFrame): 97 | Columns: "label_id", "capture_id", "annotation_id", "sequence_id", 98 | "step" 99 | """ 100 | metrics = ( 101 | self.metrics.filter( 102 | lambda metric: metric["metric_definition"] == def_id 103 | ) 104 | .map(Metrics._normalize_values) 105 | .flatten() 106 | ) 107 | if metrics.count().compute() == 0: 108 | msg = ( 109 | f"Can't find metrics records associated with the given " 110 | f"definition id {def_id}."
111 | ) 112 | raise DefinitionIDError(msg) 113 | 114 | return metrics.to_dataframe().compute() 115 | 116 | @staticmethod 117 | def _load_json(filename, table_name, version): 118 | """Load records from json files into a dict""" 119 | with open(filename, "r", encoding="utf8") as file: 120 | data = json.load(file) 121 | verify_version(data, version) 122 | 123 | return data[table_name] 124 | -------------------------------------------------------------------------------- /datasetinsights/datasets/unity_perception/tables.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import pathlib 4 | from collections import namedtuple 5 | from enum import Enum 6 | 7 | import pandas as pd 8 | 9 | from .validation import verify_version 10 | 11 | logger = logging.getLogger(__name__) 12 | SCHEMA_VERSION = "0.0.1" # Synthetic dataset schema version 13 | 14 | 15 | class FileType(Enum): 16 | BINARY = "binary" 17 | REFERENCE = "reference" 18 | METRIC = "metric" 19 | CAPTURE = "capture" 20 | 21 | 22 | Table = namedtuple("Table", "file pattern filetype") 23 | DATASET_TABLES = { 24 | "annotation_definitions": Table( 25 | "**/annotation_definitions.json", 26 | r"(?:\w|-|/)*annotation_definitions.json", 27 | FileType.REFERENCE, 28 | ), 29 | "captures": Table( 30 | "**/captures_*.json", 31 | r"(?:\w|-|/)*captures_[0-9]+.json", 32 | FileType.CAPTURE, 33 | ), 34 | "egos": Table("**/egos.json", r"(?:\w|-|/)*egos.json", FileType.REFERENCE), 35 | "metric_definitions": Table( 36 | "**/metric_definitions.json", 37 | r"(?:\w|-|/)*metric_definitions.json", 38 | FileType.REFERENCE, 39 | ), 40 | "metrics": Table( 41 | "**/metrics_*.json", r"(?:\w|-|/)*metrics_[0-9]+.json", FileType.METRIC 42 | ), 43 | "sensors": Table( 44 | "**/sensors.json", r"(?:\w|-|/)*sensors.json", FileType.REFERENCE 45 | ), 46 | } 47 | 48 | 49 | def glob(data_root, pattern): 50 | """Find all matching files in a directory. 51 | 52 | Args: 53 | data_root (str): directory containing capture files 54 | pattern (str): Unix file pattern 55 | 56 | Yields: 57 | str: matched filenames in a directory 58 | """ 59 | path = pathlib.Path(data_root) 60 | for fp in path.glob(pattern): 61 | yield fp 62 | 63 | 64 | def load_table(json_file, table_name, version, **kwargs): 65 | """Load records from json files into a pandas table 66 | 67 | Args: 68 | json_file (str): filename to json. 69 | table_name (str): table name in the json file to be loaded 70 | version (str): requested version of this table 71 | **kwargs: arbitrary keyword arguments to be passed to pandas' 72 | json_normalize method. 73 | 74 | Returns: 75 | a pandas dataframe of the loaded table. 76 | 77 | Raises: 78 | VersionError: If the version in json file does not match the requested 79 | version. 
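Example (illustrative; assumes an `egos.json` file that matches the requested
schema version):

    >>> table = load_table("Dataset/egos.json", "egos", "0.0.1")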
80 | """ 81 | logger.debug(f"Loading table {table_name} from {json_file}") 82 | data = json.load(open(json_file, "r", encoding="utf8")) 83 | verify_version(data, version) 84 | table = pd.json_normalize(data[table_name], **kwargs) 85 | 86 | return table 87 | -------------------------------------------------------------------------------- /datasetinsights/datasets/unity_perception/validation.py: -------------------------------------------------------------------------------- 1 | """ Validate Simulation Data 2 | """ 3 | 4 | 5 | class VersionError(Exception): 6 | """Raise when the data file version does not match""" 7 | 8 | pass 9 | 10 | 11 | class DuplicateRecordError(Exception): 12 | """Raise when the definition file has duplicate definition id""" 13 | 14 | pass 15 | 16 | 17 | class NoRecordError(Exception): 18 | """Raise when no record is found matching a given definition id""" 19 | 20 | pass 21 | 22 | 23 | def verify_version(json_data, version): 24 | """Verify json schema version 25 | 26 | Args: 27 | json_data (json): a json object loaded from file. 28 | version (str): string of the requested version. 29 | 30 | Raises: 31 | VersionError: If the version in json file does not match the requested 32 | version. 33 | """ 34 | loaded = json_data["version"] 35 | if loaded != version: 36 | raise VersionError(f"Version mismatch. Expected version: {version}") 37 | 38 | 39 | def check_duplicate_records(table, column, table_name): 40 | """Check if table has duplicate records for a given column 41 | 42 | Args: 43 | table (pd.DataFrame): a pandas dataframe 44 | column (str): the column where no duplication is allowed 45 | table_name (str): table name 46 | 47 | Raises: 48 | DuplicateRecordError: If duplicate records are found in a column 49 | """ 50 | if table[column].nunique() != len(table): 51 | raise DuplicateRecordError( 52 | f"Duplicate record was found in {column} of table {table_name}. " 53 | f"This column is expected to be unique. Violating this requirement " 54 | f"might cause ambiguity when the records are loaded." 
55 | ) 56 | -------------------------------------------------------------------------------- /datasetinsights/io/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox import BBox2D 2 | from .downloader import create_dataset_downloader 3 | 4 | __all__ = [ 5 | "BBox2D", 6 | "create_dataset_downloader", 7 | ] 8 | -------------------------------------------------------------------------------- /datasetinsights/io/download.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import logging 3 | import os 4 | import re 5 | import tempfile 6 | import zlib 7 | from pathlib import Path 8 | 9 | import requests 10 | from requests.adapters import HTTPAdapter 11 | from requests.packages.urllib3.util.retry import Retry 12 | 13 | from .exceptions import ChecksumError, DownloadError 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | # Timeout of requests (in seconds) 18 | DEFAULT_TIMEOUT = 1800 19 | # Number of retries after a failed request 20 | DEFAULT_MAX_RETRIES = 5 21 | 22 | 23 | class TimeoutHTTPAdapter(HTTPAdapter): 24 | def __init__(self, timeout, *args, **kwargs): 25 | self.timeout = timeout 26 | super().__init__(*args, **kwargs) 27 | 28 | def send(self, request, **kwargs): 29 | kwargs["timeout"] = self.timeout 30 | return super().send(request, **kwargs) 31 | 32 | 33 | def download_file(source_uri: str, dest_path: str, file_name: str = None): 34 | """Download a file specified by a source uri 35 | 36 | Args: 37 | source_uri (str): source url where the file should be downloaded 38 | dest_path (str): destination path of the file 39 | file_name (str): file name of the file to be downloaded 40 | 41 | Returns: 42 | Path of the destination file. 43 | """ 44 | logger.debug(f"Trying to download file from {source_uri} -> {dest_path}") 45 | adapter = TimeoutHTTPAdapter( 46 | timeout=DEFAULT_TIMEOUT, max_retries=Retry(total=DEFAULT_MAX_RETRIES) 47 | ) 48 | with requests.Session() as http: 49 | http.mount("https://", adapter) 50 | try: 51 | response = http.get(source_uri) 52 | response.raise_for_status() 53 | except requests.exceptions.RequestException as ex: 54 | logger.error(ex) 55 | err_msg = ( 56 | f"The request download from {source_uri} -> {dest_path} can't " 57 | f"be completed." 58 | ) 59 | 60 | raise DownloadError(err_msg) 61 | else: 62 | dest_path = Path(dest_path) 63 | if not file_name: 64 | file_name = _parse_filename(response, source_uri) 65 | dest_path = dest_path / file_name 66 | dest_path.parent.mkdir(parents=True, exist_ok=True) 67 | with open(dest_path, "wb") as f: 68 | f.write(response.content) 69 | 70 | return dest_path 71 | 72 | 73 | def checksum_matches(filepath, expected_checksum, algorithm="CRC32"): 74 | """Check if the checksum matches 75 | 76 | Args: 77 | filepath (str): the downloaded file path 78 | expected_checksum (int): expected checksum of the file 79 | algorithm (str): checksum algorithm. Defaults to CRC32 80 | 81 | Returns: 82 | True if the file checksum matches. 83 | """ 84 | computed = compute_checksum(filepath, algorithm) 85 | return computed == expected_checksum 86 | 87 | 88 | def validate_checksum(filepath, expected_checksum, algorithm="CRC32"): 89 | """Validate checksum of the downloaded file. 90 | 91 | Args: 92 | filepath (str): the downloaded file path 93 | expected_checksum (int): expected checksum of the file 94 | algorithm (str): checksum algorithm. Defaults to CRC32 95 | 96 | Raises: 97 | ChecksumError if the file checksum does not match.
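A minimal usage sketch; the file path and the expected CRC32 value below
are illustrative:

    >>> validate_checksum("/data/dataset.zip", 912345678)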
98 | """ 99 | if not checksum_matches(filepath, expected_checksum, algorithm): 100 | raise ChecksumError 101 | 102 | 103 | def compute_checksum(filepath, algorithm="CRC32"): 104 | """Compute the checksum of a file. 105 | 106 | Args: 107 | filepath (str): the doaloaded file path 108 | algorithm (str): checksum algorithm. Defaults to CRC32 109 | 110 | Returns: 111 | int: the checksum value 112 | """ 113 | if algorithm == "CRC32": 114 | chs = _crc32_checksum(filepath) 115 | elif algorithm == "MD5": 116 | chs = _md5_checksum(filepath) 117 | else: 118 | raise ValueError("Unsupported checksum algorithm!") 119 | 120 | return chs 121 | 122 | 123 | def _crc32_checksum(filepath): 124 | """Calculate the checksum of a file using CRC32.""" 125 | with open(filepath, "rb") as f: 126 | checksum = zlib.crc32(f.read()) 127 | 128 | return checksum 129 | 130 | 131 | def _md5_checksum(filename): 132 | """Calculate the checksum of a file using MD5.""" 133 | md5 = hashlib.md5() 134 | with open(filename, "rb") as f: 135 | for chunk in iter(lambda: f.read(4096), b""): 136 | md5.update(chunk) 137 | return md5.hexdigest() 138 | 139 | 140 | def get_checksum_from_file(filepath): 141 | """This method return checksum of the file whose filepath is given. 142 | 143 | Args: 144 | filepath (str): Path of the checksum file. 145 | Path can be HTTP(s) url or local path. 146 | 147 | Raises: 148 | ValueError: Raises this error if filepath is not local or not 149 | HTTP or HTTPS url. 150 | 151 | """ 152 | 153 | if filepath.startswith(("http://", "https://")): 154 | with tempfile.TemporaryDirectory() as tmp: 155 | checksum_file_path = os.path.join(tmp, "checksum.txt") 156 | file_path = download_file( 157 | source_uri=filepath, dest_path=checksum_file_path 158 | ) 159 | return _read_checksum_from_txt(file_path) 160 | 161 | elif os.path.isfile(filepath): 162 | return _read_checksum_from_txt(filepath) 163 | 164 | else: 165 | raise ValueError(f"Can not get checksum from path: {filepath}") 166 | 167 | 168 | def _read_checksum_from_txt(filepath): 169 | """This method reads checksum from a txt file and returns it. 170 | 171 | Args: 172 | filepath (str): Local filepath of the checksum file. 173 | 174 | Returns: 175 | str: checksum value from the checksum file. 176 | 177 | """ 178 | with open(filepath) as file: 179 | checksum = file.read() 180 | return checksum 181 | 182 | 183 | def _parse_filename(response, uri): 184 | file_name = _get_filename_from_response(response) 185 | if file_name is None: 186 | file_name = _get_file_name_from_uri(uri) 187 | return file_name 188 | 189 | 190 | def _get_filename_from_response(response): 191 | """Gets filename from requests response object 192 | 193 | Args: 194 | response: requests.Response() object that contains the server's 195 | response to the HTTP request. 
196 | 197 | Returns: 198 | filename (str): Name of the file to be downloaded 199 | """ 200 | cd = response.headers.get("content-disposition") 201 | if not cd: 202 | return None 203 | file_name = re.findall("filename=(.+)", cd) 204 | if len(file_name) == 0: 205 | return None 206 | return file_name[0] 207 | 208 | 209 | def _get_file_name_from_uri(uri): 210 | """Gets filename from URI 211 | 212 | Args: 213 | uri (str): URI 214 | 215 | """ 216 | return uri.split("/")[-1] 217 | -------------------------------------------------------------------------------- /datasetinsights/io/downloader/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import create_dataset_downloader 2 | from .gcs_downloader import GCSDatasetDownloader 3 | from .http_downloader import HTTPDatasetDownloader 4 | 5 | __all__ = [ 6 | "HTTPDatasetDownloader", 7 | "create_dataset_downloader", 8 | "GCSDatasetDownloader", 9 | ] 10 | -------------------------------------------------------------------------------- /datasetinsights/io/downloader/base.py: -------------------------------------------------------------------------------- 1 | import re 2 | from abc import ABC, abstractmethod 3 | 4 | _registry = {} 5 | 6 | 7 | def _find_downloader(source_uri): 8 | """ 9 | This function returns the correct DatasetDownloader 10 | from a registry based on the source-uri provided 11 | 12 | Args: 13 | source_uri: URI of where this data should be downloaded. 14 | 15 | Returns: The dataset downloader class that is registered with the 16 | source-uri protocol. 17 | 18 | """ 19 | protocols = "|".join(_registry.keys()) 20 | pattern = re.compile(f"({protocols})") 21 | 22 | protocol = pattern.findall(source_uri) 23 | 24 | if source_uri.startswith(("https://", "http://")): 25 | protocol = "http://" 26 | elif protocol: 27 | protocol = protocol[0] 28 | else: 29 | raise ValueError(f"Downloader not found for source-uri '{source_uri}'") 30 | 31 | return _registry.get(protocol) 32 | 33 | 34 | def create_dataset_downloader(source_uri, **kwargs): 35 | """ 36 | This function instantiates the dataset downloader 37 | after finding it with the source-uri provided 38 | 39 | Args: 40 | source_uri: URI used to look up the correct dataset downloader 41 | **kwargs: keyword arguments passed to the downloader's constructor 42 | 43 | Returns: The dataset downloader instance matching the source-uri. 44 | 45 | """ 46 | downloader_class = _find_downloader(source_uri=source_uri) 47 | return downloader_class(**kwargs) 48 | 49 | 50 | class DatasetDownloader(ABC): 51 | """This is the base class for all dataset downloaders 52 | The DatasetDownloader can be subclassed in the following way 53 | 54 | class NewDatasetDownloader(DatasetDownloader, protocol="protocol://") 55 | 56 | Here the 'protocol://' should match the prefix of the source_uri that the 57 | download method supports, e.g. http:// or gs:// 58 | 59 | """ 60 | 61 | def __init__(self, **kwargs): 62 | pass 63 | 64 | @classmethod 65 | def __init_subclass__(cls, protocol=None, **kwargs): 66 | if protocol: 67 | _registry[protocol] = cls 68 | else: 69 | raise NotImplementedError( 70 | f"Subclass needs to have a class keyword argument named protocol."
71 | ) 72 | super().__init_subclass__(**kwargs) 73 | 74 | @abstractmethod 75 | def download(self, source_uri, output, **kwargs): 76 | """This method downloads a dataset stored at the source_uri and stores it 77 | in the output directory 78 | 79 | Args: 80 | source_uri: URI that points to the dataset that should be downloaded 81 | output: path to local folder where the dataset should be stored 82 | """ 83 | raise NotImplementedError("Subclass needs to implement this method") 84 | -------------------------------------------------------------------------------- /datasetinsights/io/downloader/gcs_downloader.py: -------------------------------------------------------------------------------- 1 | from datasetinsights.io.downloader.base import DatasetDownloader 2 | from datasetinsights.io.gcs import GCSClient 3 | 4 | 5 | class GCSDatasetDownloader(DatasetDownloader, protocol="gs://"): 6 | """This class is used to download data from GCS""" 7 | 8 | def __init__(self, **kwargs): 9 | """Initializes GCSDatasetDownloader.""" 10 | self.client = GCSClient() 11 | 12 | def download(self, source_uri=None, output=None, **kwargs): 13 | """ 14 | Downloads a dataset from GCS to the output directory. 15 | Args: 16 | source_uri: This is the downloader-uri that indicates where on 17 | GCS the dataset should be downloaded from. 18 | The expected source-uri follows these patterns 19 | gs://bucket/folder or gs://bucket/folder/data.zip 20 | 21 | output: This is the path to the directory 22 | where the download will store the dataset. 23 | """ 24 | self.client.download(local_path=output, url=source_uri) 25 | -------------------------------------------------------------------------------- /datasetinsights/io/downloader/http_downloader.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from datasetinsights.io.download import ( 5 | download_file, 6 | get_checksum_from_file, 7 | validate_checksum, 8 | ) 9 | from datasetinsights.io.downloader.base import DatasetDownloader 10 | from datasetinsights.io.exceptions import ChecksumError 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | class HTTPDatasetDownloader(DatasetDownloader, protocol="http://"): 16 | """This class is used to download data from any HTTP or HTTPS public url 17 | and perform functions such as downloading the dataset and checksum 18 | validation if a checksum file path is provided. 19 | """ 20 | 21 | def download(self, source_uri, output, checksum_file=None, **kwargs): 22 | """This method is used to download the dataset from HTTP or HTTPS url. 23 | 24 | Args: 25 | source_uri (str): This is the downloader-uri that indicates where 26 | the dataset should be downloaded from. 27 | 28 | output (str): This is the path to the directory where the download 29 | will store the dataset. 30 | 31 | checksum_file (str): This is the path of the txt file that contains 32 | the checksum of the dataset to be downloaded. It 33 | can be an HTTP or HTTPS url or a local path. 34 | 35 | Raises: 36 | ChecksumError: Raised if the checksum doesn't 37 | match 38 | 39 | """ 40 | dataset_path = download_file(source_uri, output) 41 | 42 | if checksum_file: 43 | logger.debug("Reading checksum from checksum file.") 44 | checksum = get_checksum_from_file(checksum_file) 45 | try: 46 | logger.debug("Validating checksum.") 47 | validate_checksum(dataset_path, int(checksum)) 48 | except ChecksumError as e: 49 | logger.info("Checksum mismatch.
Deleting the downloaded file.") 50 | os.remove(dataset_path) 51 | raise e 52 | -------------------------------------------------------------------------------- /datasetinsights/io/exceptions.py: -------------------------------------------------------------------------------- 1 | class DownloadError(Exception): 2 | """Raise when a file download fails.""" 3 | 4 | 5 | class ChecksumError(Exception): 6 | """Raise when the downloaded file checksum is not correct.""" 7 | 8 | 9 | class InvalidTrackerError(Exception): 10 | """Raise when an unknown tracker is requested.""" 11 | -------------------------------------------------------------------------------- /datasetinsights/stats/__init__.py: -------------------------------------------------------------------------------- 1 | from datasetinsights.stats.keypoints_stats import ( 2 | get_average_skeleton, 3 | get_scale_keypoints, 4 | get_visible_keypoints_dict, 5 | ) 6 | from datasetinsights.stats.object_detection_stats import ( 7 | convert_coco_annotations_to_df, 8 | get_bbox_heatmap, 9 | get_bbox_per_img_dict, 10 | get_bbox_relative_size_list, 11 | ) 12 | 13 | from .statistics import RenderedObjectInfo 14 | from .visualization.plots import ( 15 | bar_plot, 16 | grid_plot, 17 | histogram_plot, 18 | model_performance_box_plot, 19 | model_performance_comparison_box_plot, 20 | plot_bboxes, 21 | plot_keypoints, 22 | rotation_plot, 23 | ) 24 | 25 | __all__ = [ 26 | "bar_plot", 27 | "grid_plot", 28 | "histogram_plot", 29 | "plot_bboxes", 30 | "model_performance_box_plot", 31 | "model_performance_comparison_box_plot", 32 | "rotation_plot", 33 | "RenderedObjectInfo", 34 | "plot_keypoints", 35 | "convert_coco_annotations_to_df", 36 | "get_bbox_heatmap", 37 | "get_bbox_per_img_dict", 38 | "get_bbox_relative_size_list", 39 | "get_average_skeleton", 40 | "get_scale_keypoints", 41 | "get_visible_keypoints_dict", 42 | ] 43 | -------------------------------------------------------------------------------- /datasetinsights/stats/constants.py: -------------------------------------------------------------------------------- 1 | # Human pose skeleton 2 | COCO_SKELETON = [ 3 | [16, 14], 4 | [14, 12], 5 | [17, 15], 6 | [15, 13], 7 | [12, 13], 8 | [6, 12], 9 | [7, 13], 10 | [6, 7], 11 | [6, 8], 12 | [7, 9], 13 | [8, 10], 14 | [9, 11], 15 | [2, 3], 16 | [1, 2], 17 | [1, 3], 18 | [2, 4], 19 | [3, 5], 20 | [4, 6], 21 | [5, 7], 22 | ] 23 | 24 | # Human pose keypoints 25 | COCO_KEYPOINTS = [ 26 | "nose", 27 | "left_eye", 28 | "right_eye", 29 | "left_ear", 30 | "right_ear", 31 | "left_shoulder", 32 | "right_shoulder", 33 | "left_elbow", 34 | "right_elbow", 35 | "left_wrist", 36 | "right_wrist", 37 | "left_hip", 38 | "right_hip", 39 | "left_knee", 40 | "right_knee", 41 | "left_ankle", 42 | "right_ankle", 43 | ] 44 | -------------------------------------------------------------------------------- /datasetinsights/stats/image_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | from .laplacian import ( 2 | get_bbox_fg_bg_var_laplacian, 3 | get_bbox_var_laplacian, 4 | get_final_mask, 5 | get_seg_fg_bg_var_laplacian, 6 | laplacian_img, 7 | ) 8 | from .spectral_analysis import get_average_psd_1d, get_psd1d, get_psd2d 9 | from .wavelet import get_wt_coeffs_var 10 | 11 | __all__ = [ 12 | "get_bbox_var_laplacian", 13 | "get_bbox_fg_bg_var_laplacian", 14 | "laplacian_img", 15 | "get_seg_fg_bg_var_laplacian", 16 | "get_final_mask", 17 | "get_average_psd_1d", 18 | "get_psd1d", 19 | "get_psd2d", 20 | "get_wt_coeffs_var", 21 | ] 22 |
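# A minimal usage sketch of the functions exported above (not part of the
# library); the image directory path is an illustrative assumption:
#
#   from datasetinsights.stats.image_analysis import (
#       get_average_psd_1d,
#       get_wt_coeffs_var,
#   )
#   avg_psd_1d, std_psd_1d = get_average_psd_1d("/data/images", img_type="png")
#   h_var, v_var, d_var = get_wt_coeffs_var("/data/images", num_img=100)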
-------------------------------------------------------------------------------- /datasetinsights/stats/image_analysis/laplacian.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Tuple 2 | 3 | import cv2 4 | import numpy as np 5 | 6 | 7 | def laplacian_img(img_path: str) -> np.ndarray: 8 | """ 9 | Converts image to grayscale, computes laplacian and returns it. 10 | Args: 11 | img_path (str): Path of image 12 | 13 | Returns: 14 | np.ndarray: numpy array of Laplacian of the image 15 | """ 16 | image = cv2.imread(img_path) 17 | gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 18 | laplacian = cv2.Laplacian(gray, cv2.CV_64F) 19 | laplacian = laplacian.astype("float") 20 | return laplacian 21 | 22 | 23 | def get_bbox_var_laplacian( 24 | laplacian: np.ndarray, x: int, y: int, w: int, h: int 25 | ) -> np.ndarray: 26 | """ 27 | Calculates bbox's variance of Laplacian 28 | Args: 29 | laplacian (np.ndarray): Laplacian of the image 30 | x (int): the upper-left coordinate of the bounding box 31 | y (int): the upper-left coordinate of the bounding box 32 | w (int): width of bbox 33 | h (int): height of bbox 34 | 35 | Returns: 36 | Variance of Laplacian of bbox 37 | """ 38 | bbox_var = laplacian[y : y + h, x : x + w] 39 | return np.nanvar(bbox_var) 40 | 41 | 42 | def get_bbox_fg_bg_var_laplacian( 43 | laplacian: np.ndarray, annotations: List[Dict] 44 | ) -> Tuple[List, np.ndarray]: 45 | """ 46 | Calculates foreground and background variance of laplacian of an image 47 | based on bounding boxes 48 | Args: 49 | laplacian (np.ndarray): Laplacian of the image 50 | annotations (List): List of dictionary of annotations containing bbox 51 | information of the given image laplacian 52 | 53 | Returns: 54 | bbox_var_lap (List): List of variance of laplacian of all bbox in the 55 | image 56 | img_var_laplacian (np.ndarray): Variance of Laplacian of background 57 | of the image 58 | 59 | """ 60 | bbox_var_lap = [] 61 | img_laplacian = laplacian.copy()  # copy to avoid mutating the caller's array 62 | 63 | for ann in annotations: 64 | x, y, w, h = ann["bbox"] 65 | bbox_area = w * h 66 | if bbox_area >= 1200: # ignoring small bbox sizes 67 | bbox_var = get_bbox_var_laplacian( 68 | img_laplacian, int(x), int(y), int(w), int(h) 69 | ) 70 | img_laplacian[int(y) : int(y + h), int(x) : int(x + w)] = np.nan 71 | bbox_var_lap.append(bbox_var) 72 | 73 | img_var_laplacian = np.nanvar(img_laplacian) 74 | 75 | return bbox_var_lap, img_var_laplacian 76 | 77 | 78 | def get_final_mask(masks: List[np.ndarray]) -> np.ndarray: 79 | """ 80 | Get one mask from multiple masks of an image 81 | Args: 82 | masks (List[np.ndarray]): List of binary masks of an image 83 | 84 | Returns: 85 | final_mask = Final binary mask representing union of all masks of an 86 | image 87 | """ 88 | final_mask = np.zeros_like(masks[0]) 89 | for mask in masks: 90 | final_mask = np.bitwise_or(final_mask, mask) 91 | return final_mask 92 | 93 | 94 | def get_seg_fg_bg_var_laplacian( 95 | laplacian: np.ndarray, final_mask: np.ndarray 96 | ) -> Tuple[np.ndarray, np.ndarray]: 97 | """ 98 | Calculates foreground and background variance of laplacian of an image 99 | based on segmentation information 100 | Args: 101 | laplacian (np.ndarray): Laplacian of the image 102 | final_mask (np.ndarray): Binary mask of the image in which 1 is 103 | instances of the image 104 | 105 | Returns: 106 | fg_var_lap = Foreground var of laplacian 107 | bg_var_lap = Background var of laplacian 108 | 109 | """ 110 | fg = np.where(final_mask == 1, laplacian, np.nan)  # foreground: instance pixels 111 | bg
= np.where(final_mask == 0, laplacian, np.nan)  # background: everything else 112 | fg_var_lap = np.nanvar(fg) 113 | bg_var_lap = np.nanvar(bg) 114 | 115 | return fg_var_lap, bg_var_lap 116 | -------------------------------------------------------------------------------- /datasetinsights/stats/image_analysis/spectral_analysis.py: -------------------------------------------------------------------------------- 1 | import glob 2 | 3 | import numpy as np 4 | from PIL import Image 5 | from scipy import ndimage 6 | from tqdm import tqdm 7 | 8 | 9 | def get_psd2d(image: np.ndarray) -> np.ndarray: 10 | """ 11 | Args: 12 | image (np.ndarray): Grayscale Image 13 | 14 | Returns: 15 | np.ndarray: 2D PSD of the image 16 | """ 17 | h, w = image.shape 18 | fourier_image = np.fft.fft2(image) 19 | N = h * w * 2 20 | psd2d = (1 / N) * np.abs(fourier_image) ** 2 21 | psd2d = np.fft.fftshift(psd2d) 22 | return psd2d 23 | 24 | 25 | def get_psd1d(psd_2d: np.ndarray) -> np.ndarray: 26 | """ 27 | Args: 28 | psd_2d (np.ndarray): 2D PSD of the image 29 | 30 | Returns: 31 | np.ndarray: 1D PSD of the given 2D PSD 32 | """ 33 | h = psd_2d.shape[0] 34 | w = psd_2d.shape[1] 35 | wc = w // 2 36 | hc = h // 2 37 | 38 | # create an array of integer radial distances from the center 39 | y, x = np.ogrid[-h // 2 : h // 2, -w // 2 : w // 2] 40 | r = np.hypot(x, y).astype(int) 41 | idx = np.arange(0, min(wc, hc)) 42 | psd_1d = ndimage.sum(psd_2d, r, index=idx) 43 | return psd_1d 44 | 45 | 46 | def _load_img(img_path: str): 47 | img = Image.open(img_path) 48 | img = img.convert("RGB") 49 | img = img.convert("L") 50 | return np.array(img) 51 | 52 | 53 | def _load_images_from_dir(img_dir: str, img_type: str = "png"): 54 | image_paths = glob.glob(img_dir + f"/*.{img_type}") 55 | img_array = [] 56 | for img_path in image_paths: 57 | img = _load_img(img_path) 58 | img_array.append(img) 59 | return img_array 60 | 61 | 62 | def get_average_psd_1d(img_dir: str, img_type: str = "png"): 63 | """ 64 | Get the average PSD of the entire dataset.
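Per-image 1D PSDs can have different lengths, so shorter ones are padded
with NaNs before the NaN-aware mean and standard deviation are computed.

Example (illustrative path):

    >>> avg_psd_1d, std_psd_1d = get_average_psd_1d("/data/images", "png")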
65 | Args: 66 | img_dir (str): Path of image directory 67 | img_type (str): Image type (PNG, JPG, etc.) 68 | 69 | Returns: 70 | avg_psd_1d (np.ndarray): Avg PSD 1D 71 | std_psd_1d (np.ndarray): Standard deviation of PSD 72 | 73 | """ 74 | images = _load_images_from_dir(img_dir, img_type) 75 | total_psd_1d = [] 76 | max_len = float("-inf") 77 | 78 | for image in tqdm(images): 79 | psd_2d = get_psd2d(image) 80 | psd_1d = get_psd1d(psd_2d) 81 | max_len = max(max_len, len(psd_1d)) 82 | total_psd_1d.append(psd_1d) 83 | 84 | for i in range(len(total_psd_1d)): 85 | if len(total_psd_1d[i]) < max_len: 86 | _len = max_len - len(total_psd_1d[i]) 87 | nan_arr = np.empty(_len) 88 | nan_arr[:] = np.nan 89 | total_psd_1d[i] = np.append(total_psd_1d[i], nan_arr) 90 | 91 | total_psd_1d = np.asarray(total_psd_1d, dtype=float) 92 | 93 | avg_psd_1d = np.nanmean(total_psd_1d, axis=0) 94 | std_psd_1d = np.nanstd(total_psd_1d, axis=0) 95 | 96 | return avg_psd_1d, std_psd_1d 97 | -------------------------------------------------------------------------------- /datasetinsights/stats/image_analysis/wavelet.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | 4 | import numpy as np 5 | import pywt 6 | from PIL import Image 7 | from tqdm import tqdm 8 | 9 | 10 | def get_wt_coeffs_var(img_dir: str, img_type: str = "png", num_img=None): 11 | """ 12 | Compute the variance of the wavelet detail coefficients for each image. 13 | Args: 14 | img_dir (str): Path of image directory 15 | img_type (str): Image type (PNG, JPG, etc.) 16 | num_img (int): Number of images to use for the calculation 17 | 18 | Returns: 19 | Lists of variances of Horizontal, Vertical and Diagonal details 20 | 21 | """ 22 | images = glob.glob(img_dir + f"/*.{img_type}") 23 | 24 | if num_img and num_img < len(images): 25 | images = random.sample(images, num_img) 26 | 27 | horizontal_coeff, vertical_coeff, diagonal_coeff = [], [], [] 28 | 29 | for img in tqdm(images): 30 | im = Image.open(img).convert("L") 31 | _, (cH, cV, cD) = pywt.dwt2(im, "haar", mode="periodization") 32 | horizontal_coeff.append(np.array(cH).var()) 33 | vertical_coeff.append(np.array(cV).var()) 34 | diagonal_coeff.append(np.array(cD).var()) 35 | 36 | return horizontal_coeff, vertical_coeff, diagonal_coeff 37 | -------------------------------------------------------------------------------- /datasetinsights/stats/keypoints_stats.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import Any, Dict, List, Tuple 3 | 4 | import numpy as np 5 | 6 | from datasetinsights.stats.constants import COCO_KEYPOINTS, COCO_SKELETON 7 | 8 | 9 | def _is_torso_visible_or_labeled(kp: List) -> bool: 10 | """ 11 | True if torso (left hip, right hip, left shoulder, 12 | right shoulder) is visible else False 13 | """ 14 | if len(kp) != 51: 15 | raise ValueError( 16 | "keypoint list doesn't fit the format of " 17 | "COCO human keypoints (17 keypoints)" 18 | ) 19 | return ( 20 | (kp[17] == 1 or kp[17] == 2) 21 | and (kp[20] == 1 or kp[20] == 2) 22 | and (kp[41] == 1 or kp[41] == 2) 23 | and (kp[38] == 1 or kp[38] == 2) 24 | ) 25 | 26 | 27 | def _get_kp_where_torso_visible(annotations: List) -> List: 28 | """ 29 | List of keypoints where torso is visible or labeled 30 | """ 31 | keypoints = [] 32 | for ann in annotations: 33 | if _is_torso_visible_or_labeled(ann): 34 | keypoints.append(ann) 35 | return keypoints 36 | 37 | 38 | def _calc_mid(p1: Tuple[Any, Any], p2: Tuple[Any, Any]): 39 | """ 40 | Calculate mid point of two points 41 | """ 42 | return
(p1[0] + p2[0]) / 2, (p1[1] + p2[1]) / 2 43 | 44 | 45 | def _calc_dist(p1: Tuple[Any, Any], p2: Tuple[Any, Any]) -> float: 46 | """ 47 | Calculate distance between two points 48 | """ 49 | return math.sqrt(((p1[0] - p2[0]) ** 2) + ((p1[1] - p2[1]) ** 2)) 50 | 51 | 52 | def _translate_and_scale_xy(X: np.ndarray, Y: np.ndarray): 53 | """ 54 | Return keypoints axis list X and Y after performing translation and scaling. 55 | """ 56 | left_hip, right_hip = (X[11], Y[11]), (X[12], Y[12]) 57 | left_shoulder, right_shoulder = (X[5], Y[5]), (X[6], Y[6]) 58 | 59 | # Translate all points according to mid_hip being at 0,0 60 | mid_hip = _calc_mid(right_hip, left_hip) 61 | X = np.where(X > 0.0, X - mid_hip[0], 0.0) 62 | Y = np.where(Y > 0.0, Y - mid_hip[1], 0.0) 63 | 64 | # Calculate scale factor 65 | scale = ( 66 | _calc_dist(left_shoulder, left_hip) 67 | + _calc_dist(right_shoulder, right_hip) 68 | ) / 2 69 | 70 | return X / scale, Y / scale 71 | 72 | 73 | def get_scale_keypoints(annotations: List) -> Dict: 74 | """ 75 | Process keypoints annotations to extract information for pose plots. 76 | Args: 77 | annotations (list): List of keypoints lists with format 78 | [x1, y1, v1, x2, y2, v2, ...] with the order of COCO_KEYPOINTS 79 | Returns: 80 | Dict: Processed key-value pair of keypoints name -> (x,y) list. 81 | """ 82 | keypoints = _get_kp_where_torso_visible(annotations) 83 | 84 | processed_kp_dict = {} 85 | for name in COCO_KEYPOINTS: 86 | processed_kp_dict[name] = {"x": [], "y": []} 87 | 88 | for kp in keypoints: 89 | # Separate x and y keypoints 90 | x_kp, y_kp = np.array(kp[0::3]), np.array(kp[1::3]) 91 | x_kp, y_kp = _translate_and_scale_xy(x_kp, y_kp) 92 | 93 | # save keypoints to dict 94 | idx = 0 95 | for xi, yi in zip(x_kp, y_kp): 96 | if xi == 0 and yi == 0: 97 | pass 98 | elif xi > 2.5 or xi < -2.5 or yi > 2.5 or yi < -2.5: 99 | pass 100 | else: 101 | processed_kp_dict[COCO_KEYPOINTS[idx]]["x"].append(xi) 102 | processed_kp_dict[COCO_KEYPOINTS[idx]]["y"].append(yi) 103 | idx += 1 104 | 105 | return processed_kp_dict 106 | 107 | 108 | def _get_avg_kp(kp_dict: Dict): 109 | """ 110 | Return average value of keypoints axis list X and Y. 111 | """ 112 | x_avg, y_avg = [], [] 113 | for key in COCO_KEYPOINTS: 114 | kp_x = np.array(kp_dict[key]["x"]) 115 | kp_y = np.array(kp_dict[key]["y"]) 116 | x_avg.append(np.mean(kp_x)) 117 | y_avg.append(np.mean(kp_y)) 118 | return x_avg, y_avg 119 | 120 | 121 | def get_average_skeleton(kp_dict: Dict, skeleton=COCO_SKELETON) -> List: 122 | """ 123 | Return skeleton (a list of connected human joints) of 124 | average keypoints values. 125 | Args: 126 | kp_dict (dict): key-value pair of keypoints name -> (x,y) list 127 | Returns: 128 | list: list of skeleton connections. 129 | """ 130 | x, y = _get_avg_kp(kp_dict) 131 | s = [] 132 | for p1, p2 in skeleton: 133 | s.append([(x[p1 - 1], y[p1 - 1]), (x[p2 - 1], y[p2 - 1])]) 134 | return s 135 | 136 | 137 | def get_visible_keypoints_dict(keypoint_list: List) -> Dict: 138 | """ 139 | Args: 140 | keypoint_list (List): List of keypoints lists with format 141 | [x1, y1, v1, x2, y2, v2, ...] with the order of COCO_KEYPOINTS 142 | Returns: 143 | labeled_kpt_dict (Dict): Labeled keypoints dictionary where 144 | key is the keypoint and val is the probability of that 145 | keypoint to occur in the bbox given that kp is labeled.
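Example (two synthetic annotations; the values are illustrative):

    >>> kps = [[10, 20, 2] * 17, [0, 0, 0] * 17]
    >>> get_visible_keypoints_dict(kps)["nose"]
    0.5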
146 | """ 147 | total_instances = len(keypoint_list) 148 | keypoints = COCO_KEYPOINTS 149 | 150 | kp_visibility_list = np.array(keypoint_list)[:, 2::3] 151 | kp_visibility_list = np.where(kp_visibility_list == 0.0, 0.0, 1.0) 152 | 153 | labeled_kpt_dict = {} 154 | for i, key in enumerate(keypoints): 155 | labeled_kpt_dict[key] = sum(kp_visibility_list[:, i]) / total_instances 156 | 157 | return labeled_kpt_dict 158 | -------------------------------------------------------------------------------- /datasetinsights/stats/object_detection_stats.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Dict 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | 8 | def convert_coco_annotations_to_df(filename: str) -> pd.DataFrame: 9 | """ 10 | Converts coco annotation file to pandas df for processing. 11 | Args: 12 | filename (str): Annotation file path 13 | Returns: 14 | coco dataframe (pd.DataFrame): dataframe of annotation info. 15 | """ 16 | coco_json = json.load(open(filename, "r")) 17 | 18 | df_image = pd.DataFrame(coco_json["images"]) 19 | df_annotation = pd.DataFrame(coco_json["annotations"]) 20 | 21 | df_coco = df_annotation.merge(df_image, left_on="image_id", right_on="id") 22 | 23 | return df_coco 24 | 25 | 26 | def get_bbox_relative_size_list(annotation_df: pd.DataFrame) -> np.ndarray: 27 | """ 28 | Args: 29 | annotation_df (pd.DataFrame): dataframe with image and 30 | bbox_annotation in each row,(columns include: width 31 | (image width), height (image height), area (bbox size)) 32 | Returns: 33 | bbox_relative_size_list (np.ndarray): List of all bbox 34 | sizes relative to its image size 35 | """ 36 | bbox_size = annotation_df["area"] 37 | image_size = annotation_df["width"] * annotation_df["height"] 38 | bbox_relative_size = np.sqrt(bbox_size / image_size) 39 | 40 | return bbox_relative_size 41 | 42 | 43 | def get_bbox_heatmap(annotation_df: pd.DataFrame) -> np.ndarray: 44 | """ 45 | Args: 46 | annotation_df (pd.DataFrame): dataframe with image 47 | and bbox_annotation in each row, (columns include: 48 | width (image width), height (image height), 49 | bbox ([top_left_x, top_left_y, width, height])) 50 | Returns: 51 | bbox_heatmap (np.ndarray): numpy array of size of 52 | the max sized image in the dataset with values describing 53 | bbox intensity over the entire dataset images 54 | at a particular pixel. 55 | """ 56 | max_width = max(annotation_df["width"]) 57 | max_height = max(annotation_df["height"]) 58 | bbox_heatmap = np.zeros([max_height, max_width, 1]) 59 | 60 | for bbox in annotation_df["bbox"]: 61 | bbox = np.array(bbox).astype(int) 62 | bbox_heatmap[ 63 | bbox[1] : bbox[1] + bbox[3], bbox[0] : bbox[0] + bbox[2], : 64 | ] += 1 65 | 66 | return bbox_heatmap 67 | 68 | 69 | def get_bbox_per_img_dict(annotation_df: pd.DataFrame) -> Dict: 70 | """ 71 | Args: 72 | annotation_df (pd.DataFrame): dataframe with each annotation 73 | in each row, (columns include: iscrowd (bool), image_id (image id)) 74 | Returns: 75 | Dict: Dictionary of number of bbox per image where key is the number 76 | of bbox and val is the probability of that number of bbox images in 77 | the dataset. 
78 | """ 79 | annotated_persons_df = annotation_df[(annotation_df["iscrowd"] == 0)] 80 | 81 | persons_in_img_df = pd.DataFrame( 82 | {"cnt": annotated_persons_df[["image_id"]].value_counts()} 83 | ) 84 | persons_in_img_df.reset_index(level=[0], inplace=True) 85 | 86 | # group by counter so we will get the dataframe with number of 87 | # annotated people in a single image 88 | 89 | persons_in_img_cnt_df = persons_in_img_df.groupby(["cnt"]).count() 90 | 91 | # extract arrays 92 | x_occurences = persons_in_img_cnt_df.index.values 93 | y_images = persons_in_img_cnt_df["image_id"].values 94 | total_images = sum(y_images) 95 | 96 | bbox_num_dict = {} 97 | for key, value in zip(x_occurences, y_images): 98 | bbox_num_dict[key] = value / total_images 99 | return bbox_num_dict 100 | -------------------------------------------------------------------------------- /datasetinsights/stats/statistics.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import datasetinsights.constants as const 4 | from datasetinsights.datasets.unity_perception import MetricDefinitions, Metrics 5 | from datasetinsights.datasets.unity_perception.tables import SCHEMA_VERSION 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class RenderedObjectInfo: 11 | """Rendered Object Info in Captures 12 | 13 | This metric stores common object info captured by a sensor in the simulation 14 | environment. It can be used to calculate object statistics such as 15 | object count, object rotation and visible pixels. 16 | 17 | Attributes: 18 | raw_table (pd.DataFrame): rendered object info stored with a tidy 19 | pandas dataframe. Columns "label_id", "instance_id", "visible_pixels", 20 | "capture_id, "label_name". 21 | 22 | Examples: 23 | 24 | .. code-block:: python 25 | 26 | >>> # set the data root path to where data was stored 27 | >>> data_root = "$HOME/data" 28 | >>> # use rendered object info definition id 29 | >>> definition_id = "659c6e36-f9f8-4dd6-9651-4a80e51eabc4" 30 | >>> roinfo = RenderedObjectInfo(data_root, definition_id) 31 | #total object count per label dataframe 32 | >>> roinfo.total_counts() 33 | label_id label_name count 34 | 1 object1 10 35 | 2 object2 21 36 | #object count per capture dataframe 37 | >>> roinfo.per_capture_counts() 38 | capture_id count 39 | qwerty 10 40 | asdfgh 21 41 | """ 42 | 43 | LABEL = "label_id" 44 | LABEL_READABLE = "label_name" 45 | INDEX_COLUMN = "capture_id" 46 | VALUE_COLUMN = "values" 47 | COUNT_COLUMN = "count" 48 | 49 | def __init__( 50 | self, 51 | data_root=const.DEFAULT_DATA_ROOT, 52 | version=SCHEMA_VERSION, 53 | def_id=None, 54 | ): 55 | """Initialize RenderedObjectInfo 56 | 57 | Args: 58 | data_root (str): root directory where the dataset was stored 59 | version (str): synthetic dataset schema version 60 | def_id (str): rendered object info definition id 61 | """ 62 | filtered_metrics = Metrics(data_root, version).filter_metrics(def_id) 63 | label_mappings = self._read_label_mappings(data_root, version, def_id) 64 | self.raw_table = self._read_filtered_metrics( 65 | filtered_metrics, label_mappings 66 | ) 67 | 68 | def num_captures(self): 69 | """Total number of captures 70 | 71 | Returns: 72 | integer: Total number of captures 73 | """ 74 | return self.raw_table[self.INDEX_COLUMN].nunique() 75 | 76 | @staticmethod 77 | def _read_label_mappings(data_root, version, def_id): 78 | """Read label_mappings from a metric_definition record. 
79 | 80 | Args: 81 | data_root (str): root directory where the dataset was stored 82 | version (str): synthetic dataset schema version 83 | def_id (str): rendered object info definition id 84 | 85 | Returns: 86 | dict: The mappings of {label_id: label_name} 87 | """ 88 | definition = MetricDefinitions(data_root, version).get_definition( 89 | def_id 90 | ) 91 | name = RenderedObjectInfo.LABEL 92 | readable_name = RenderedObjectInfo.LABEL_READABLE 93 | 94 | return {d[name]: d[readable_name] for d in definition["spec"]} 95 | 96 | @staticmethod 97 | def _read_filtered_metrics(filtered_metrics, label_mappings): 98 | """Join filtered metrics with label mappings. 99 | 100 | Args: 101 | filtered_metrics (pd.DataFrame): A pandas dataframe for metrics 102 | filtered by definition id. 103 | label_mappings (dict): the mappings of {label_id: label_name} 104 | 105 | Returns: 106 | pd.DataFrame: rendered object info stored with a tidy 107 | pandas dataframe. Columns "label_id", "instance_id", 108 | "visible_pixels", "capture_id", "label_name". 109 | """ 110 | filtered_metrics[RenderedObjectInfo.LABEL_READABLE] = filtered_metrics[ 111 | RenderedObjectInfo.LABEL 112 | ].map(label_mappings) 113 | # Remove metrics data not defined in label_mappings 114 | filtered_metrics.dropna( 115 | subset=[RenderedObjectInfo.LABEL_READABLE], inplace=True 116 | ) 117 | 118 | return filtered_metrics 119 | 120 | def total_counts(self): 121 | """Aggregate Total Object Counts Per Label 122 | 123 | Returns: 124 | pd.DataFrame: Total object counts table. 125 | Columns "label_id", "label_name", "count" 126 | """ 127 | agg = ( 128 | self.raw_table.groupby([self.LABEL, self.LABEL_READABLE]) 129 | .size() 130 | .to_frame(name=self.COUNT_COLUMN) 131 | .reset_index() 132 | ) 133 | 134 | return agg 135 | 136 | def per_capture_counts(self): 137 | """Aggregate Object Counts Per Capture 138 | 139 | Returns: 140 | pd.DataFrame: Per-capture object counts table. 141 | Columns "capture_id", "count" 142 | """ 143 | agg = ( 144 | self.raw_table.groupby(self.INDEX_COLUMN) 145 | .size() 146 | .to_frame(name=self.COUNT_COLUMN) 147 | .reset_index() 148 | ) 149 | 150 | return agg 151 | -------------------------------------------------------------------------------- /datasetinsights/stats/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | from .plots import grid_plot, plot_bboxes 2 | 3 | __all__ = ["plot_bboxes", "grid_plot"] 4 | -------------------------------------------------------------------------------- /datasetinsights/stats/visualization/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import dash 4 | 5 | 6 | def _init_app(): 7 | """Initializes the dash app.""" 8 | 9 | this_dir = os.path.dirname(os.path.abspath(__file__)) 10 | css_file = os.path.join(this_dir, "stylesheet.css") 11 | app = dash.Dash( 12 | __name__, 13 | external_stylesheets=[css_file], 14 | suppress_callback_exceptions=True, 15 | ) 16 | return app 17 | 18 | 19 | _app = _init_app() 20 | 21 | 22 | def get_app(): 23 | return _app 24 | -------------------------------------------------------------------------------- /datasetinsights/stats/visualization/bbox2d_plot.py: -------------------------------------------------------------------------------- 1 | """ Use a bounding box library to plot pretty bounding boxes 2 | with a simple Python API. This library helps to display bounding boxes 3 | with a chosen set of colors.
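A typical call looks like the sketch below (names are illustrative; `image`
is a numpy array and `box` a BBox2D):

    add_single_bbox_on_image(image, box, label="car", color="green")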
4 | Reference: https://github.com/nalepae/bounding-box 5 | """ 6 | import os as _os 7 | import pathlib 8 | import random 9 | from hashlib import md5 as _md5 10 | 11 | import cv2 as _cv2 12 | import numpy as _np 13 | from PIL import ImageFont 14 | 15 | FONT_PATH = _os.path.join( 16 | pathlib.Path(__file__).parent.absolute(), "font", "DroidSansFallback.ttf" 17 | ) 18 | _COLOR_NAME_TO_RGB = dict( 19 | navy=((0, 38, 63), (119, 193, 250)), 20 | blue=((0, 120, 210), (173, 220, 252)), 21 | aqua=((115, 221, 252), (0, 76, 100)), 22 | teal=((15, 205, 202), (0, 0, 0)), 23 | olive=((52, 153, 114), (25, 58, 45)), 24 | green=((0, 204, 84), (15, 64, 31)), 25 | lime=((1, 255, 127), (0, 102, 53)), 26 | yellow=((255, 216, 70), (103, 87, 28)), 27 | orange=((255, 125, 57), (104, 48, 19)), 28 | red=((255, 47, 65), (131, 0, 17)), 29 | maroon=((135, 13, 75), (239, 117, 173)), 30 | fuchsia=((246, 0, 184), (103, 0, 78)), 31 | purple=((179, 17, 193), (241, 167, 244)), 32 | gray=((168, 168, 168), (0, 0, 0)), 33 | silver=((220, 220, 220), (0, 0, 0)), 34 | ) 35 | _COLOR_NAMES = list(_COLOR_NAME_TO_RGB) 36 | _DEFAULT_COLOR_NAME = "green" 37 | 38 | 39 | def add_single_bbox_on_image( 40 | image, bbox, label, color, font_size=100, box_line_width=15 41 | ): 42 | """Add single bounding box with label on a given image. 43 | 44 | Args: 45 | image (numpy array): a numpy array for an image. 46 | bbox (BBox2D): a canonical bounding box. 47 | color (str): a color name for one bounding box. 48 | If color = None, it will randomly assign a color for each box. 49 | font_size (int): font size for each label. Defaults to 100. 50 | box_line_width (int): line width of the bounding boxes. Defaults to 15. 51 | """ 52 | left, top = (bbox.x, bbox.y) 53 | right, bottom = (bbox.x + bbox.w, bbox.y + bbox.h) 54 | 55 | _add_single_bbox_on_image( 56 | image, 57 | left, 58 | top, 59 | right, 60 | bottom, 61 | label=label, 62 | color=color, 63 | font_size=font_size, 64 | box_line_width=box_line_width, 65 | ) 66 | 67 | 68 | def _rgb_to_bgr(color): 69 | return list(reversed(color)) 70 | 71 | 72 | def _color_image(image, font_color, background_color): 73 | return background_color + (font_color - background_color) * image / 255 74 | 75 | 76 | def _get_label_image( 77 | text, font_color_tuple_bgr, background_color_tuple_bgr, font_size=100 78 | ): 79 | """Add text and background color for one label. 80 | 81 | Args: 82 | text (str): label name. 83 | font_color_tuple_bgr (tuple): font RGB color. 84 | background_color_tuple_bgr (tuple): background RGB color. 85 | font_size (int): font size for the label text. 86 | 87 | Returns: 88 | numpy array: a numpy array for a rendered label. 89 | """ 90 | _FONT = ImageFont.truetype(FONT_PATH, font_size) 91 | text_image = _FONT.getmask(text) 92 | shape = list(reversed(text_image.size)) 93 | bw_image = _np.array(text_image).reshape(shape) 94 | 95 | image = [ 96 | _color_image(bw_image, font_color, background_color)[None, ...] 97 | for font_color, background_color in zip( 98 | font_color_tuple_bgr, background_color_tuple_bgr 99 | ) 100 | ] 101 | 102 | return _np.concatenate(image).transpose(1, 2, 0) 103 | 104 | 105 | def _add_single_bbox_on_image( 106 | image, 107 | left, 108 | top, 109 | right, 110 | bottom, 111 | label=None, 112 | color=None, 113 | font_size=100, 114 | box_line_width=15, 115 | ): 116 | """Add single bounding box with label on a given image. 117 | 118 | Add single bounding box and a label text with label on a given image. If the 119 | label text exceeds the original image border, it would be cropped. 
120 | """ 121 | try: 122 | left, top, right, bottom = int(left), int(top), int(right), int(bottom) 123 | except ValueError: 124 | raise TypeError("'left', 'top', 'right' & 'bottom' must be a number") 125 | 126 | if label and not color: 127 | hex_digest = _md5(label.encode()).hexdigest() 128 | color_index = int(hex_digest, 16) % len(_COLOR_NAME_TO_RGB) 129 | color = _COLOR_NAMES[color_index] 130 | elif not label: 131 | color = random.choice(_COLOR_NAMES) 132 | 133 | colors = [list(item) for item in _COLOR_NAME_TO_RGB[color]] 134 | color, color_text = colors 135 | 136 | _cv2.rectangle(image, (left, top), (right, bottom), color, box_line_width) 137 | 138 | if label: 139 | label_image = _get_label_image(label, color_text, color, font_size) 140 | _add_label_on_image(label_image, image, left, top, color) 141 | 142 | 143 | def _add_label_on_image(label_image, image, left, top, color): 144 | """Add a label on a bounding box. 145 | 146 | Add a label on a bounding box. Crop the label image if it cross the image 147 | border. 148 | """ 149 | image_height, image_width, _ = image.shape 150 | label_height, label_width, _ = label_image.shape 151 | rectangle_height, rectangle_width = 1 + label_height, 1 + label_width 152 | 153 | rectangle_bottom = top 154 | rectangle_left = max(0, min(left - 1, image_width - rectangle_width)) 155 | 156 | rectangle_top = rectangle_bottom - rectangle_height 157 | rectangle_right = rectangle_left + rectangle_width 158 | 159 | label_top = rectangle_top + 1 160 | 161 | if rectangle_top < 0: 162 | rectangle_top = top 163 | rectangle_bottom = rectangle_top + label_height + 1 164 | 165 | label_top = rectangle_top 166 | 167 | label_left = rectangle_left + 1 168 | label_bottom = label_top + label_height 169 | label_right = label_left + label_width 170 | 171 | rec_left_top = (rectangle_left, rectangle_top) 172 | rec_right_bottom = (rectangle_right, rectangle_bottom) 173 | 174 | _cv2.rectangle(image, rec_left_top, rec_right_bottom, color, -1) 175 | _fix_label_at_image_edge( 176 | label_image, label_left, label_top, label_right, label_bottom, image 177 | ) 178 | 179 | 180 | def _fix_label_at_image_edge( 181 | label_image, label_left, label_top, label_right, label_bottom, image 182 | ): 183 | """Fix the label at image edge. 184 | 185 | Crop the label image if it cross the image border. 
186 | """ 187 | image_height, image_width, _ = image.shape 188 | label_height, label_width, _ = label_image.shape 189 | label_top = max(0, label_top) 190 | label_bottom = min(image_height, label_bottom) 191 | label_left = max(0, label_left) 192 | label_right = min(image_width, label_right) 193 | label_actual_width = label_right - label_left 194 | label_actual_height = label_bottom - label_top 195 | label_actual_size = label_actual_width * label_actual_height 196 | if label_actual_size < label_height * label_width: 197 | image[label_top:label_bottom, label_left:label_right, :] = label_image[ 198 | : (label_bottom - label_top), : (label_right - label_left), : 199 | ] 200 | else: 201 | image[label_top:label_bottom, label_left:label_right, :] = label_image 202 | -------------------------------------------------------------------------------- /datasetinsights/stats/visualization/constants.py: -------------------------------------------------------------------------------- 1 | MAX_SAMPLES = 10000 2 | RENDERED_OBJECT_INFO_DEFINITION_ID = "659c6e36-f9f8-4dd6-9651-4a80e51eabc4" 3 | USER_PARAMETERS_DEFINITION_ID = "3f06bcec-1f23-4387-a1fd-5af54ee29c16" 4 | FOREGROUND_PLACEMENT_INFO_DEFINITION_ID = "061e08cc-4428-4926-9933-a6732524b52b" 5 | LIGHTING_INFO_DEFINITION_ID = "939248ee-668a-4e98-8e79-e7909f034a47" 6 | BOUNDING_BOX_2D_DEFINITION_ID = "c31620e3-55ff-4af6-ae86-884aa0daa9b2" 7 | -------------------------------------------------------------------------------- /datasetinsights/stats/visualization/font/DroidSansFallback.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/datasetinsights/stats/visualization/font/DroidSansFallback.ttf -------------------------------------------------------------------------------- /datasetinsights/stats/visualization/keypoints_plot.py: -------------------------------------------------------------------------------- 1 | """ Helper keypoints library to plot keypoint joints and skeletons with a 2 | simple Python API. 3 | """ 4 | 5 | 6 | def _get_color_from_color_node(color): 7 | """Gets the color from the color node in the template. 8 | 9 | Args: 10 | color (tuple): The color's channel values expressed in a range from 0..1 11 | 12 | Returns: The color for the node. 13 | 14 | """ 15 | r = int(color["r"] * 255) 16 | g = int(color["g"] * 255) 17 | b = int(color["b"] * 255) 18 | a = int(color["a"] * 255) 19 | return r, g, b, a 20 | 21 | 22 | def _get_color_for_bone(bone): 23 | """Gets the color for the bone from the template. A bone is a visual 24 | connection between two keypoints in the keypoint list of the figure. 25 | 26 | bone 27 | { 28 | joint1: Index into the keypoint list for the first joint. 29 | joint2: Index into the keypoint list for the second joint. 30 | color { 31 | r: Value (0..1) of the red channel. 32 | g: Value (0..1) of the green channel. 33 | b: Value (0..1) of the blue channel. 34 | a: Value (0..1) of the alpha channel. 35 | } 36 | } 37 | 38 | Args: 39 | bone: The active bone. 40 | 41 | Returns: The color of the bone. 42 | 43 | """ 44 | if "color" in bone: 45 | return _get_color_from_color_node(bone["color"]) 46 | else: 47 | return 255, 0, 255, 255 48 | 49 | 50 | def _get_color_for_keypoint(template, keypoint): 51 | """Gets the color for the keypoint from the template. A keypoint is a 52 | location of interest inside of a figure. Keypoints are connected 53 | together with bones. 
The configuration of keypoint locations and bone 54 | connections is defined in a template file. 55 | 56 | keypoint_template { 57 | template_id: The UUID of the template. 58 | template_name: Human readable name of the template. 59 | key_points [ List of joints defined in this template 60 | { 61 | label: The label of the joint. 62 | index: The index of the joint. 63 | color { 64 | r: Value (0..1) for the red channel. 65 | g: Value (0..1) for the green channel. 66 | b: Value (0..1) for the blue channel. 67 | a: Value (0..1) for the alpha channel. 68 | } 69 | }, ... 70 | ] 71 | skeleton [ List of skeletal connections 72 | { 73 | joint1: The first joint of the connection. 74 | joint2: The second joint of the connection. 75 | color { 76 | r: Value (0..1) for the red channel. 77 | g: Value (0..1) for the green channel. 78 | b: Value (0..1) for the blue channel. 79 | a: Value (0..1) for the alpha channel. 80 | } 81 | }, ... 82 | ] 83 | } 84 | 85 | Args: 86 | template: The active template. 87 | keypoint: The active keypoint. 88 | 89 | Returns: The color for the keypoint. 90 | 91 | """ 92 | node = template["key_points"][keypoint["index"]] 93 | 94 | if "color" in node: 95 | return _get_color_from_color_node(node["color"]) 96 | else: 97 | return 0, 0, 255, 255 98 | 99 | 100 | def draw_keypoints_for_figure(image, figure, draw, templates, visual_width=6): 101 | """Draws keypoints for a figure on an image. 102 | 103 | keypoints { 104 | label_id: Integer identifier of the label. 105 | instance_id: UUID of the instance. 106 | template_guid: UUID of the keypoint template. 107 | pose: String label for current pose. 108 | keypoints [ 109 | { 110 | index: Index of keypoint in template. 111 | x: X subpixel coordinate of keypoint. 112 | y: Y subpixel coordinate of keypoint. 113 | state: 0: keypoint does not exist, 114 | 1: keypoint exists but is not visible, 115 | 2: keypoint exists and is visible. 116 | }, ... 117 | ] 118 | } 119 | 120 | Args: 121 | image (PIL Image): a PIL image. 122 | figure: The figure to draw. 123 | draw (PIL ImageDraw): PIL image draw interface. 124 | templates (list): a list of keypoint templates. 125 | visual_width (int): the visual width of the joints. 126 | 127 | Returns: a PIL image with keypoints for a figure drawn on it.
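Example (illustrative; `figure` and `templates` are assumed to be loaded
from dataset annotations elsewhere):

    >>> from PIL import Image, ImageDraw
    >>> image = Image.open("captures/camera_000.png")
    >>> draw = ImageDraw.Draw(image)
    >>> image = draw_keypoints_for_figure(image, figure, draw, templates)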
128 | 129 | """ 130 | # find the keypoint template for this figure 131 | for template in templates: 132 | if template["template_id"] == figure["template_guid"]: 133 | break 134 | else: 135 | return image 136 | 137 | # load the spec 138 | skeleton = template["skeleton"] 139 | 140 | for bone in skeleton: 141 | j1 = figure["keypoints"][bone["joint1"]] 142 | j2 = figure["keypoints"][bone["joint2"]] 143 | 144 | if j1["state"] == 2 and j2["state"] == 2: 145 | x1 = int(j1["x"]) 146 | y1 = int(j1["y"]) 147 | x2 = int(j2["x"]) 148 | y2 = int(j2["y"]) 149 | 150 | color = _get_color_for_bone(bone) 151 | draw.line((x1, y1, x2, y2), fill=color, width=visual_width) 152 | 153 | for k in figure["keypoints"]: 154 | state = k["state"] 155 | if state == 2: 156 | x = k["x"] 157 | y = k["y"] 158 | 159 | color = _get_color_for_keypoint(template, k) 160 | 161 | half_width = visual_width / 2 162 | 163 | draw.ellipse( 164 | ( 165 | x - half_width, 166 | y - half_width, 167 | x + half_width, 168 | y + half_width, 169 | ), 170 | fill=color, 171 | outline=color, 172 | ) 173 | 174 | return image 175 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | 3 | BUILDDIR = build 4 | SOURCEDIR = source 5 | TEMPLATEDIR = $(SOURCEDIR)/_templates 6 | 7 | .PHONY: help clean html 8 | 9 | help: 10 | @echo "Please use \`make <target>' where <target> is one of" 11 | @echo " html to make standalone HTML files" 12 | 13 | clean: 14 | rm -rf $(BUILDDIR) 15 | 16 | apidoc: 17 | sphinx-apidoc --templatedir=$(TEMPLATEDIR) -o $(SOURCEDIR) -d 2 ../datasetinsights/ \ 18 | ../datasetinsights/commands \ 19 | ../datasetinsights/dashboard.py \ 20 | ../datasetinsights/constants.py 21 | 22 | html: 23 | sphinx-build -b html $(SOURCEDIR) $(BUILDDIR)/html 24 | @echo 25 | @echo "Build finished. The HTML documentation pages are in $(BUILDDIR)/html." 26 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | Building documentation 2 | ====================== 3 | 4 | Run the following commands from the `docs` directory. 5 | 6 | Automatically generate Sphinx sources using [sphinx-apidoc](https://www.sphinx-doc.org/en/master/man/sphinx-apidoc.html) 7 | 8 | ```bash 9 | make apidoc 10 | ``` 11 | 12 | This command only applies to newly created modules. It will not update modules that already exist. You will have to modify `docs/datasetinsights.module_name` manually. 13 | 14 | To build HTML files, run 15 | 16 | ```bash 17 | make html 18 | ``` 19 | 20 | You can browse the documentation by opening the `build/html/index.html` file directly in any web browser. 21 | 22 | Clean up built HTML files 23 | 24 | ```bash 25 | make clean 26 | ``` 27 | 28 | Known issues 29 | ------------ 30 | 31 | 1. Some of the documents are written in markdown format. We use [recommonmark](https://github.com/readthedocs/recommonmark) to generate documentation. It uses [CommonMark](http://commonmark.org/) to convert markdown files to rst files. Due to its limitations, links to headers cannot have `_` or `.`. If the header has either of those characters, they should be replaced by dashes `-`. e.g. if you have a header `#### annotation_definitions.json` in the markdown file, to link to that header the markdown needs to be `[click link](#annotation-definitions-json)` 32 | 33 | 2.
`Readthedocs.org` does not currently support [poetry](https://python-poetry.org/) officially. Until it does, we have to manually generate a `docs/requirements.txt` file whenever new requirements are added to the repo. This file can be generated using the command: 34 | 35 | ```bash 36 | poetry export --dev --without-hashes -f requirements.txt > docs/requirements.txt 37 | ``` 38 | -------------------------------------------------------------------------------- /docs/source/_images/captures_steps_timestamps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/captures_steps_timestamps.png -------------------------------------------------------------------------------- /docs/source/_images/image_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/image_0.png -------------------------------------------------------------------------------- /docs/source/_images/image_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/image_2.png -------------------------------------------------------------------------------- /docs/source/_images/image_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/image_3.png -------------------------------------------------------------------------------- /docs/source/_images/image_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/image_4.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/evaluate_pipeline_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/evaluate_pipeline_graph.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/evaluate_the_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/evaluate_the_model.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/notebook.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/notebook_docker_cpu_memory.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/notebook_docker_cpu_memory.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/notebook_gpu_volume.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/notebook_gpu_volume.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/train_on_real_world_dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/train_on_real_world_dataset.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/train_on_synthdet_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/train_on_synthdet_sample.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/train_on_synthetic_and_real_world_dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/train_on_synthetic_and_real_world_dataset.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/train_on_synthetic_dataset_unity_simulation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/train_on_synthetic_dataset_unity_simulation.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/train_pipeline_graph.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/train_pipeline_graph.jpg -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/upload_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/upload_pipeline.png -------------------------------------------------------------------------------- /docs/source/_images/synthetic_data_pipeline_dataset_evaluation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/synthetic_data_pipeline_dataset_evaluation.png -------------------------------------------------------------------------------- /docs/source/_templates/module.rst_t: -------------------------------------------------------------------------------- 1 | 
{%- if show_headings %} 2 | {{- basename | e | heading }} 3 | 4 | {% endif -%} 5 | .. automodule:: {{ qualname }} 6 | {%- for option in automodule_options %} 7 | :{{ option }}: 8 | {%- endfor %} 9 | -------------------------------------------------------------------------------- /docs/source/_templates/package.rst_t: -------------------------------------------------------------------------------- 1 | {%- macro automodule(modname, options) -%} 2 | .. automodule:: {{ modname }} 3 | {%- for option in options %} 4 | :{{ option }}: 5 | {%- endfor %} 6 | {%- endmacro %} 7 | 8 | {%- macro toctree(docnames) -%} 9 | .. toctree:: 10 | :maxdepth: {{ maxdepth }} 11 | {% for docname in docnames %} 12 | {{ docname }} 13 | {%- endfor %} 14 | {%- endmacro %} 15 | 16 | {%- if is_namespace %} 17 | {{- pkgname | e | heading }} 18 | {% else %} 19 | {{- pkgname | e | heading }} 20 | {% endif %} 21 | 22 | {%- if modulefirst and not is_namespace %} 23 | {{ automodule(pkgname, automodule_options) }} 24 | {% endif %} 25 | 26 | {%- if subpackages %} 27 | 28 | {{ toctree(subpackages) }} 29 | {% endif %} 30 | 31 | {%- if submodules %} 32 | {% if separatemodules %} 33 | {{ toctree(submodules) }} 34 | {%- else %} 35 | {%- for submodule in submodules %} 36 | {% if show_headings %} 37 | {{- submodule | e | heading(2) }} 38 | {% endif %} 39 | {{ automodule(submodule, automodule_options) }} 40 | {% endfor %} 41 | {%- endif %} 42 | {% endif %} 43 | 44 | {%- if not modulefirst and not is_namespace %} 45 | 46 | {{ automodule(pkgname, automodule_options) }} 47 | {% endif %} 48 | -------------------------------------------------------------------------------- /docs/source/_templates/toc.rst_t: -------------------------------------------------------------------------------- 1 | {{ header | heading }} 2 | 3 | .. toctree:: 4 | :maxdepth: {{ maxdepth }} 5 | {% for docname in docnames %} 6 | {{ docname }} 7 | {%- endfor %} 8 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | import os 7 | import sys 8 | 9 | import pkg_resources 10 | 11 | sys.path.insert(0, os.path.abspath("../..")) 12 | 13 | 14 | # -- Project information ----------------------------------------------------- 15 | 16 | project = "datasetinsights" 17 | copyright = "2020, Unity Technologies" 18 | author = "Unity Technologies" 19 | 20 | # The full version, including alpha/beta/rc tags 21 | release = pkg_resources.get_distribution(project).version 22 | napoleon_google_docstring = True 23 | 24 | # -- General configuration --------------------------------------------------- 25 | 26 | master_doc = "index" 27 | 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | "recommonmark", 34 | "sphinx.ext.autosectionlabel", 35 | "sphinx_rtd_theme", 36 | "sphinx.ext.napoleon", 37 | "sphinx_click", 38 | ] 39 | 40 | source_suffix = { 41 | ".rst": "restructuredtext", 42 | ".txt": "markdown", 43 | ".md": "markdown", 44 | } 45 | 46 | 47 | # Add any paths that contain templates here, relative to this directory. 
48 | templates_path = ["_templates"] 49 | 50 | # List of patterns, relative to source directory, that match files and 51 | # directories to ignore when looking for source files. 52 | # This pattern also affects html_static_path and html_extra_path. 53 | exclude_patterns = [] 54 | 55 | 56 | # -- Options for HTML output ------------------------------------------------- 57 | 58 | # The theme to use for HTML and HTML Help pages. See the documentation for 59 | # a list of builtin themes. 60 | # 61 | html_theme = "sphinx_rtd_theme" 62 | 63 | # Add any paths that contain custom static files (such as style sheets) here, 64 | # relative to this directory. They are copied after the builtin static files, 65 | # so a file named "default.css" will overwrite the builtin "default.css". 66 | -------------------------------------------------------------------------------- /docs/source/datasetinsights.datasets.rst: -------------------------------------------------------------------------------- 1 | datasetinsights.datasets 2 | ======================== 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 2 7 | 8 | datasetinsights.datasets.unity_perception 9 | 10 | 11 | datasetinsights.datasets.exceptions 12 | ----------------------------------- 13 | 14 | .. automodule:: datasetinsights.datasets.exceptions 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | 19 | datasetinsights.datasets.synthetic 20 | ---------------------------------- 21 | 22 | .. automodule:: datasetinsights.datasets.synthetic 23 | :members: 24 | :undoc-members: 25 | :show-inheritance: 26 | 27 | 28 | 29 | .. automodule:: datasetinsights.datasets 30 | :members: 31 | :undoc-members: 32 | :show-inheritance: 33 | -------------------------------------------------------------------------------- /docs/source/datasetinsights.datasets.transformers.rst: -------------------------------------------------------------------------------- 1 | datasetinsights.datasets.transformers 2 | ===================================== 3 | 4 | 5 | datasetinsights.datasets.transformers.coco 6 | ------------------------------------------ 7 | 8 | .. automodule:: datasetinsights.datasets.transformers.coco 9 | :members: 10 | :undoc-members: 11 | :show-inheritance: 12 | 13 | 14 | 15 | .. automodule:: datasetinsights.datasets.transformers 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | -------------------------------------------------------------------------------- /docs/source/datasetinsights.datasets.unity_perception.rst: -------------------------------------------------------------------------------- 1 | datasetinsights.datasets.unity\_perception 2 | ========================================== 3 | 4 | 5 | datasetinsights.datasets.unity\_perception.captures 6 | --------------------------------------------------- 7 | 8 | .. automodule:: datasetinsights.datasets.unity_perception.captures 9 | :members: 10 | :undoc-members: 11 | :show-inheritance: 12 | 13 | datasetinsights.datasets.unity\_perception.exceptions 14 | ----------------------------------------------------- 15 | 16 | .. automodule:: datasetinsights.datasets.unity_perception.exceptions 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | 21 | datasetinsights.datasets.unity\_perception.metrics 22 | -------------------------------------------------- 23 | 24 | .. 
automodule:: datasetinsights.datasets.unity_perception.metrics 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | datasetinsights.datasets.unity\_perception.references 30 | ----------------------------------------------------- 31 | 32 | .. automodule:: datasetinsights.datasets.unity_perception.references 33 | :members: 34 | :undoc-members: 35 | :show-inheritance: 36 | 37 | datasetinsights.datasets.unity\_perception.tables 38 | ------------------------------------------------- 39 | 40 | .. automodule:: datasetinsights.datasets.unity_perception.tables 41 | :members: 42 | :undoc-members: 43 | :show-inheritance: 44 | 45 | datasetinsights.datasets.unity\_perception.validation 46 | ----------------------------------------------------- 47 | 48 | .. automodule:: datasetinsights.datasets.unity_perception.validation 49 | :members: 50 | :undoc-members: 51 | :show-inheritance: 52 | 53 | 54 | 55 | .. automodule:: datasetinsights.datasets.unity_perception 56 | :members: 57 | :undoc-members: 58 | :show-inheritance: 59 | -------------------------------------------------------------------------------- /docs/source/datasetinsights.io.downloader.rst: -------------------------------------------------------------------------------- 1 | datasetinsights.io.downloader 2 | ============================= 3 | 4 | 5 | datasetinsights.io.downloader.base 6 | ---------------------------------- 7 | 8 | .. automodule:: datasetinsights.io.downloader.base 9 | :members: 10 | :undoc-members: 11 | :show-inheritance: 12 | 13 | datasetinsights.io.downloader.gcs\_downloader 14 | --------------------------------------------- 15 | 16 | .. automodule:: datasetinsights.io.downloader.gcs_downloader 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | 21 | datasetinsights.io.downloader.http\_downloader 22 | ---------------------------------------------- 23 | 24 | .. automodule:: datasetinsights.io.downloader.http_downloader 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | 30 | 31 | .. automodule:: datasetinsights.io.downloader 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | -------------------------------------------------------------------------------- /docs/source/datasetinsights.io.rst: -------------------------------------------------------------------------------- 1 | datasetinsights.io 2 | ================== 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 2 7 | 8 | datasetinsights.io.downloader 9 | 10 | 11 | datasetinsights.io.bbox 12 | ----------------------- 13 | 14 | .. automodule:: datasetinsights.io.bbox 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | 19 | datasetinsights.io.download 20 | --------------------------- 21 | 22 | .. automodule:: datasetinsights.io.download 23 | :members: 24 | :undoc-members: 25 | :show-inheritance: 26 | 27 | datasetinsights.io.exceptions 28 | ----------------------------- 29 | 30 | .. automodule:: datasetinsights.io.exceptions 31 | :members: 32 | :undoc-members: 33 | :show-inheritance: 34 | 35 | datasetinsights.io.gcs 36 | ---------------------- 37 | 38 | .. automodule:: datasetinsights.io.gcs 39 | :members: 40 | :undoc-members: 41 | :show-inheritance: 42 | 43 | 44 | 45 | .. automodule:: datasetinsights.io 46 | :members: 47 | :undoc-members: 48 | :show-inheritance: 49 | -------------------------------------------------------------------------------- /docs/source/datasetinsights.rst: -------------------------------------------------------------------------------- 1 | datasetinsights 2 | =============== 3 | 4 | 5 | .. 
toctree:: 6 | :maxdepth: 2 7 | 8 | datasetinsights.datasets 9 | datasetinsights.io 10 | datasetinsights.stats 11 | 12 | 13 | .. automodule:: datasetinsights 14 | :members: 15 | :undoc-members: 16 | :show-inheritance: 17 | -------------------------------------------------------------------------------- /docs/source/datasetinsights.stats.rst: -------------------------------------------------------------------------------- 1 | datasetinsights.stats 2 | ===================== 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 2 7 | 8 | datasetinsights.stats.visualization 9 | 10 | 11 | datasetinsights.stats.statistics 12 | -------------------------------- 13 | 14 | .. automodule:: datasetinsights.stats.statistics 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | 19 | 20 | 21 | .. automodule:: datasetinsights.stats 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | -------------------------------------------------------------------------------- /docs/source/datasetinsights.stats.visualization.rst: -------------------------------------------------------------------------------- 1 | datasetinsights.stats.visualization 2 | =================================== 3 | 4 | 5 | datasetinsights.stats.visualization.app 6 | --------------------------------------- 7 | 8 | .. automodule:: datasetinsights.stats.visualization.app 9 | :members: 10 | :undoc-members: 11 | :show-inheritance: 12 | 13 | datasetinsights.stats.visualization.bbox2d\_plot 14 | ------------------------------------------------ 15 | 16 | .. automodule:: datasetinsights.stats.visualization.bbox2d_plot 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | 21 | datasetinsights.stats.visualization.bbox3d\_plot 22 | ------------------------------------------------ 23 | 24 | .. automodule:: datasetinsights.stats.visualization.bbox3d_plot 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | datasetinsights.stats.visualization.constants 30 | --------------------------------------------- 31 | 32 | .. automodule:: datasetinsights.stats.visualization.constants 33 | :members: 34 | :undoc-members: 35 | :show-inheritance: 36 | 37 | datasetinsights.stats.visualization.keypoints\_plot 38 | --------------------------------------------------- 39 | 40 | .. automodule:: datasetinsights.stats.visualization.keypoints_plot 41 | :members: 42 | :undoc-members: 43 | :show-inheritance: 44 | 45 | datasetinsights.stats.visualization.object\_detection 46 | ----------------------------------------------------- 47 | 48 | .. automodule:: datasetinsights.stats.visualization.object_detection 49 | :members: 50 | :undoc-members: 51 | :show-inheritance: 52 | 53 | datasetinsights.stats.visualization.overview 54 | -------------------------------------------- 55 | 56 | .. automodule:: datasetinsights.stats.visualization.overview 57 | :members: 58 | :undoc-members: 59 | :show-inheritance: 60 | 61 | datasetinsights.stats.visualization.plots 62 | ----------------------------------------- 63 | 64 | .. automodule:: datasetinsights.stats.visualization.plots 65 | :members: 66 | :undoc-members: 67 | :show-inheritance: 68 | 69 | 70 | 71 | .. automodule:: datasetinsights.stats.visualization 72 | :members: 73 | :undoc-members: 74 | :show-inheritance: 75 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. Thea documentation master file, created by 2 | sphinx-quickstart on Mon Apr 27 17:25:16 2020. 
3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Dataset Insights 7 | ================ 8 | 9 | Unity Dataset Insights is a python package for downloading, parsing and analyzing synthetic datasets generated using the Unity `Perception SDK `_. 10 | 11 | Installation 12 | ------------ 13 | 14 | Dataset Insights maintains a pip package for easy installation. It can work in any standard Python environment using the :code:`pip install datasetinsights` command. We support Python 3 (3.8, 3.9 and 3.10). 15 | 16 | Getting Started 17 | --------------- 18 | 19 | Dataset Statistics 20 | ~~~~~~~~~~~~~~~~~~ 21 | We provide a sample `notebook `_ to help you load synthetic datasets generated using the `Perception package `_ and visualize dataset statistics. We plan to support other sample Unity projects in the future. 22 | 23 | Dataset Download 24 | ~~~~~~~~~~~~~~~~~~ 25 | 26 | You can download datasets from HTTP(S), GCS, and Unity simulation projects using the download command from the `CLI` or `API`. 27 | 28 | `CLI `_ 29 | 30 | .. code-block:: bash 31 | 32 | datasetinsights download \ 33 | --source-uri=<source-uri> \ 34 | --output=$HOME/data 35 | 36 | `API `_ 37 | 38 | 39 | GCSDatasetDownloader downloads a dataset from a GCS location. 40 | 41 | .. code-block:: python3 42 | 43 | from datasetinsights.io.downloader import GCSDatasetDownloader 44 | 45 | source_uri = "gs://url/to/file.zip" # or "gs://url/to/folder" 46 | dest = "~/data" 47 | downloader = GCSDatasetDownloader() 48 | downloader.download(source_uri=source_uri, output=dest) 49 | 50 | HTTPDatasetDownloader downloads a dataset from any HTTP(S) location. 51 | 52 | .. code-block:: python3 53 | 54 | from datasetinsights.io.downloader import HTTPDatasetDownloader 55 | 56 | source_uri = "http://url.to.file.zip" 57 | dest = "~/data" 58 | downloader = HTTPDatasetDownloader() 59 | downloader.download(source_uri=source_uri, output=dest) 60 | 61 | Dataset Explore 62 | ~~~~~~~~~~~~~~~~~~ 63 | 64 | You can explore the dataset `schema `_ by using the following API: 65 | 66 | `Unity Perception `_ 67 | 68 | AnnotationDefinitions and MetricDefinitions load the synthetic dataset definition tables and return a dictionary containing the definitions. 69 | 70 | .. code-block:: python3 71 | 72 | from datasetinsights.datasets.unity_perception import ( 73 | AnnotationDefinitions, MetricDefinitions) 74 | annotation_def = AnnotationDefinitions(data_root=dest, version="my_schema_version") 75 | definition_dict = annotation_def.get_definition(def_id="my_definition_id") 76 | 77 | metric_def = MetricDefinitions(data_root=dest, version="my_schema_version") 78 | definition_dict = metric_def.get_definition(def_id="my_definition_id") 79 | 80 | Captures loads the synthetic dataset captures tables and returns a pandas dataframe with captures and annotations columns. 81 | 82 | .. code-block:: python3 83 | 84 | from datasetinsights.datasets.unity_perception import Captures 85 | captures = Captures(data_root=dest, version="my_schema_version") 86 | captures_df = captures.filter(def_id="my_definition_id") 87 | 88 | Metrics loads the synthetic dataset metrics table, which holds extra metadata that can be used to describe a particular sequence, capture or annotation, and returns a pandas dataframe with captures and metrics columns. 89 | 90 | ..
code-block:: python3 91 | 92 | from datasetinsights.datasets.unity_perception import Metrics 93 | metrics = Metrics(data_root=dest, version="my_schema_version") 94 | metrics_df = metrics.filter_metrics(def_id="my_definition_id") 95 | 96 | Contents 97 | ======== 98 | 99 | .. toctree:: 100 | :maxdepth: 3 101 | 102 | modules 103 | 104 | 105 | .. toctree:: 106 | :maxdepth: 1 107 | :hidden: 108 | :caption: Getting Started 109 | 110 | SynthDet Guide 111 | 112 | 113 | .. toctree:: 114 | :maxdepth: 1 115 | :hidden: 116 | :caption: Synthetic Dataset 117 | 118 | Synthetic_Dataset_Schema 119 | 120 | 121 | Indices and tables 122 | ================== 123 | 124 | * :ref:`genindex` 125 | * :ref:`modindex` 126 | * :ref:`search` 127 | 128 | Citation 129 | ================== 130 | If you find this package useful, consider citing it using: 131 | 132 | :: 133 | 134 | @misc{datasetinsights2020, 135 | title={Unity {D}ataset {I}nsights Package}, 136 | author={{Unity Technologies}}, 137 | howpublished={\url{https://github.com/Unity-Technologies/datasetinsights}}, 138 | year={2020} 139 | } 140 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | datasetinsights 2 | =============== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | datasetinsights 8 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "datasetinsights" 3 | version = "1.0.0" 4 | description = "Synthetic dataset insights." 5 | license = "Apache-2.0" 6 | authors = [ 7 | "Unity AI Perception Team " 8 | ] 9 | readme = "README.md" 10 | homepage = "https://github.com/Unity-Technologies/datasetinsights" 11 | repository = "https://github.com/Unity-Technologies/datasetinsights" 12 | documentation = "https://datasetinsights.readthedocs.io/en/latest/" 13 | classifiers = [ 14 | "Development Status :: 3 - Alpha", 15 | "Environment :: Console", 16 | "Framework :: Jupyter", 17 | "Operating System :: OS Independent", 18 | "Programming Language :: Python :: 3.8", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 22 | "Topic :: Scientific/Engineering :: Visualization", 23 | "Topic :: Software Development :: Libraries :: Python Modules", 24 | "Topic :: Utilities" 25 | ] 26 | include = [ 27 | "LICENSE", 28 | ] 29 | 30 | 31 | [tool.poetry.dependencies] 32 | python = ">=3.8 <3.11" 33 | cython = "^0.29.14" 34 | google-cloud-storage = "^1.24.1" 35 | numpy = "^1.17" 36 | plotly = ">=5.0.0" 37 | pyquaternion = "^0.9.5" 38 | codetiming = "^1.2.0" 39 | pandas = "^1.0.1" 40 | tqdm = "^4.45.0" 41 | dask = {extras = ["complete"], version = "^2.14.0"} 42 | dash = "^2.3.1" 43 | click = "8.0.4" 44 | opencv-python = "^4.4.0.42" 45 | matplotlib = "^3.3.1" 46 | scipy = "^1.8.0" 47 | PyWavelets = "^1.3.0" 48 | pycocotools = "^2.0.4" 49 | seaborn = "^0.11.2" 50 | 51 | 52 | [tool.poetry.dev-dependencies] 53 | black = "22.3.0" 54 | flake8 = "^3.7.9" 55 | pytest = "^6.0.2" 56 | pytest-cov = "^2.8.1" 57 | responses = "^0.10.9" 58 | isort = "^4.3.21" 59 | sphinx-rtd-theme = "^0.5.0" 60 | recommonmark = "^0.6.0" 61 | sphinx-click = "^2.5.0" 62 | 63 | 64 | [tool.isort] 65 | multi_line_output = 3 66 | include_trailing_comma = true 67 | force_grid_wrap = 0 68 | use_parentheses = true 69 | line_length 
= 80 70 | 71 | [tool.black] 72 | line-length = 80 73 | target-version = ["py37"] 74 | include = '\.pyi?$' 75 | exclude = ''' 76 | 77 | ( 78 | /( 79 | \.eggs # exclude a few common directories in the 80 | | \.git # root of the project 81 | | \.hg 82 | | \.mypy_cache 83 | | \.tox 84 | | \.venv 85 | | _build 86 | | buck-out 87 | | build 88 | | dist 89 | | protos 90 | )/ 91 | ) 92 | ''' 93 | 94 | [tool.pytest.ini_options] 95 | addopts = "--cov=datasetinsights -rxXs --verbose" 96 | testpaths = [ 97 | "tests" 98 | ] 99 | 100 | [tool.poetry.scripts] 101 | datasetinsights = "datasetinsights.__main__:entrypoint" 102 | 103 | [build-system] 104 | requires = ["poetry>=1.0.5"] 105 | build-backend = "poetry.masonry.api" 106 | -------------------------------------------------------------------------------- /tests/datasets/test_coco_transformers.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tempfile 3 | from pathlib import Path 4 | 5 | from datasetinsights.datasets.transformers import ( 6 | COCOInstancesTransformer, 7 | COCOKeypointsTransformer, 8 | ) 9 | 10 | 11 | def assert_json_equals(file1, file2): 12 | with open(file1, "r") as f1: 13 | j1 = json.dumps(json.load(f1), sort_keys=True, indent=4) 14 | with open(file2, "r") as f2: 15 | j2 = json.dumps(json.load(f2), sort_keys=True, indent=4) 16 | 17 | assert j1 == j2 18 | 19 | 20 | def test_coco_instances_transformer(): 21 | parent_dir = Path(__file__).parent.parent.absolute() 22 | mock_data_dir = parent_dir / "mock_data" / "simrun" 23 | mock_coco_dir = parent_dir / "mock_data" / "coco" 24 | 25 | transformer = COCOInstancesTransformer(str(mock_data_dir)) 26 | 27 | with tempfile.TemporaryDirectory() as tmp_dir: 28 | transformer.execute(tmp_dir) 29 | output_file = Path(tmp_dir) / "annotations" / "instances.json" 30 | expected_file = mock_coco_dir / "annotations" / "instances.json" 31 | output_image_folder = Path(tmp_dir) / "images" 32 | 33 | assert output_file.exists() 34 | assert output_image_folder.exists() 35 | assert list(output_image_folder.glob("*")) 36 | assert_json_equals(expected_file, output_file) 37 | 38 | 39 | def test_coco_keypoints_transformer(): 40 | parent_dir = Path(__file__).parent.parent.absolute() 41 | mock_data_dir = parent_dir / "mock_data" / "simrun_keypoint_dataset" 42 | mock_coco_dir = parent_dir / "mock_data" / "coco" 43 | 44 | transformer = COCOKeypointsTransformer(str(mock_data_dir)) 45 | 46 | with tempfile.TemporaryDirectory() as tmp_dir: 47 | transformer.execute(tmp_dir) 48 | output_file = Path(tmp_dir) / "annotations" / "keypoints.json" 49 | expected_file = mock_coco_dir / "annotations" / "keypoints.json" 50 | output_image_folder = Path(tmp_dir) / "images" 51 | 52 | assert output_file.exists() 53 | assert output_image_folder.exists() 54 | assert list(output_image_folder.glob("*")) 55 | assert_json_equals(expected_file, output_file) 56 | -------------------------------------------------------------------------------- /tests/datasets/test_statistics.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from datasetinsights.stats.statistics import RenderedObjectInfo 4 | 5 | 6 | def test_read_filtered_metrics(): 7 | metrics = pd.DataFrame( 8 | { 9 | "capture_id": [ 10 | "", 11 | "1231", 12 | "1231", 13 | "1231", 14 | "2324", 15 | "323523", 16 | "323523", 17 | ], 18 | "label_id": [0, 1, 2, 3, 1, 2, 3], 19 | "label_name": ["", "car", "bike", "child", "car", "bike", "child"], 20 | "value": [0, 2, 3, 1, 1, 1, 
4], 21 | } 22 | ) 23 | mappings = {1: "car", 2: "bike", 3: "child"} 24 | expected = pd.DataFrame( 25 | { 26 | "capture_id": ["1231", "1231", "1231", "2324", "323523", "323523"], 27 | "label_id": [1, 2, 3, 1, 2, 3], 28 | "label_name": ["car", "bike", "child", "car", "bike", "child"], 29 | "value": [2, 3, 1, 1, 1, 4], 30 | } 31 | ) 32 | 33 | agg = RenderedObjectInfo._read_filtered_metrics(metrics, mappings) 34 | agg = agg.reset_index(drop=True) 35 | pd.testing.assert_frame_equal(agg, expected, check_like=True) 36 | -------------------------------------------------------------------------------- /tests/datasets/test_synthetic.py: -------------------------------------------------------------------------------- 1 | from datasetinsights.datasets.synthetic import read_bounding_box_2d 2 | from datasetinsights.io.bbox import BBox2D 3 | 4 | 5 | def test_read_bounding_box_2d(): 6 | annotation = [ 7 | { 8 | "instance_id": "...", 9 | "label_id": 27, 10 | "label_name": "car", 11 | "x": 30, 12 | "y": 50, 13 | "width": 100, 14 | "height": 100, 15 | } 16 | ] 17 | definition = { 18 | "id": 1243, 19 | "name": "...", 20 | "description": "...", 21 | "format": "JSON", 22 | "spec": [{"label_id": 27, "label_name": "car"}], 23 | } 24 | label_mappings = { 25 | m["label_id"]: m["label_name"] for m in definition["spec"] 26 | } 27 | bbox = read_bounding_box_2d(annotation, label_mappings) 28 | 29 | assert bbox == [BBox2D(27, 30, 50, 100, 100)] 30 | -------------------------------------------------------------------------------- /tests/mock_data/calib000000.txt: -------------------------------------------------------------------------------- 1 | P0: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 0.000000000000e+00 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 -3.797842000000e+02 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 4.575831000000e+01 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 -3.454157000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 4.981016000000e-03 4 | P3: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 -3.341081000000e+02 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 2.330660000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 3.201153000000e-03 5 | R0_rect: 9.999128000000e-01 1.009263000000e-02 -8.511932000000e-03 -1.012729000000e-02 9.999406000000e-01 -4.037671000000e-03 8.470675000000e-03 4.123522000000e-03 9.999556000000e-01 6 | Tr_velo_to_cam: 6.927964000000e-03 -9.999722000000e-01 -2.757829000000e-03 -2.457729000000e-02 -1.162982000000e-03 2.749836000000e-03 -9.999955000000e-01 -6.127237000000e-02 9.999753000000e-01 6.931141000000e-03 -1.143899000000e-03 -3.321029000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | -------------------------------------------------------------------------------- /tests/mock_data/coco/annotations/instances.json: -------------------------------------------------------------------------------- 1 | { 2 | "info": { 3 | "description": "COCO compatible 
Synthetic Dataset" 4 | }, 5 | "licences": [ 6 | { 7 | "url": "", 8 | "id": 1, 9 | "name": "default" 10 | } 11 | ], 12 | "images": [ 13 | { 14 | "file_name": "camera_91891091516384550185081373185892902457.png", 15 | "height": 240, 16 | "width": 320, 17 | "id": 91891091516384550185081373185892902457 18 | } 19 | ], 20 | "annotations": [ 21 | { 22 | "segmentation": [], 23 | "area": 10000.0, 24 | "iscrowd": 0, 25 | "image_id": 91891091516384550185081373185892902457, 26 | "bbox": [ 27 | 30.0, 28 | 50.0, 29 | 100.0, 30 | 100.0 31 | ], 32 | "category_id": 27, 33 | "id": 244409769007218865362436775986662996774 34 | }, 35 | { 36 | "segmentation": [], 37 | "area": 1000.0, 38 | "iscrowd": 0, 39 | "image_id": 91891091516384550185081373185892902457, 40 | "bbox": [ 41 | 120.0, 42 | 231.0, 43 | 50.0, 44 | 20.0 45 | ], 46 | "category_id": 34, 47 | "id": 328316353567376980370842232520647311162 48 | }, 49 | { 50 | "segmentation": [], 51 | "area": 200.0, 52 | "iscrowd": 0, 53 | "image_id": 91891091516384550185081373185892902457, 54 | "bbox": [ 55 | 132.0, 56 | 83.0, 57 | 10.0, 58 | 20.0 59 | ], 60 | "category_id": 25, 61 | "id": 244577869532652886288531412869200144247 62 | } 63 | ], 64 | "categories": [ 65 | { 66 | "id": 27, 67 | "name": "car", 68 | "supercategory": "default" 69 | }, 70 | { 71 | "id": 34, 72 | "name": "bicycle", 73 | "supercategory": "default" 74 | }, 75 | { 76 | "id": 25, 77 | "name": "person", 78 | "supercategory": "default" 79 | } 80 | ] 81 | } 82 | -------------------------------------------------------------------------------- /tests/mock_data/coco/annotations/keypoints.json: -------------------------------------------------------------------------------- 1 | {"info": {"description": "COCO compatible Synthetic Dataset"}, "licences": [{"url": "", "id": 1, "name": "default"}], "images": [{"file_name": "camera_61855733451949387398181790757513827492.png", "height": 640, "width": 640, "id": 61855733451949387398181790757513827492}, {"file_name": "camera_125709864006893838062514269195103918838.png", "height": 640, "width": 640, "id": 125709864006893838062514269195103918838}], "annotations": [{"segmentation": [], "area": 1035.0, "iscrowd": 0, "image_id": 61855733451949387398181790757513827492, "bbox": [72.0, 0.0, 45.0, 23.0], "keypoints": [442, 66, 1, 438, 61, 1, 441, 60, 1, 429, 59, 1, 439, 53, 2, 417, 75, 1, 442, 63, 2, 419, 92, 1, 468, 52, 2, 434, 112, 1, 489, 53, 2, 414, 119, 2, 423, 119, 2, 435, 145, 2, 425, 152, 2, 425, 185, 2, 396, 181, 2], "num_keypoints": 17, "category_id": 1, "id": 334610669898986761222408873459836779863}, {"segmentation": [], "area": 2064.0, "iscrowd": 0, "image_id": 61855733451949387398181790757513827492, "bbox": [263.0, 0.0, 48.0, 43.0], "keypoints": [570, 308, 2, 573, 304, 2, 567, 304, 2, 577, 304, 2, 562, 302, 2, 587, 318, 2, 557, 321, 2, 608, 327, 2, 549, 338, 2, 617, 339, 2, 550, 358, 2, 579, 353, 2, 567, 354, 2, 578, 390, 2, 591, 378, 2, 565, 410, 2, 594, 413, 2], "num_keypoints": 17, "category_id": 1, "id": 339285116325029240400003306295766986617}, {"segmentation": [], "area": 2240.0, "iscrowd": 0, "image_id": 61855733451949387398181790757513827492, "bbox": [626.0, 18.0, 14.0, 160.0], "keypoints": [308, 236, 2, 312, 236, 2, 304, 235, 2, 319, 245, 2, 296, 242, 2, 325, 266, 2, 288, 268, 2, 342, 267, 2, 259, 270, 2, 329, 250, 2, 265, 250, 2, 317, 320, 2, 292, 320, 2, 319, 375, 2, 283, 374, 2, 323, 418, 2, 281, 419, 2], "num_keypoints": 17, "category_id": 1, "id": 340280744268748444979888511031625297271}, {"segmentation": [], "area": 16830.0, "iscrowd": 0, 
"image_id": 61855733451949387398181790757513827492, "bbox": [389.0, 44.0, 110.0, 153.0], "keypoints": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 638, 37, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "num_keypoints": 1, "category_id": 1, "id": 333968447710968161883062887274436145021}, {"segmentation": [], "area": 4312.0, "iscrowd": 0, "image_id": 61855733451949387398181790757513827492, "bbox": [97.0, 129.0, 49.0, 88.0], "keypoints": [116, 142, 2, 119, 139, 2, 116, 139, 1, 123, 137, 2, 116, 138, 1, 131, 152, 2, 114, 148, 2, 129, 169, 2, 102, 166, 2, 125, 185, 2, 100, 176, 2, 139, 169, 1, 128, 167, 1, 129, 177, 2, 104, 174, 2, 129, 205, 2, 110, 202, 2], "num_keypoints": 17, "category_id": 1, "id": 333947764713911287440370339802593095505}, {"segmentation": [], "area": 10108.0, "iscrowd": 0, "image_id": 61855733451949387398181790757513827492, "bbox": [546.0, 291.0, 76.0, 133.0], "keypoints": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 304, 32, 2, 274, 34, 2], "num_keypoints": 2, "category_id": 1, "id": 339595274893044431302040368385860419549}, {"segmentation": [], "area": 18900.0, "iscrowd": 0, "image_id": 61855733451949387398181790757513827492, "bbox": [255.0, 227.0, 90.0, 210.0], "keypoints": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 13, 2, 112, 12, 2], "num_keypoints": 2, "category_id": 1, "id": 334944599550083874293864550321790824317}, {"segmentation": [], "area": 28400.0, "iscrowd": 0, "image_id": 61855733451949387398181790757513827492, "bbox": [143.0, 228.0, 100.0, 284.0], "keypoints": [174, 258, 2, 182, 250, 2, 174, 251, 2, 199, 247, 2, 178, 248, 1, 224, 281, 2, 174, 285, 2, 232, 320, 2, 163, 327, 2, 215, 362, 2, 150, 367, 2, 222, 372, 2, 190, 373, 1, 209, 432, 2, 165, 427, 1, 220, 493, 2, 173, 495, 1], "num_keypoints": 17, "category_id": 1, "id": 334943387676138995482526044752816615411}, {"segmentation": [], "area": 5900.0, "iscrowd": 0, "image_id": 125709864006893838062514269195103918838, "bbox": [41.0, 0.0, 59.0, 100.0], "keypoints": [62, 235, 2, 47, 224, 1, 48, 223, 2, 34, 225, 1, 34, 225, 2, 25, 287, 1, 23, 303, 2, 11, 375, 1, 9, 401, 2, 28, 448, 1, 39, 486, 2, 26, 425, 1, 29, 423, 2, 63, 566, 1, 46, 577, 2, 0, 0, 0, 0, 0, 0], "num_keypoints": 15, "category_id": 1, "id": 318336697634370290125178793847372611071}, {"segmentation": [], "area": 7743.0, "iscrowd": 0, "image_id": 125709864006893838062514269195103918838, "bbox": [225.0, 95.0, 87.0, 89.0], "keypoints": [267, 102, 1, 266, 100, 1, 269, 100, 1, 265, 100, 1, 272, 101, 1, 261, 111, 2, 277, 111, 2, 246, 109, 2, 291, 109, 2, 234, 108, 2, 304, 106, 2, 262, 131, 2, 271, 133, 2, 259, 147, 2, 267, 157, 2, 257, 167, 2, 264, 178, 2], "num_keypoints": 17, "category_id": 1, "id": 315691618573741103626618763093389048572}, {"segmentation": [], "area": 12152.0, "iscrowd": 0, "image_id": 125709864006893838062514269195103918838, "bbox": [37.0, 265.0, 56.0, 217.0], "keypoints": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68, 38, 2, 0, 0, 0, 65, 86, 2, 60, 16, 2], "num_keypoints": 3, "category_id": 1, "id": 339604711293634020202778213587130907389}, {"segmentation": [], "area": 3731.0, "iscrowd": 0, "image_id": 125709864006893838062514269195103918838, "bbox": [305.0, 426.0, 41.0, 91.0], "keypoints": [586, 439, 2, 583, 434, 1, 
584, 435, 2, 577, 433, 1, 579, 437, 2, 566, 450, 1, 570, 460, 2, 565, 466, 1, 561, 480, 2, 579, 482, 2, 563, 501, 2, 558, 485, 1, 558, 494, 2, 578, 513, 2, 558, 531, 2, 575, 546, 2, 523, 539, 2], "num_keypoints": 17, "category_id": 1, "id": 336948836902231858787782549365331982589}, {"segmentation": [], "area": 10472.0, "iscrowd": 0, "image_id": 125709864006893838062514269195103918838, "bbox": [515.0, 421.0, 77.0, 136.0], "keypoints": [319, 439, 2, 319, 438, 1, 320, 435, 1, 323, 439, 2, 327, 431, 1, 326, 452, 2, 334, 442, 2, 314, 456, 2, 337, 444, 1, 309, 456, 1, 337, 448, 1, 338, 467, 2, 340, 461, 2, 327, 486, 2, 329, 464, 1, 328, 508, 2, 341, 480, 2], "num_keypoints": 17, "category_id": 1, "id": 296747512066848616081022417879095443452}, {"segmentation": [], "area": 4851.0, "iscrowd": 0, "image_id": 125709864006893838062514269195103918838, "bbox": [312.0, 563.0, 63.0, 77.0], "keypoints": [359, 582, 2, 356, 576, 1, 355, 578, 2, 350, 572, 1, 347, 575, 2, 333, 595, 1, 344, 599, 2, 328, 613, 1, 341, 617, 2, 329, 633, 1, 356, 617, 2, 324, 635, 1, 0, 0, 0, 351, 624, 2, 0, 0, 0, 362, 638, 2, 0, 0, 0], "num_keypoints": 14, "category_id": 1, "id": 340279443083261501468039155580732538109}, {"segmentation": [], "area": 38700.0, "iscrowd": 0, "image_id": 125709864006893838062514269195103918838, "bbox": [0.0, 190.0, 86.0, 450.0], "keypoints": [43, 285, 1, 37, 280, 1, 45, 280, 1, 27, 280, 1, 49, 281, 2, 12, 313, 1, 63, 314, 1, 0, 0, 0, 81, 343, 2, 14, 363, 1, 86, 369, 2, 32, 378, 1, 52, 373, 1, 36, 431, 1, 59, 414, 1, 63, 469, 1, 73, 469, 1], "num_keypoints": 16, "category_id": 1, "id": 273817934909826610959195516144229154047}], "categories": [{"id": 1, "name": "person", "supercategory": "default", "keypoints": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_shoulder", "right_shoulder", "left_elbow", "right_elbow", "left_wrist", "right_wrist", "left_hip", "right_hip", "left_knee", "right_knee", "left_ankle", "right_ankle"], "skeleton": [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]}]} 2 | -------------------------------------------------------------------------------- /tests/mock_data/coco/images/camera_001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/coco/images/camera_001.png -------------------------------------------------------------------------------- /tests/mock_data/coco/images/camera_125709864006893838062514269195103918838.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/coco/images/camera_125709864006893838062514269195103918838.png -------------------------------------------------------------------------------- /tests/mock_data/coco/images/camera_61855733451949387398181790757513827492.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/coco/images/camera_61855733451949387398181790757513827492.png -------------------------------------------------------------------------------- /tests/mock_data/no_annotations_or_metrics/Dataset/annotation_definitions.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "annotation_definitions": [ 4 | { 5 | "id": 1, 6 | "name": "semantic segmentation", 7 | "description": "pixel-wise semantic segmentation label", 8 | "format": "PNG", 9 | "spec": [ 10 | {"label_id": 8, "label_name": "road", "pixel_value": 0}, 11 | {"label_id": 9, "label_name": "sidewalk", "pixel_value": 1}, 12 | {"label_id": 12, "label_name": "building", "pixel_value": 2}, 13 | {"label_id": 13, "label_name": "wall", "pixel_value": 3}, 14 | {"label_id": 14, "label_name": "fence", "pixel_value": 4}, 15 | {"label_id": 18, "label_name": "pole", "pixel_value": 5}, 16 | {"label_id": 20, "label_name": "traffic light", "pixel_value": 6}, 17 | {"label_id": 21, "label_name": "traffic sign", "pixel_value": 7}, 18 | {"label_id": 22, "label_name": "vegetation", "pixel_value": 8}, 19 | {"label_id": 23, "label_name": "terrain", "pixel_value": 9}, 20 | {"label_id": 24, "label_name": "sky", "pixel_value": 10}, 21 | {"label_id": 25, "label_name": "person", "pixel_value": 11}, 22 | {"label_id": 26, "label_name": "rider", "pixel_value": 12}, 23 | {"label_id": 27, "label_name": "car", "pixel_value": 13}, 24 | {"label_id": 28, "label_name": "truck", "pixel_value": 14}, 25 | {"label_id": 29, "label_name": "bus", "pixel_value": 15}, 26 | {"label_id": 32, "label_name": "train", "pixel_value": 16}, 27 | {"label_id": 33, "label_name": "motorcycle", "pixel_value": 17}, 28 | {"label_id": 34, "label_name": "bicycle", "pixel_value": 18} 29 | ] 30 | }, 31 | { 32 | "id": 2, 33 | "name": "3d bounding box", 34 | "description": "3d bounding box annotation of object instances", 35 | "format": "JSON", 36 | "spec": [ 37 | {"label_id": 27, "label_name": "car"}, 38 | {"label_id": 34, "label_name": "bicycle"}, 39 | {"label_id": 25, "label_name": "person"} 40 | ] 41 | }, 42 | { 43 | "id": 3, 44 | "name": "lidar semantic segmention", 45 | "description": "3d point cloud semantic segmentation", 46 | "format": "PCD", 47 | "spec": [ 48 | {"label_id": 27, "label_name": "car", "point_value": 0}, 49 | {"label_id": 34, "label_name": "bicycle", "point_value": 1}, 50 | {"label_id": 25, "label_name": "person", "point_value": 2} 51 | ] 52 | }, 53 | { 54 | "id": 4, 55 | "name": "2d bounding box", 56 | "description": "2d bounding box annotation", 57 | "format": "JSON", 58 | "spec": [ 59 | {"label_id": 27, "label_name": "car"}, 60 | {"label_id": 34, "label_name": "bicycle"}, 61 | {"label_id": 25, "label_name": "person"} 62 | ] 63 | } 64 | ] 65 | } 66 | -------------------------------------------------------------------------------- /tests/mock_data/no_annotations_or_metrics/Dataset/captures_000.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "captures": [ 4 | { 5 | "id": "e8b44709-dddf-439d-94d2-975460924903", 6 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 7 | "step": 1, 8 | "timestamp": 1, 9 | "sensor": { 10 | "sensor_id": "b4f6a75e-12de-4b4c-9574-5b135cecac6f", 11 | "ego_id": "4f80234d-4342-420f-9187-07004613cd1f", 12 | "modality": "camera", 13 | "translation": [0.2, 1.1, 0.3], 14 | "rotation": [0.3, 0.2, 0.1, 0.5], 15 | "camera_intrinsic": [ 16 | [0.1, 0, 0], 17 | [3.0, 0.1, 0], 18 | [0.5, 0.45, 1] 19 | ] 20 | }, 21 | "ego": { 22 | "ego_id": 1, 23 | "translation": [0.02, 0.0, 0.0], 24 | "rotation": [0.1, 0.1, 0.3, 0.0], 25 | "velocity": [0.1, 0.1, 0.0], 26 | "acceleration": null 27 | }, 28 | "filename": "captures/camera_000.png", 29 | "format": 
"PNG", 30 | "annotations": [ 31 | ] 32 | } 33 | ] 34 | } 35 | -------------------------------------------------------------------------------- /tests/mock_data/no_annotations_or_metrics/Dataset/captures_001.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "captures": [ 4 | { 5 | "id": "4521949a-2a71-4c03-beb0-4f6362676639", 6 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 7 | "step": 2, 8 | "timestamp": 2, 9 | "sensor": { 10 | "sensor_id": 1, 11 | "ego_id": 1, 12 | "modality": "camera", 13 | "translation": [0.2, 1.1, 0.3], 14 | "rotation": [0.3, 0.2, 0.1, 0.5], 15 | "camera_intrinsic": [ 16 | [0.1, 0, 0], 17 | [3.0, 0.1, 0], 18 | [0.5, 0.45, 1] 19 | ] 20 | }, 21 | "ego": { 22 | "ego_id": 1, 23 | "translation": [0.12, 0.1, 0.0], 24 | "rotation": [0.0, 0.15, 0.24, 0.0], 25 | "velocity": [0.0, 0.0, 0.0], 26 | "acceleration": null 27 | }, 28 | "filename": "captures/camera_001.png", 29 | "format": "PNG", 30 | "annotations": [ 31 | ] 32 | }, 33 | { 34 | "id": "4b35a47a-3f63-4af3-b0e8-e68cb384ad75", 35 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 36 | "step": 2, 37 | "timestamp": 2, 38 | "sensor": { 39 | "sensor_id": 2, 40 | "ego_id": 1, 41 | "modality": "lidar", 42 | "translation": [0.0, 0.0, 0.0], 43 | "rotation": [0.0, 0.0, 0.0, 0.0], 44 | "camera_intrinsic": null 45 | }, 46 | "ego": { 47 | "ego_id": 1, 48 | "translation": [0.12, 0.1, 0.0], 49 | "rotation": [0.0, 0.15, 0.24, 0.0], 50 | "velocity": [0.0, 0.0, 0.0], 51 | "acceleration": null 52 | }, 53 | "filename": "captures/lidar_000.pcd", 54 | "format": "PCD", 55 | "annotations": [ 56 | ] 57 | } 58 | ] 59 | } 60 | -------------------------------------------------------------------------------- /tests/mock_data/no_annotations_or_metrics/Dataset/egos.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "egos": [ 4 | { 5 | "id": "4f80234d-4342-420f-9187-07004613cd1f", 6 | "description": "the main car driving in simulation" 7 | } 8 | ] 9 | } 10 | -------------------------------------------------------------------------------- /tests/mock_data/no_annotations_or_metrics/Dataset/metric_definitions.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "metric_definitions": [ 4 | { 5 | "id": 1, 6 | "name": "object count", 7 | "description": "count number of objects observed", 8 | "spec": [ 9 | {"label_id": 27, "label_name": "car"}, 10 | {"label_id": 34, "label_name": "bicycle"}, 11 | {"label_id": 25, "label_name": "person"} 12 | ] 13 | }, 14 | { 15 | "id": 2, 16 | "name": "visible pixel", 17 | "description": "visible pixel", 18 | "spec": [ 19 | {"label_id": 21, "label_name": "watch"}, 20 | {"label_id": 28, "label_name": "book"} 21 | ] 22 | } 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /tests/mock_data/no_annotations_or_metrics/Dataset/metrics_000.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "metrics": [ 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /tests/mock_data/no_annotations_or_metrics/Dataset/sensors.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "sensors": [ 4 | { 5 | "id": "b4f6a75e-12de-4b4c-9574-5b135cecac6f", 6 | "ego_id": "4f80234d-4342-420f-9187-07004613cd1f", 7 
| "modality": "camera", 8 | "description": "Point Grey Flea 2 (FL2-14S3M-C)" 9 | }, 10 | { 11 | "id": "6fb1a823-5b83-4a79-b566-fe4435ec1942", 12 | "ego_id": "4f80234d-4342-420f-9187-07004613cd1f", 13 | "modality": "lidar", 14 | "description": "Velodyne HDL-64E" 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /tests/mock_data/simrun/Dataset/annotation_definitions.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "annotation_definitions": [ 4 | { 5 | "id": "1", 6 | "name": "semantic segmentation", 7 | "description": "pixel-wise semantic segmentation label", 8 | "format": "PNG", 9 | "spec": [ 10 | {"label_id": 8, "label_name": "road", "pixel_value": 0}, 11 | {"label_id": 9, "label_name": "sidewalk", "pixel_value": 1}, 12 | {"label_id": 12, "label_name": "building", "pixel_value": 2}, 13 | {"label_id": 13, "label_name": "wall", "pixel_value": 3}, 14 | {"label_id": 14, "label_name": "fence", "pixel_value": 4}, 15 | {"label_id": 18, "label_name": "pole", "pixel_value": 5}, 16 | {"label_id": 20, "label_name": "traffic light", "pixel_value": 6}, 17 | {"label_id": 21, "label_name": "traffic sign", "pixel_value": 7}, 18 | {"label_id": 22, "label_name": "vegetation", "pixel_value": 8}, 19 | {"label_id": 23, "label_name": "terrain", "pixel_value": 9}, 20 | {"label_id": 24, "label_name": "sky", "pixel_value": 10}, 21 | {"label_id": 25, "label_name": "person", "pixel_value": 11}, 22 | {"label_id": 26, "label_name": "rider", "pixel_value": 12}, 23 | {"label_id": 27, "label_name": "car", "pixel_value": 13}, 24 | {"label_id": 28, "label_name": "truck", "pixel_value": 14}, 25 | {"label_id": 29, "label_name": "bus", "pixel_value": 15}, 26 | {"label_id": 32, "label_name": "train", "pixel_value": 16}, 27 | {"label_id": 33, "label_name": "motorcycle", "pixel_value": 17}, 28 | {"label_id": 34, "label_name": "bicycle", "pixel_value": 18} 29 | ] 30 | }, 31 | { 32 | "id": "2", 33 | "name": "3d bounding box", 34 | "description": "3d bounding box annotation of object instances", 35 | "format": "JSON", 36 | "spec": [ 37 | {"label_id": 27, "label_name": "car"}, 38 | {"label_id": 34, "label_name": "bicycle"}, 39 | {"label_id": 25, "label_name": "person"} 40 | ] 41 | }, 42 | { 43 | "id": "3", 44 | "name": "lidar semantic segmention", 45 | "description": "3d point cloud semantic segmentation", 46 | "format": "PCD", 47 | "spec": [ 48 | {"label_id": 27, "label_name": "car", "point_value": 0}, 49 | {"label_id": 34, "label_name": "bicycle", "point_value": 1}, 50 | {"label_id": 25, "label_name": "person", "point_value": 2} 51 | ] 52 | }, 53 | { 54 | "id": "4", 55 | "name": "2d bounding box", 56 | "description": "2d bounding box annotation", 57 | "format": "JSON", 58 | "spec": [ 59 | {"label_id": 27, "label_name": "car"}, 60 | {"label_id": 34, "label_name": "bicycle"}, 61 | {"label_id": 25, "label_name": "person"} 62 | ] 63 | } 64 | ] 65 | } 66 | -------------------------------------------------------------------------------- /tests/mock_data/simrun/Dataset/captures_000.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "captures": [ 4 | { 5 | "id": "e8b44709-dddf-439d-94d2-975460924903", 6 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 7 | "step": 1, 8 | "timestamp": 1, 9 | "sensor": { 10 | "sensor_id": "b4f6a75e-12de-4b4c-9574-5b135cecac6f", 11 | "ego_id": "4f80234d-4342-420f-9187-07004613cd1f", 12 | "modality": 
"camera", 13 | "translation": [0.2, 1.1, 0.3], 14 | "rotation": [0.3, 0.2, 0.1, 0.5], 15 | "camera_intrinsic": [ 16 | [0.1, 0, 0], 17 | [3.0, 0.1, 0], 18 | [0.5, 0.45, 1] 19 | ] 20 | }, 21 | "ego": { 22 | "ego_id": 1, 23 | "translation": [0.02, 0.0, 0.0], 24 | "rotation": [0.1, 0.1, 0.3, 0.0], 25 | "velocity": [0.1, 0.1, 0.0], 26 | "acceleration": null 27 | }, 28 | "filename": "captures/camera_000.png", 29 | "format": "PNG", 30 | "annotations": [ 31 | { 32 | "id": "35cbdf6e-96e5-446e-852d-fe40be79ce77", 33 | "annotation_definition": "1", 34 | "filename": "annotations/semantic_segmentation_000.png", 35 | "values": null 36 | } 37 | ] 38 | } 39 | ] 40 | } 41 | -------------------------------------------------------------------------------- /tests/mock_data/simrun/Dataset/captures_001.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "captures": [ 4 | { 5 | "id": "4521949a-2a71-4c03-beb0-4f6362676639", 6 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 7 | "step": 2, 8 | "timestamp": 2, 9 | "sensor": { 10 | "sensor_id": 1, 11 | "ego_id": 1, 12 | "modality": "camera", 13 | "translation": [0.2, 1.1, 0.3], 14 | "rotation": [0.3, 0.2, 0.1, 0.5], 15 | "camera_intrinsic": [ 16 | [0.1, 0, 0], 17 | [3.0, 0.1, 0], 18 | [0.5, 0.45, 1] 19 | ] 20 | }, 21 | "ego": { 22 | "ego_id": 1, 23 | "translation": [0.12, 0.1, 0.0], 24 | "rotation": [0.0, 0.15, 0.24, 0.0], 25 | "velocity": [0.0, 0.0, 0.0], 26 | "acceleration": null 27 | }, 28 | "filename": "captures/camera_001.png", 29 | "format": "PNG", 30 | "annotations": [ 31 | { 32 | "id": "a79ab4fb-acf3-47ad-8a6f-20af795e23e1", 33 | "annotation_definition": "1", 34 | "filename": "annotations/semantic_segmentation_001.png", 35 | "values": null 36 | }, 37 | { 38 | "id": "36db01f8-e322-4c81-a650-bec89a7e6100", 39 | "annotation_definition": "2", 40 | "filename": null, 41 | "values": [ 42 | { 43 | "instance_id": "85149ab1-3b75-443b-8540-773b31559a26", 44 | "label_id": 27, 45 | "label_name": "car", 46 | "translation": [24.0, 12.1, 0.0], 47 | "size": [2.0, 3.0, 1.0], 48 | "rotation": [0.0, 1.0, 2.0, 0.0], 49 | "velocity": [0.5, 0.0, 0.0], 50 | "acceleration": null 51 | }, 52 | { 53 | "instance_id": "f2e56dad-9bfd-4930-9dca-bfe08672de3a", 54 | "label_id": 34, 55 | "label_name": "bicycle", 56 | "translation": [5.2, 7.9, 0.0], 57 | "size": [0.3, 0.5, 1.0], 58 | "rotation": [0.0, 1.0, 2.0, 0.0], 59 | "velocity": [0.0, 0.1, 0.0], 60 | "acceleration": null 61 | }, 62 | { 63 | "instance_id": "a52dfb48-e5a4-4008-96b6-80da91caa777", 64 | "label_id": 25, 65 | "label_name": "person", 66 | "translation": [41.2, 1.5, 0.0], 67 | "size": [0.3, 0.3, 1.8], 68 | "rotation": [0.0, 1.0, 2.0, 0.0], 69 | "velocity": [0.05, 0.0, 0.0], 70 | "acceleration": null 71 | } 72 | ] 73 | }, 74 | { 75 | "id": "36db01f8-e322-4c81-a650-bec89a7e6100", 76 | "annotation_definition": "4", 77 | "filename": null, 78 | "values": [ 79 | { 80 | "instance_id": "85149ab1-3b75-443b-8540-773b31559a26", 81 | "label_id": 27, 82 | "label_name": "car", 83 | "x": 30.0, 84 | "y": 50.0, 85 | "width": 100.0, 86 | "height": 100.0 87 | }, 88 | { 89 | "instance_id": "f2e56dad-9bfd-4930-9dca-bfe08672de3a", 90 | "label_id": 34, 91 | "label_name": "bicycle", 92 | "x": 120.0, 93 | "y": 231.0, 94 | "width": 50.0, 95 | "height": 20.0 96 | }, 97 | { 98 | "instance_id": "a52dfb48-e5a4-4008-96b6-80da91caa777", 99 | "label_id": 25, 100 | "label_name": "person", 101 | "x": 132.0, 102 | "y": 83.0, 103 | "width": 10.0, 104 | "height": 20.0 105 | } 106 | ] 107 | } 
108 | ] 109 | }, 110 | { 111 | "id": "4b35a47a-3f63-4af3-b0e8-e68cb384ad75", 112 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 113 | "step": 2, 114 | "timestamp": 2, 115 | "sensor": { 116 | "sensor_id": 2, 117 | "ego_id": 1, 118 | "modality": "lidar", 119 | "translation": [0.0, 0.0, 0.0], 120 | "rotation": [0.0, 0.0, 0.0, 0.0], 121 | "camera_intrinsic": null 122 | }, 123 | "ego": { 124 | "ego_id": 1, 125 | "translation": [0.12, 0.1, 0.0], 126 | "rotation": [0.0, 0.15, 0.24, 0.0], 127 | "velocity": [0.0, 0.0, 0.0], 128 | "acceleration": null 129 | }, 130 | "filename": "captures/lidar_000.pcd", 131 | "format": "PCD", 132 | "annotations": [ 133 | { 134 | "id": "3b7b2af7-4d9f-4f1d-a9f5-32365c5896c8", 135 | "annotation_definition": "3", 136 | "filename": "annotations/lidar_semantic_segmentation_000.pcd" 137 | } 138 | ] 139 | } 140 | ] 141 | } 142 | -------------------------------------------------------------------------------- /tests/mock_data/simrun/Dataset/egos.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "egos": [ 4 | { 5 | "id": "4f80234d-4342-420f-9187-07004613cd1f", 6 | "description": "the main car driving in simulation" 7 | } 8 | ] 9 | } 10 | -------------------------------------------------------------------------------- /tests/mock_data/simrun/Dataset/metric_definitions.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "metric_definitions": [ 4 | { 5 | "id": 1, 6 | "name": "object count", 7 | "description": "count number of objects observed", 8 | "spec": [ 9 | {"label_id": 27, "label_name": "car"}, 10 | {"label_id": 34, "label_name": "bicycle"}, 11 | {"label_id": 25, "label_name": "person"} 12 | ] 13 | }, 14 | { 15 | "id": 2, 16 | "name": "visible pixel", 17 | "description": "visible pixel", 18 | "spec": [ 19 | {"label_id": 21, "label_name": "watch"}, 20 | {"label_id": 28, "label_name": "book"} 21 | ] 22 | } 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /tests/mock_data/simrun/Dataset/metrics_000.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "metrics": [ 4 | { 5 | "capture_id": "e8b44709-dddf-439d-94d2-975460924903", 6 | "annotation_id": null, 7 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 8 | "step": 1, 9 | "metric_definition": 1, 10 | "values": [ 11 | {"label_id": 27, "label_name": "car", "count": 5}, 12 | {"label_id": 34, "label_name": "bicycle", "count": 1}, 13 | {"label_id": 25, "label_name": "person", "count": 7} 14 | ] 15 | }, 16 | { 17 | "capture_id": "4b35a47a-3f63-4af3-b0e8-e68cb384ad75", 18 | "annotation_id": "35cbdf6e-96e5-446e-852d-fe40be79ce77", 19 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 20 | "step": 1, 21 | "metric_definition": 1, 22 | "values": [ 23 | {"label_id": 27, "label_name": "car", "count": 3}, 24 | {"label_id": 25, "label_name": "person", "count": 2} 25 | ] 26 | }, 27 | { 28 | "capture_id": "3d09bbce-7f7b-4d9c-8c8a-2f75158e0c8e", 29 | "annotation_id": null, 30 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 31 | "step": 1, 32 | "metric_definition": 1, 33 | "values": [ 34 | {"label_id": 27, "label_name": "car", "count": 1}, 35 | {"label_id": 34, "label_name": "bicycle", "count": 2}, 36 | {"label_id": 25, "label_name": "person", "count": 2} 37 | ] 38 | }, 39 | { 40 | "capture_id": "3d09bbce-7f7b-4d9c-8c8a-2f75158e0c8e", 41 | "annotation_id": 
null, 42 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 43 | "step": 1, 44 | "metric_definition": 2, 45 | "values": [ 46 | {"label_id": 21, "visible_pixels": 1}, 47 | {"label_id": 28, "visible_pixels": 2} 48 | ] 49 | } 50 | ] 51 | } 52 | -------------------------------------------------------------------------------- /tests/mock_data/simrun/Dataset/sensors.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "sensors": [ 4 | { 5 | "id": "b4f6a75e-12de-4b4c-9574-5b135cecac6f", 6 | "ego_id": "4f80234d-4342-420f-9187-07004613cd1f", 7 | "modality": "camera", 8 | "description": "Point Grey Flea 2 (FL2-14S3M-C)" 9 | }, 10 | { 11 | "id": "6fb1a823-5b83-4a79-b566-fe4435ec1942", 12 | "ego_id": "4f80234d-4342-420f-9187-07004613cd1f", 13 | "modality": "lidar", 14 | "description": "Velodyne HDL-64E" 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /tests/mock_data/simrun/README.md: -------------------------------------------------------------------------------- 1 | Mockup of Synthetic Dataset 2 | 3 | This is a mock dataset created according to this schema [design](https://docs.google.com/document/d/1lKPm06z09uX9gZIbmBUMO6WKlIGXiv3hgXb_taPOnU0) 4 | 5 | Included in this mockup: 6 | 7 | - 1 ego car 8 | - 2 sensors: 1 camera and 1 LIDAR 9 | - 19 labels 10 | - 3 captures, 2 metrics, 1 sequence, 2 steps 11 | - the first capture includes 1 camera capture and 1 semantic segmentation annotation. 12 | - the other two captures, 1 camera capture and 1 LIDAR capture, are triggered at the same time. For the camera, semantic segmentation, instance segmentation and 3d bounding box annotations are provided. For the LIDAR sensor, a semantic segmentation annotation of the point cloud is included. 13 | - one metric event is emitted at the capture level; the other is emitted at the annotation level. 14 | - 4 types of annotations: semantic segmentation, 2d bounding box, 3d bounding box and LIDAR semantic segmentation.
15 | - 2 types of metrics: object count and visible pixel 16 | -------------------------------------------------------------------------------- /tests/mock_data/simrun/annotations/instance_segmantation_000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/simrun/annotations/instance_segmantation_000.png -------------------------------------------------------------------------------- /tests/mock_data/simrun/annotations/lidar_semantic_segmentation_000.pcd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/simrun/annotations/lidar_semantic_segmentation_000.pcd -------------------------------------------------------------------------------- /tests/mock_data/simrun/annotations/sementic_segmantation_000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/simrun/annotations/sementic_segmantation_000.png -------------------------------------------------------------------------------- /tests/mock_data/simrun/annotations/sementic_segmantation_001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/simrun/annotations/sementic_segmantation_001.png -------------------------------------------------------------------------------- /tests/mock_data/simrun/captures/camera_000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/simrun/captures/camera_000.png -------------------------------------------------------------------------------- /tests/mock_data/simrun/captures/camera_001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/simrun/captures/camera_001.png -------------------------------------------------------------------------------- /tests/mock_data/simrun/captures/lidar_000.pcd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/simrun/captures/lidar_000.pcd -------------------------------------------------------------------------------- /tests/mock_data/simrun_keypoint_dataset/annotations/keypoint_000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/simrun_keypoint_dataset/annotations/keypoint_000.png -------------------------------------------------------------------------------- /tests/mock_data/simrun_keypoint_dataset/annotations/keypoint_001.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/simrun_keypoint_dataset/annotations/keypoint_001.png -------------------------------------------------------------------------------- /tests/mock_data/simrun_keypoint_dataset/egos.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "egos": [ 4 | { 5 | "id": "f20cb747-f561-4963-8171-f699a0aadb3c", 6 | "description": "" 7 | } 8 | ] 9 | } 10 | -------------------------------------------------------------------------------- /tests/mock_data/simrun_keypoint_dataset/metric_definitions.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "metric_definitions": [ 4 | { 5 | "id": "db1b258e-d1d0-41b6-8751-16f601a2e230", 6 | "name": "scenario_iteration", 7 | "description": "Iteration information for dataset sequences" 8 | }, 9 | { 10 | "id": "c0b5e272-9715-4ea2-930e-cfe8cecf1b6e", 11 | "name": "Light position", 12 | "description": "The world-space position of the light" 13 | }, 14 | { 15 | "id": "317735b9-b4a4-4f6b-a4c9-eb846463a583", 16 | "name": "Light rotation", 17 | "description": "The world-space rotation of the light" 18 | }, 19 | { 20 | "id": "1a709e09-81bd-43b5-b8f0-f3952a1af444", 21 | "name": "Light intensity", 22 | "description": "The intensity of the light" 23 | }, 24 | { 25 | "id": "a640e390-fa13-4bb0-b2cd-1e0cb3f43eb9", 26 | "name": "Light color", 27 | "description": "The color of the light" 28 | }, 29 | { 30 | "id": "1529faeb-863f-40c2-840f-5fe4221c1065", 31 | "name": "Camera position", 32 | "description": "The world-space position of the camera" 33 | }, 34 | { 35 | "id": "5199deef-2eb0-42fe-b00d-1d2418aedaff", 36 | "name": "Camera rotation", 37 | "description": "The world-space rotation of the camera" 38 | }, 39 | { 40 | "id": "42e7fa88-084b-423d-ba6e-830c711383e1", 41 | "name": "Camera field of view", 42 | "description": "The field of view of the camera" 43 | }, 44 | { 45 | "id": "11aa1dfc-3495-467c-a998-71d9bfe6980e", 46 | "name": "Camera focal length", 47 | "description": "The focal length of the camera" 48 | }, 49 | { 50 | "id": "14adb394-46c0-47e8-a3f0-99e754483b76", 51 | "name": "random-seed", 52 | "description": "The random seed used to initialize the random state of the simulation. Only triggered once per simulation." 
53 | }, 54 | { 55 | "id": "51da3c27-369d-4929-aea6-d01614635ce2", 56 | "name": "object count", 57 | "description": "Counts of objects for each label in the sensor's view", 58 | "spec": [ 59 | { 60 | "label_id": 1, 61 | "label_name": "person" 62 | } 63 | ] 64 | }, 65 | { 66 | "id": "5ba92024-b3b7-41a7-9d3f-c03a6a8ddd01", 67 | "name": "rendered object info", 68 | "description": "Information about each labeled object visible to the sensor", 69 | "spec": [ 70 | { 71 | "label_id": 1, 72 | "label_name": "person" 73 | } 74 | ] 75 | } 76 | ] 77 | } 78 | -------------------------------------------------------------------------------- /tests/mock_data/simrun_keypoint_dataset/sensors.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "sensors": [ 4 | { 5 | "id": "f4644cfe-4219-4936-a686-0e0fbe5b6559", 6 | "ego_id": "f20cb747-f561-4963-8171-f699a0aadb3c", 7 | "modality": "camera", 8 | "description": "" 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /tests/mock_data/simrun_manifest.csv: -------------------------------------------------------------------------------- 1 | "run_execution_id","app_param_id","instance_id","attempt_id","file_name","download_uri" 2 | "simrun","18cWX0n","1","1","Annotations/lidar_semantic_segmentation_000.pcd","https://mock.url/Annotations/lidar_semantic_segmentation_000.pcd" 3 | "simrun","18cWX0n","1","1","Annotations/sementic_segmantation_000.png","https://mock.url/Annotations/sementic_segmantation_000.png" 4 | "simrun","18cWX0n","1","1","Annotations/sementic_segmantation_001.png","https://mock.url/Annotations/sementic_segmantation_001.png" 5 | "simrun","18cWX0n","1","1","Captures/camera_000.png","https://mock.url/Captures/camera_000.png" 6 | "simrun","18cWX0n","1","1","Captures/camera_001.png","https://mock.url/Captures/camera_001.png" 7 | "simrun","18cWX0n","1","1","Captures/lidar_000.pcd","https://mock.url/Captures/lidar_000.pcd" 8 | "simrun","18cWX0n","1","1","Dataset/captures_000.json","https://mock.url/Dataset/captures_000.json" 9 | "simrun","18cWX0n","1","1","Dataset/captures_001.json","https://mock.url/Dataset/captures_001.json" 10 | "simrun","18cWX0n","1","1","Dataset/metrics_000.json","https://mock.url/Dataset/metrics_000.json" 11 | "simrun","18cWX0n","1","1","Dataset/annotation_definitions.json","https://mock.url/Dataset/annotation_definitions.json" 12 | "simrun","18cWX0n","1","1","Dataset/metric_definitions.json","https://mock.url/Dataset/metric_definitions.json" 13 | "simrun","18cWX0n","1","1","Dataset/egos.json","https://mock.url/Dataset/egos.json" 14 | "simrun","18cWX0n","1","1","Dataset/sensors.json","https://mock.url/Dataset/sensors.json" 15 | -------------------------------------------------------------------------------- /tests/test_bbox.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | from datasetinsights.io.bbox import BBox2D, BBox3D, group_bbox2d_per_label 4 | from datasetinsights.stats.visualization.bbox3d_plot import ( 5 | _project_pt_to_pixel_location, 6 | _project_pt_to_pixel_location_orthographic, 7 | ) 8 | 9 | 10 | def test_group_bbox2d_per_label(): 11 | count1, count2 = 10, 11 12 | bbox1 = BBox2D(label="car", x=1, y=1, w=2, h=3) 13 | bbox2 = BBox2D(label="pedestrian", x=7, y=6, w=3, h=4) 14 | bboxes = [] 15 | bboxes.extend([bbox1] * count1) 16 | bboxes.extend([bbox2] * count2) 17 | bboxes_per_label = group_bbox2d_per_label(bboxes) 18 | assert 
len(bboxes_per_label["car"]) == count1 19 | assert len(bboxes_per_label["pedestrian"]) == count2 20 | 21 | 22 | def test_group_bbox3d(): 23 | bbox = BBox3D( 24 | label="na", sample_token=0, translation=[0, 0, 0], size=[5, 5, 5] 25 | ) 26 | flb = bbox.front_left_bottom_pt 27 | frb = bbox.front_right_bottom_pt 28 | flt = bbox.front_left_top_pt 29 | frt = bbox.front_right_top_pt 30 | 31 | blb = bbox.back_left_bottom_pt 32 | brb = bbox.back_right_bottom_pt 33 | blt = bbox.back_left_top_pt 34 | brt = bbox.back_right_top_pt 35 | 36 | assert flb[0] == flt[0] == blb[0] == blt[0] == -2.5 37 | assert frb[0] == frt[0] == brb[0] == brt[0] == 2.5 38 | 39 | assert flt[1] == frt[1] == blt[1] == brt[1] == 2.5 40 | assert flb[1] == frb[1] == blb[1] == brb[1] == -2.5 41 | 42 | assert flt[2] == flb[2] == frt[2] == frb[2] == 2.5 43 | assert blt[2] == blb[2] == brt[2] == brb[2] == -2.5 44 | 45 | 46 | def test_project_pt_to_pixel_location(): 47 | pt = [0, 0, 0] 48 | proj = numpy.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) 49 | img_height = 480 50 | img_width = 640 51 | 52 | pixel_loc = _project_pt_to_pixel_location(pt, proj, img_height, img_width) 53 | assert pixel_loc[0] == 320 54 | assert pixel_loc[1] == 240 55 | 56 | # more interesting case 57 | pt = [0, 0, 70] 58 | proj = numpy.array([[1.299038, 0, 0], [0, 1.7320, 0], [0, 0, -1.0006]]) 59 | 60 | pixel_loc = _project_pt_to_pixel_location(pt, proj, img_height, img_width) 61 | assert pixel_loc[0] == 320 62 | assert pixel_loc[1] == 240 63 | 64 | 65 | def test_project_pt_to_pixel_location_orthographic(): 66 | pt = [0, 0, 0] 67 | proj = numpy.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) 68 | img_height = 480 69 | img_width = 640 70 | 71 | pixel_loc = _project_pt_to_pixel_location_orthographic( 72 | pt, proj, img_height, img_width 73 | ) 74 | assert pixel_loc[0] == 320 75 | assert pixel_loc[1] == 240 76 | 77 | # more interesting case 78 | pt = [0.3, 0, 0] 79 | proj = numpy.array([[0.08951352, 0, 0], [0, 0.2, 0], [0, 0, -0.0020006]]) 80 | 81 | pixel_loc = _project_pt_to_pixel_location_orthographic( 82 | pt, proj, img_height, img_width 83 | ) 84 | assert pixel_loc[0] == int( 85 | (proj[0][0] * pt[0] + 1) * 0.5 * img_width 86 | ) # 328 87 | assert pixel_loc[1] == img_height // 2 88 | -------------------------------------------------------------------------------- /tests/test_create_downloader.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from datasetinsights.io.downloader.base import create_dataset_downloader 4 | from datasetinsights.io.downloader.http_downloader import HTTPDatasetDownloader 5 | 6 | 7 | @pytest.mark.parametrize( 8 | "source_uri", 9 | ["http://", "https://"], 10 | ) 11 | def test_create_dataset_downloader_http_downloader(source_uri): 12 | 13 | # act 14 | downloader = create_dataset_downloader(source_uri=source_uri) 15 | 16 | # assert 17 | assert isinstance(downloader, HTTPDatasetDownloader) 18 | 19 | 20 | def test_create_dataset_downloader_invalid_input(): 21 | # arrange 22 | source_uri = "invalid_protocol://" 23 | # assert 24 | with pytest.raises(ValueError): 25 | # act 26 | create_dataset_downloader(source_uri=source_uri) 27 | 28 | 29 | def test_create_dataset_downloader_none_input(): 30 | # arrange 31 | source_uri = None 32 | # assert 33 | with pytest.raises(TypeError): 34 | # act 35 | create_dataset_downloader(source_uri=source_uri) 36 | -------------------------------------------------------------------------------- /tests/test_dashboard.py: 
-------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from datasetinsights.stats.visualization.object_detection import ScaleFactor 4 | 5 | 6 | def test_generate_scale_data(): 7 | captures = [ 8 | { 9 | "id": "4521949a-2a71-4c03-beb0-4f6362676639", 10 | "sensor": {"scale": 1.0}, 11 | }, 12 | { 13 | "id": "4b35a47a-3f63-4af3-b0e8-e68cb384ad75", 14 | "sensor": {"scale": 2.0}, 15 | }, 16 | ] 17 | 18 | captures = pd.DataFrame(captures) 19 | actual_scale = ScaleFactor.generate_scale_data(captures) 20 | expected_scale = pd.DataFrame([1.0, 2.0], columns=["scale"]) 21 | pd.testing.assert_frame_equal(expected_scale, actual_scale) 22 | -------------------------------------------------------------------------------- /tests/test_download_command.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | from click.exceptions import BadParameter 5 | from click.testing import CliRunner 6 | 7 | from datasetinsights.commands.download import SourceURI, cli 8 | 9 | 10 | def test_source_uri_validation(): 11 | validate_source_uri = SourceURI() 12 | 13 | gcs_path = "gs://bucket/path/to/folder" 14 | usim_path = "usim://auth@project_id/abdde" 15 | http_path = "http://domain/file.zip" 16 | https_path = "https://domain/file.zip" 17 | 18 | assert validate_source_uri(gcs_path) == gcs_path 19 | assert validate_source_uri(usim_path) == usim_path 20 | assert validate_source_uri(http_path) == http_path 21 | assert validate_source_uri(https_path) == https_path 22 | 23 | # each invalid URI must raise on its own; inside a single pytest.raises 24 | # block only the first call would ever execute 25 | for bad_uri in ["s3://bucket/file", "/path/to/file", "dasdklsdk", ""]: 26 | with pytest.raises(BadParameter): 27 | validate_source_uri(bad_uri) 28 | 29 | 30 | @pytest.mark.parametrize( 31 | "args", 32 | [ 33 | ["download", "--source-uri=usim://", "--output=tests/"], 34 | ["download", "--source-uri=http://", "--output=tests/"], 35 | ["download", "--source-uri=https://", "--output=tests/"], 36 | ["download", "--source-uri=gs://", "--output=tests/"], 37 | ], 38 | ) 39 | @patch("datasetinsights.commands.download.create_dataset_downloader") 40 | def test_download_except_called_once(mock_create, args): 41 | # arrange 42 | runner = CliRunner() 43 | # act 44 | runner.invoke(cli, args) 45 | # assert 46 | mock_create.assert_called_once() 47 | mock_create.return_value.download.assert_called_once() 48 | 49 | 50 | @pytest.mark.parametrize( 51 | "args", 52 | [["download"], ["download", "--source-uri=s3://"]], 53 | ) 54 | @patch("datasetinsights.commands.download.create_dataset_downloader") 55 | def test_download_except_not_called(mock_create, args): 56 | # arrange 57 | runner = CliRunner() 58 | # act 59 | runner.invoke(cli, args) 60 | # assert 61 | mock_create.assert_not_called() 62 | mock_create.return_value.download.assert_not_called() 63 | -------------------------------------------------------------------------------- /tests/test_http_downloader.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | 5 | from datasetinsights.io.downloader.http_downloader import HTTPDatasetDownloader 6 | from datasetinsights.io.exceptions import ChecksumError 7 | 8 | 9 | @patch("datasetinsights.io.downloader.http_downloader.download_file") 10 | def test_download_without_checksum(mock_download_file): 11 | # arrange 12 | source_uri = "http://some/path" 13 | output = "/some/path/" 14 | downloader =
HTTPDatasetDownloader() 15 | 16 | # act 17 | downloader.download(source_uri=source_uri, output=output) 18 | 19 | # assert 20 | mock_download_file.assert_called_once() 21 | 22 | 23 | @patch("datasetinsights.io.downloader.http_downloader.download_file") 24 | @patch("datasetinsights.io.downloader.http_downloader.validate_checksum") 25 | @patch("datasetinsights.io.downloader.http_downloader.get_checksum_from_file") 26 | def test_download_with_checksum( 27 | mock_get_checksum_from_file, 28 | mock_validate_check_sum, 29 | mock_download_file, 30 | ): 31 | # arrange 32 | source_uri = "http://some/path" 33 | checksum_file = "/some/checksum_file.txt" 34 | output = "/some/path/" 35 | downloader = HTTPDatasetDownloader() 36 | 37 | # act 38 | downloader.download( 39 | source_uri=source_uri, output=output, checksum_file=checksum_file 40 | ) 41 | 42 | # assert 43 | mock_download_file.assert_called_once() 44 | mock_get_checksum_from_file.assert_called_once() 45 | mock_validate_check_sum.assert_called_once() 46 | 47 | 48 | @patch("os.remove") 49 | @patch("datasetinsights.io.downloader.http_downloader.download_file") 50 | @patch("datasetinsights.io.downloader.http_downloader.validate_checksum") 51 | @patch("datasetinsights.io.downloader.http_downloader.get_checksum_from_file") 52 | def test_download_with_wrong_checksum( 53 | mock_get_checksum_from_file, 54 | mock_validate_checksum, 55 | mock_download_file, 56 | mock_remove, 57 | ): 58 | # arrange 59 | mock_validate_checksum.side_effect = ChecksumError 60 | output = "/some/path" 61 | source_uri = "http://some/path" 62 | checksum_file = "/some/checksum_file.txt" 63 | downloader = HTTPDatasetDownloader() 64 | 65 | # act 66 | with pytest.raises(ChecksumError): 67 | downloader.download( 68 | source_uri=source_uri, output=output, checksum_file=checksum_file 69 | ) 70 | 71 | # assert 72 | mock_get_checksum_from_file.assert_called_once() 73 | mock_download_file.assert_called_once() 74 | mock_remove.assert_called_once() 75 | -------------------------------------------------------------------------------- /tests/test_image_analysis.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import pathlib 4 | 5 | import numpy as np 6 | 7 | from datasetinsights.stats.image_analysis import ( 8 | get_average_psd_1d, 9 | get_bbox_fg_bg_var_laplacian, 10 | get_final_mask, 11 | get_psd2d, 12 | get_seg_fg_bg_var_laplacian, 13 | get_wt_coeffs_var, 14 | laplacian_img, 15 | ) 16 | 17 | 18 | def test_get_bbox_fg_bg_var_laplacian(): 19 | cur_dir = pathlib.Path(__file__).parent.absolute() 20 | img_path = str( 21 | cur_dir 22 | / "mock_data" 23 | / "coco" 24 | / "images" 25 | / "camera_61855733451949387398181790757513827492.png" 26 | ) 27 | ann_path = str( 28 | cur_dir / "mock_data" / "coco" / "annotations" / "keypoints.json" 29 | ) 30 | laplacian = laplacian_img(img_path) 31 | f = open(ann_path) 32 | annotations = json.load(f)["annotations"] 33 | bbox_var_lap, img_var_lap = get_bbox_fg_bg_var_laplacian( 34 | laplacian, annotations 35 | ) 36 | assert len(bbox_var_lap) > 0 37 | assert img_var_lap is not None 38 | 39 | 40 | def test_get_seg_fg_bg_var_laplacian(): 41 | laplacian = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) 42 | final_mask = np.array([[1, 0, 1], [1, 0, 1], [1, 0, 1]]) 43 | expected_fg_var_lap = np.array([2, 5, 8]).var() 44 | expected_bg_var_lap = np.array([1, 3, 4, 6, 7, 9]).var() 45 | 46 | fg_var_lap, bg_var_lap = get_seg_fg_bg_var_laplacian(laplacian, final_mask) 47 | 48 | assert fg_var_lap == 
expected_fg_var_lap 49 | assert bg_var_lap == expected_bg_var_lap 50 | 51 | 52 | def test_get_final_mask(): 53 | mask_a = np.array([[1, 0, 0], [0, 0, 0], [0, 0, 0]]) 54 | mask_b = np.array([[1, 1, 1], [0, 0, 0], [0, 0, 0]]) 55 | mask_c = np.array([[0, 0, 0], [0, 0, 0], [0, 1, 0]]) 56 | expected_final_mask = np.array([[1, 1, 1], [0, 0, 0], [0, 1, 0]]) 57 | 58 | final_mask = get_final_mask(masks=[mask_a, mask_b, mask_c]) 59 | 60 | assert np.array_equal(expected_final_mask, final_mask) 61 | 62 | 63 | def test_get_psd2d(): 64 | test_img = np.array([[1, 0, 0], [0, 0, 0], [0, 0, 0]]) 65 | psd2d = get_psd2d(image=test_img) 66 | 67 | assert psd2d.shape == test_img.shape 68 | 69 | 70 | def test_get_avg_psd(): 71 | cur_dir = pathlib.Path(__file__).parent.absolute() 72 | img_dir_path = str(cur_dir / "mock_data" / "coco" / "images") 73 | avg_psd_1d, std_psd_1d = get_average_psd_1d(img_dir_path) 74 | 75 | assert avg_psd_1d is not None 76 | assert type(std_psd_1d) == np.ndarray 77 | 78 | 79 | def test_get_wt_coeff_var(): 80 | cur_dir = pathlib.Path(__file__).parent.absolute() 81 | img_dir_path = str(cur_dir / "mock_data" / "coco" / "images") 82 | num_img = len(glob.glob(img_dir_path + f"/*.png")) 83 | h, v, d = get_wt_coeffs_var(img_dir_path) 84 | 85 | assert h is not None and len(h) == num_img 86 | assert v is not None and len(v) == num_img 87 | assert d is not None and len(d) == num_img 88 | -------------------------------------------------------------------------------- /tests/test_keypoints_stats.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | import pytest 5 | 6 | from datasetinsights.stats import ( 7 | get_average_skeleton, 8 | get_scale_keypoints, 9 | get_visible_keypoints_dict, 10 | ) 11 | from datasetinsights.stats.constants import COCO_KEYPOINTS, COCO_SKELETON 12 | 13 | 14 | @pytest.fixture() 15 | def _setup_annotations(): 16 | parent_dir = Path.cwd() 17 | json_file = ( 18 | parent_dir 19 | / "tests" 20 | / "mock_data" 21 | / "coco" 22 | / "annotations" 23 | / "keypoints.json" 24 | ) 25 | f = open(json_file) 26 | data = json.load(f) 27 | annotations = data["annotations"] 28 | keypoints_list = [] 29 | for k in annotations: 30 | keypoints_list.append(k["keypoints"]) 31 | yield keypoints_list 32 | keypoints_list = None 33 | 34 | 35 | def test_get_scale_keypoints(_setup_annotations): 36 | annotations = _setup_annotations 37 | processed_kp_dict = get_scale_keypoints(annotations) 38 | 39 | assert set(COCO_KEYPOINTS).issubset(set(processed_kp_dict.keys())) 40 | for keypoint in COCO_KEYPOINTS: 41 | count = sum( 42 | map(lambda x: x > 2.5 or x < -2.5, processed_kp_dict[keypoint]["x"]) 43 | ) 44 | assert count == 0 45 | count = sum( 46 | map(lambda x: x > 2.5 or x < -2.5, processed_kp_dict[keypoint]["y"]) 47 | ) 48 | assert count == 0 49 | 50 | 51 | def test_get_visible_keypoints_dict(_setup_annotations): 52 | keypoint_list = _setup_annotations 53 | 54 | labeled_kpt_dict = get_visible_keypoints_dict(keypoint_list) 55 | for keypoint in COCO_KEYPOINTS: 56 | assert keypoint in labeled_kpt_dict.keys() 57 | for value in labeled_kpt_dict.values(): 58 | assert value < 1 and value >= 0 59 | 60 | 61 | def test_get_scale_keypoints_bad_case(): 62 | annotations = [[0] * 40, [1] * 60] 63 | with pytest.raises(ValueError): 64 | get_scale_keypoints(annotations) 65 | 66 | 67 | @pytest.fixture() 68 | def _setup_kp_dict(): 69 | kp_dict = {} 70 | for name in COCO_KEYPOINTS: 71 | kp_dict[name] = {"x": [2, 0], "y": [0, 2]} 72 | yield 
kp_dict 73 | kp_dict = None 74 | 75 | 76 | def test_get_average_skeleton(_setup_kp_dict): 77 | kp_dict = _setup_kp_dict 78 | kp_link_list = get_average_skeleton(kp_dict) 79 | 80 | assert kp_link_list[0] == [(1, 1), (1, 1)] 81 | assert len(kp_link_list) == len(COCO_SKELETON) 82 | -------------------------------------------------------------------------------- /tests/test_main_entrypoint.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | from click.testing import CliRunner 5 | 6 | from datasetinsights.__main__ import entrypoint 7 | 8 | 9 | @pytest.mark.parametrize("args", [[], ["-v"], ["-v", "invalid_command"]]) 10 | @patch("datasetinsights.__main__.logging") 11 | def test_entrypoint_except_not_called(logger_mock, args): 12 | # arrange 13 | runner = CliRunner() 14 | # act 15 | runner.invoke(entrypoint, args) 16 | # assert 17 | logger_mock.getLogger.assert_not_called() 18 | logger_mock.getLogger.return_value.setLevel.assert_not_called() 19 | 20 | 21 | @pytest.mark.parametrize("args", [["-v", "download"]]) 22 | @patch("datasetinsights.__main__.logging") 23 | def test_entrypoint_except_called_once(logger_mock, args): 24 | # arrange 25 | runner = CliRunner() 26 | # act 27 | runner.invoke(entrypoint, args) 28 | # assert 29 | logger_mock.getLogger.assert_called_once() 30 | logger_mock.getLogger.return_value.setLevel.assert_called_once() 31 | -------------------------------------------------------------------------------- /tests/test_object_detection_stats.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | from pathlib import Path 4 | 5 | import pandas as pd 6 | import pytest 7 | 8 | from datasetinsights.stats import ( 9 | convert_coco_annotations_to_df, 10 | get_bbox_heatmap, 11 | get_bbox_per_img_dict, 12 | get_bbox_relative_size_list, 13 | ) 14 | 15 | 16 | @pytest.fixture() 17 | def annotations_path(): 18 | parent_dir = Path.cwd() 19 | json_file = ( 20 | parent_dir 21 | / "tests" 22 | / "mock_data" 23 | / "coco" 24 | / "annotations" 25 | / "keypoints.json" 26 | ) 27 | yield json_file 28 | json_file = None 29 | 30 | 31 | @pytest.fixture() 32 | def _setup_annotation_df(annotations_path): 33 | coco_json = json.load(open(annotations_path, "r")) 34 | 35 | df_image = pd.DataFrame(coco_json["images"]) 36 | df_annotation = pd.DataFrame(coco_json["annotations"]) 37 | 38 | df_coco = df_annotation.merge(df_image, left_on="image_id", right_on="id") 39 | yield df_coco 40 | df_coco = None 41 | 42 | 43 | def test_convert_coco_annotations_to_df(annotations_path): 44 | processed_kp_dict = convert_coco_annotations_to_df(annotations_path) 45 | target_column_names = processed_kp_dict.columns.values.tolist() 46 | 47 | column_names = [ 48 | "image_id", 49 | "area", 50 | "bbox", 51 | "iscrowd", 52 | "num_keypoints", 53 | "keypoints", 54 | "width", 55 | "height", 56 | ] 57 | 58 | for column_name in column_names: 59 | assert column_name in target_column_names 60 | 61 | 62 | def test_get_bbox_heatmap(_setup_annotation_df): 63 | annotation_df = _setup_annotation_df 64 | bbox_heatmap = get_bbox_heatmap(annotation_df) 65 | height, width, _ = bbox_heatmap.shape 66 | 67 | max_width = max(annotation_df["width"]) 68 | max_height = max(annotation_df["height"]) 69 | 70 | assert max_width == width 71 | assert max_height == height 72 | assert (bbox_heatmap < 0).sum() == 0 73 | 74 | 75 | def test_get_bbox_relative_size_list(_setup_annotation_df): 76 | annotation_df = 
_setup_annotation_df 77 | bbox_relative_size = get_bbox_relative_size_list(annotation_df) 78 | assert annotation_df.shape[0] == bbox_relative_size.shape[0] 79 | 80 | test_row = annotation_df.iloc[0] 81 | assert bbox_relative_size[0] == math.sqrt( 82 | test_row["area"] / (test_row["width"] * test_row["height"]) 83 | ) 84 | 85 | 86 | def test_get_bbox_per_img_dict(_setup_annotation_df): 87 | annotation_df = _setup_annotation_df 88 | 89 | bbox_num_dict = get_bbox_per_img_dict(annotation_df) 90 | for value in bbox_num_dict.values(): 91 | assert value < 1 and value >= 0 92 | assert sum(bbox_num_dict.values()) == 1 93 | -------------------------------------------------------------------------------- /tests/unity_perception/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | 6 | @pytest.fixture 7 | def mock_data_base_dir(): 8 | parent_dir = Path(__file__).parent.parent.absolute() 9 | mock_data_dir = parent_dir / "mock_data" 10 | 11 | return mock_data_dir 12 | 13 | 14 | @pytest.fixture 15 | def mock_data_dir(mock_data_base_dir): 16 | mock_data_dir = mock_data_base_dir / "simrun" 17 | 18 | return mock_data_dir 19 | -------------------------------------------------------------------------------- /tests/unity_perception/test_captures.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import json 3 | 4 | import pytest 5 | 6 | from datasetinsights.datasets.unity_perception import Captures 7 | from datasetinsights.datasets.unity_perception.exceptions import ( 8 | DefinitionIDError, 9 | ) 10 | from datasetinsights.datasets.unity_perception.tables import ( 11 | SCHEMA_VERSION, 12 | glob, 13 | ) 14 | 15 | 16 | @pytest.mark.parametrize( 17 | "data_dir_name", 18 | ["simrun", "no_annotations_or_metrics"], 19 | ) 20 | def test_get_captures_and_annotations(mock_data_base_dir, data_dir_name): 21 | mock_data_dir = mock_data_base_dir / data_dir_name 22 | captures = Captures(str(mock_data_dir), version=SCHEMA_VERSION) 23 | 24 | captures_per_definition = collections.defaultdict(int) 25 | json_files = glob(mock_data_dir, captures.FILE_PATTERN) 26 | for json_file in json_files: 27 | records = json.load(open(json_file, "r", encoding="utf8"))[ 28 | Captures.TABLE_NAME 29 | ] 30 | for record in records: 31 | for annotation in record["annotations"]: 32 | def_id = annotation["annotation_definition"] 33 | captures_per_definition[def_id] += 1 34 | 35 | for def_id, count in captures_per_definition.items(): 36 | assert len(captures.filter(def_id)) == count 37 | 38 | with pytest.raises(DefinitionIDError): 39 | captures.filter("bad_definition_id") 40 | -------------------------------------------------------------------------------- /tests/unity_perception/test_metrics.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import json 3 | 4 | import pandas as pd 5 | import pytest 6 | 7 | from datasetinsights.datasets.unity_perception import Metrics 8 | from datasetinsights.datasets.unity_perception.exceptions import ( 9 | DefinitionIDError, 10 | ) 11 | from datasetinsights.datasets.unity_perception.tables import ( 12 | SCHEMA_VERSION, 13 | glob, 14 | ) 15 | 16 | 17 | @pytest.mark.parametrize( 18 | "data_dir_name", 19 | ["simrun", "no_annotations_or_metrics"], 20 | ) 21 | def test_filter_metrics(mock_data_base_dir, data_dir_name): 22 | mock_data_dir = mock_data_base_dir / data_dir_name 23 | metrics = 
Metrics(str(mock_data_dir), version=SCHEMA_VERSION) 24 | 25 | expected_rows = collections.defaultdict(int) 26 | expected_cols = collections.defaultdict(set) 27 | exclude_metrics = set(["metric_definition", "values"]) 28 | def_ids = set() 29 | actual_metrics = collections.defaultdict(pd.DataFrame) 30 | json_files = glob(mock_data_dir, metrics.FILE_PATTERN) 31 | for json_file in json_files: 32 | records = json.load(open(json_file, "r", encoding="utf8"))[ 33 | Metrics.TABLE_NAME 34 | ] 35 | for record in records: 36 | def_id = record["metric_definition"] 37 | def_ids.add(def_id) 38 | for key in record: 39 | if key not in exclude_metrics: 40 | expected_cols[def_id].add(key) 41 | values = pd.json_normalize(record["values"]) 42 | for key in values.columns: 43 | expected_cols[def_id].add(key) 44 | expected_rows[def_id] += len(values) 45 | 46 | for def_id in def_ids: 47 | actual_metrics[def_id] = metrics.filter_metrics(def_id) 48 | 49 | for def_id, expected_metric in actual_metrics.items(): 50 | expected_shape = (expected_rows[def_id], len(expected_cols[def_id])) 51 | assert expected_shape == actual_metrics[def_id].shape 52 | assert expected_cols[def_id] == set(actual_metrics[def_id].columns) 53 | 54 | with pytest.raises(DefinitionIDError): 55 | metrics.filter_metrics("bad_definition_id") 56 | 57 | 58 | def test_normalize_values(mock_data_dir): 59 | metrics = { 60 | "capture_id": "1234", 61 | "annotation_id": None, 62 | "sequence_id": "2345", 63 | "step": 50, 64 | "metric_definition": "193ce072-0e49-4ea4-a99f-7ca837e3a6ce", 65 | "values": [ 66 | { 67 | "label_id": 1, 68 | "label_name": "book_dorkdiaries_aladdin", 69 | "count": 1, 70 | }, 71 | { 72 | "label_id": 2, 73 | "label_name": "candy_minipralines_lindt", 74 | "count": 2, 75 | }, 76 | ], 77 | } 78 | expected = [ 79 | { 80 | "label_id": 1, 81 | "label_name": "book_dorkdiaries_aladdin", 82 | "count": 1, 83 | "capture_id": "1234", 84 | "annotation_id": None, 85 | "step": 50, 86 | "sequence_id": "2345", 87 | }, 88 | { 89 | "label_id": 2, 90 | "label_name": "candy_minipralines_lindt", 91 | "count": 2, 92 | "capture_id": "1234", 93 | "annotation_id": None, 94 | "step": 50, 95 | "sequence_id": "2345", 96 | }, 97 | ] 98 | flatten_metrics = Metrics._normalize_values(metrics) 99 | for i, metric in enumerate(expected): 100 | for k in metric: 101 | assert metric[k] == flatten_metrics[i][k] 102 | -------------------------------------------------------------------------------- /tests/unity_perception/test_references.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tempfile 3 | from pathlib import Path 4 | 5 | import pytest 6 | 7 | from datasetinsights.datasets.unity_perception import ( 8 | AnnotationDefinitions, 9 | MetricDefinitions, 10 | ) 11 | from datasetinsights.datasets.unity_perception.tables import ( 12 | SCHEMA_VERSION, 13 | glob, 14 | ) 15 | from datasetinsights.datasets.unity_perception.validation import ( 16 | DuplicateRecordError, 17 | NoRecordError, 18 | ) 19 | 20 | 21 | def test_annotation_definitions(mock_data_dir): 22 | definition = AnnotationDefinitions( 23 | str(mock_data_dir), version=SCHEMA_VERSION 24 | ) 25 | 26 | json_file = next(glob(mock_data_dir, AnnotationDefinitions.FILE_PATTERN)) 27 | records = json.load(open(json_file, "r", encoding="utf8"))[ 28 | AnnotationDefinitions.TABLE_NAME 29 | ] 30 | 31 | def_ids = [r["id"] for r in records] 32 | for (i, def_id) in enumerate(def_ids): 33 | record = records[i] 34 | 35 | assert definition.get_definition(def_id) == record 36 | 
37 | 38 | def test_annotation_definitions_find_by_name(): 39 | def1 = { 40 | "id": 1, 41 | "name": "good name", 42 | "description": "does not matter", 43 | "format": "JSON", 44 | "spec": [], 45 | } 46 | def2 = { 47 | "id": 2, 48 | "name": "another good name", 49 | "description": "does not matter", 50 | "format": "JSON", 51 | "spec": [], 52 | } 53 | ann_def = { 54 | "version": SCHEMA_VERSION, 55 | "annotation_definitions": [def1, def2], 56 | } 57 | 58 | with tempfile.TemporaryDirectory() as tmp_dir: 59 | with open(Path(tmp_dir) / "annotation_definitions.json", "w") as f: 60 | json.dump(ann_def, f) 61 | definition = AnnotationDefinitions(tmp_dir, version=SCHEMA_VERSION) 62 | 63 | pattern = r"^good\sname$" 64 | assert definition.find_by_name(pattern) == def1 65 | 66 | pattern = "good name" 67 | with pytest.raises(DuplicateRecordError): 68 | definition.find_by_name(pattern) 69 | 70 | pattern = "w;fhohfoewh" 71 | with pytest.raises(NoRecordError): 72 | definition.find_by_name(pattern) 73 | 74 | 75 | def test_metric_definitions(mock_data_dir): 76 | definition = MetricDefinitions(str(mock_data_dir), version=SCHEMA_VERSION) 77 | 78 | json_file = next(glob(mock_data_dir, MetricDefinitions.FILE_PATTERN)) 79 | records = json.load(open(json_file, "r"))[MetricDefinitions.TABLE_NAME] 80 | 81 | def_ids = [r["id"] for r in records] 82 | for (i, def_id) in enumerate(def_ids): 83 | record = records[i] 84 | 85 | assert definition.get_definition(def_id) == record 86 | --------------------------------------------------------------------------------
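The unity_perception tests above construct the Captures and Metrics tables directly against the simrun mock data. As a usage reference, here is a minimal sketch (not a file from this repository dump) showing the same tables loaded outside of pytest. It assumes it is run from the repository root so the relative mock-data path resolves; the definition ids it passes ("1" for the semantic segmentation annotation, 1 for the "object count" metric) are taken from the mock JSON shown above.

```python
# Minimal usage sketch (assumption: run from the repository root so the
# relative path to the mock data resolves).
from datasetinsights.datasets.unity_perception import Captures, Metrics
from datasetinsights.datasets.unity_perception.tables import SCHEMA_VERSION

data_root = "tests/mock_data/simrun"

# Captures.filter(def_id) returns the captures carrying annotations with the
# given annotation_definition; "1" is the semantic segmentation definition
# in this mock dataset.
captures = Captures(data_root, version=SCHEMA_VERSION)
semantic_segmentation = captures.filter("1")
print(f"{len(semantic_segmentation)} captures with semantic segmentation")

# Metrics.filter_metrics(def_id) flattens each metric event's per-label
# "values" into one row per label; definition 1 is "object count".
metrics = Metrics(data_root, version=SCHEMA_VERSION)
object_counts = metrics.filter_metrics(1)
print(object_counts[["label_name", "count"]])
```

This mirrors what tests/unity_perception/test_captures.py and test_metrics.py verify: the number of filtered captures matches the annotation counts in the captures_*.json files, and the filtered metrics frame contains one row per label entry in each metric event's values list.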