├── .dockerignore ├── .flake8 ├── .github ├── CODE_OF_CONDUCT.md ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ └── questions-about-datasetinsights.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── linting-and-unittests.yaml │ ├── publish-docker-hub.yaml │ ├── publish-pypi.yaml │ └── synk-scan.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CONTRIBUTING.md ├── Dockerfile ├── LICENCE ├── Makefile ├── README.md ├── datasetinsights ├── __init__.py ├── __main__.py ├── commands │ ├── __init__.py │ ├── convert.py │ └── download.py ├── constants.py ├── dashboard.py ├── datasets │ ├── __init__.py │ ├── exceptions.py │ ├── synthetic.py │ ├── transformers │ │ ├── __init__.py │ │ ├── base.py │ │ └── coco.py │ └── unity_perception │ │ ├── __init__.py │ │ ├── captures.py │ │ ├── exceptions.py │ │ ├── metrics.py │ │ ├── references.py │ │ ├── tables.py │ │ └── validation.py ├── io │ ├── __init__.py │ ├── bbox.py │ ├── download.py │ ├── downloader │ │ ├── __init__.py │ │ ├── base.py │ │ ├── gcs_downloader.py │ │ └── http_downloader.py │ ├── exceptions.py │ └── gcs.py └── stats │ ├── __init__.py │ ├── constants.py │ ├── image_analysis │ ├── __init__.py │ ├── laplacian.py │ ├── spectral_analysis.py │ └── wavelet.py │ ├── keypoints_stats.py │ ├── object_detection_stats.py │ ├── statistics.py │ └── visualization │ ├── __init__.py │ ├── app.py │ ├── bbox2d_plot.py │ ├── bbox3d_plot.py │ ├── constants.py │ ├── font │ ├── DroidSansFallback.ttf │ └── LICENSE-2.0.txt │ ├── keypoints_plot.py │ ├── object_detection.py │ ├── overview.py │ ├── plots.py │ └── stylesheet.css ├── docs ├── Makefile ├── README.md ├── requirements.txt └── source │ ├── Synthetic_Dataset_Schema.md │ ├── _images │ ├── captures_steps_timestamps.png │ ├── image_0.png │ ├── image_2.png │ ├── image_3.png │ ├── image_4.png │ ├── kubeflow │ │ ├── evaluate_pipeline_graph.png │ │ ├── evaluate_the_model.png │ │ ├── notebook.png │ │ ├── notebook_docker_cpu_memory.png │ │ ├── notebook_gpu_volume.png │ │ ├── train_on_real_world_dataset.png │ │ ├── train_on_synthdet_sample.png │ │ ├── train_on_synthetic_and_real_world_dataset.png │ │ ├── train_on_synthetic_dataset_unity_simulation.png │ │ ├── train_pipeline_graph.jpg │ │ └── upload_pipeline.png │ └── synthetic_data_pipeline_dataset_evaluation.png │ ├── _templates │ ├── module.rst_t │ ├── package.rst_t │ └── toc.rst_t │ ├── conf.py │ ├── datasetinsights.datasets.rst │ ├── datasetinsights.datasets.transformers.rst │ ├── datasetinsights.datasets.unity_perception.rst │ ├── datasetinsights.io.downloader.rst │ ├── datasetinsights.io.rst │ ├── datasetinsights.rst │ ├── datasetinsights.stats.rst │ ├── datasetinsights.stats.visualization.rst │ ├── index.rst │ └── modules.rst ├── notebooks ├── Human_Keypoint_Pose.ipynb ├── Image_Analysis.ipynb ├── Object_Detection_Stats.ipynb └── Perception_Statistics.ipynb ├── poetry.lock ├── pyproject.toml └── tests ├── datasets ├── test_coco_transformers.py ├── test_statistics.py └── test_synthetic.py ├── mock_data ├── calib000000.txt ├── coco │ ├── annotations │ │ ├── instances.json │ │ └── keypoints.json │ └── images │ │ ├── camera_001.png │ │ ├── camera_125709864006893838062514269195103918838.png │ │ └── camera_61855733451949387398181790757513827492.png ├── no_annotations_or_metrics │ └── Dataset │ │ ├── annotation_definitions.json │ │ ├── captures_000.json │ │ ├── captures_001.json │ │ ├── egos.json │ │ ├── metric_definitions.json │ │ ├── metrics_000.json │ │ └── sensors.json ├── simrun │ ├── Dataset │ │ ├── 
annotation_definitions.json │ │ ├── captures_000.json │ │ ├── captures_001.json │ │ ├── egos.json │ │ ├── metric_definitions.json │ │ ├── metrics_000.json │ │ └── sensors.json │ ├── README.md │ ├── annotations │ │ ├── instance_segmantation_000.png │ │ ├── lidar_semantic_segmentation_000.pcd │ │ ├── sementic_segmantation_000.png │ │ └── sementic_segmantation_001.png │ └── captures │ │ ├── camera_000.png │ │ ├── camera_001.png │ │ └── lidar_000.pcd ├── simrun_keypoint_dataset │ ├── annotation_definitions.json │ ├── annotations │ │ ├── keypoint_000.png │ │ └── keypoint_001.png │ ├── captures_000.json │ ├── egos.json │ ├── metric_definitions.json │ ├── metrics_000.json │ └── sensors.json └── simrun_manifest.csv ├── test_bbox.py ├── test_create_downloader.py ├── test_dashboard.py ├── test_download_command.py ├── test_gcs.py ├── test_http_downloader.py ├── test_image_analysis.py ├── test_keypoints_stats.py ├── test_main_entrypoint.py ├── test_object_detection_stats.py ├── test_visual.py └── unity_perception ├── conftest.py ├── test_captures.py ├── test_metrics.py └── test_references.py /.dockerignore: -------------------------------------------------------------------------------- 1 | # Git 2 | .git 3 | .gitignore 4 | 5 | # CI 6 | .codeclimate.yml 7 | .travis.yml 8 | .taskcluster.yml 9 | 10 | # Docker 11 | docker-compose.yml 12 | .docker 13 | 14 | # Byte-compiled / optimized / DLL files 15 | **/__pycache__/ 16 | **/*.py[cod] 17 | 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | env/ 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .coverage 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | .pytest_cache 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Virtual environment 71 | .env/ 72 | .venv/ 73 | venv/ 74 | 75 | # PyCharm 76 | .idea 77 | 78 | # IDE 79 | **/.ropeproject 80 | **/.swp 81 | .vscode 82 | .ipynb_checkpoints 83 | 84 | # Place project specific ignores here 85 | runs 86 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 80 3 | ignore = 4 | E133, 5 | E203, 6 | W503, 7 | W504, 8 | W605, 9 | F541 10 | exclude = 11 | .git, 12 | __pycache__, 13 | datasetinsights/data/datasets/protos/ 14 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributor Covenant Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | We as members, contributors, and leaders pledge to make participation in our 7 | community a harassment-free experience for everyone, regardless of age, body 8 | size, visible or invisible disability, ethnicity, sex characteristics, gender 9 | identity and expression, level of experience, education, socio-economic status, 10 | nationality, personal appearance, race, religion, or sexual identity 11 | and orientation. 12 | 13 | We pledge to act and interact in ways that contribute to an open, welcoming, 14 | diverse, inclusive, and healthy community. 15 | 16 | ## Our Standards 17 | 18 | Examples of behavior that contributes to a positive environment for our 19 | community include: 20 | 21 | * Demonstrating empathy and kindness toward other people 22 | * Being respectful of differing opinions, viewpoints, and experiences 23 | * Giving and gracefully accepting constructive feedback 24 | * Accepting responsibility and apologizing to those affected by our mistakes, 25 | and learning from the experience 26 | * Focusing on what is best not just for us as individuals, but for the 27 | overall community 28 | 29 | Examples of unacceptable behavior include: 30 | 31 | * The use of sexualized language or imagery, and sexual attention or 32 | advances of any kind 33 | * Trolling, insulting or derogatory comments, and personal or political attacks 34 | * Public or private harassment 35 | * Publishing others' private information, such as a physical or email 36 | address, without their explicit permission 37 | * Other conduct which could reasonably be considered inappropriate in a 38 | professional setting 39 | 40 | ## Enforcement Responsibilities 41 | 42 | Community leaders are responsible for clarifying and enforcing our standards of 43 | acceptable behavior and will take appropriate and fair corrective action in 44 | response to any behavior that they deem inappropriate, threatening, offensive, 45 | or harmful. 46 | 47 | Community leaders have the right and responsibility to remove, edit, or reject 48 | comments, commits, code, wiki edits, issues, and other contributions that are 49 | not aligned to this Code of Conduct, and will communicate reasons for moderation 50 | decisions when appropriate. 
51 | 52 | ## Scope 53 | 54 | This Code of Conduct applies within all community spaces, and also applies when 55 | an individual is officially representing the community in public spaces. 56 | Examples of representing our community include using an official e-mail address, 57 | posting via an official social media account, or acting as an appointed 58 | representative at an online or offline event. 59 | 60 | ## Enforcement 61 | 62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 63 | reported to the community leaders responsible for enforcement at 64 | . 65 | All complaints will be reviewed and investigated promptly and fairly. 66 | 67 | All community leaders are obligated to respect the privacy and security of the 68 | reporter of any incident. 69 | 70 | ## Enforcement Guidelines 71 | 72 | Community leaders will follow these Community Impact Guidelines in determining 73 | the consequences for any action they deem in violation of this Code of Conduct: 74 | 75 | ### 1. Correction 76 | 77 | **Community Impact**: Use of inappropriate language or other behavior deemed 78 | unprofessional or unwelcome in the community. 79 | 80 | **Consequence**: A private, written warning from community leaders, providing 81 | clarity around the nature of the violation and an explanation of why the 82 | behavior was inappropriate. A public apology may be requested. 83 | 84 | ### 2. Warning 85 | 86 | **Community Impact**: A violation through a single incident or series 87 | of actions. 88 | 89 | **Consequence**: A warning with consequences for continued behavior. No 90 | interaction with the people involved, including unsolicited interaction with 91 | those enforcing the Code of Conduct, for a specified period of time. This 92 | includes avoiding interactions in community spaces as well as external channels 93 | like social media. Violating these terms may lead to a temporary or 94 | permanent ban. 95 | 96 | ### 3. Temporary Ban 97 | 98 | **Community Impact**: A serious violation of community standards, including 99 | sustained inappropriate behavior. 100 | 101 | **Consequence**: A temporary ban from any sort of interaction or public 102 | communication with the community for a specified period of time. No public or 103 | private interaction with the people involved, including unsolicited interaction 104 | with those enforcing the Code of Conduct, is allowed during this period. 105 | Violating these terms may lead to a permanent ban. 106 | 107 | ### 4. Permanent Ban 108 | 109 | **Community Impact**: Demonstrating a pattern of violation of community 110 | standards, including sustained inappropriate behavior, harassment of an 111 | individual, or aggression toward or disparagement of classes of individuals. 112 | 113 | **Consequence**: A permanent ban from any sort of public interaction within 114 | the community. 115 | 116 | ## Attribution 117 | 118 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 119 | version 2.0, available at 120 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 121 | 122 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 123 | enforcement ladder](https://github.com/mozilla/diversity). 124 | 125 | [homepage]: https://www.contributor-covenant.org 126 | 127 | For answers to common questions about this code of conduct, see the FAQ at 128 | https://www.contributor-covenant.org/faq. Translations are available at 129 | https://www.contributor-covenant.org/translations. 
130 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Report a bug with datasetinsights 4 | labels: bug 5 | 6 | --- 7 | 8 | **Describe the Bug:** 9 | [A clear and concise description of what the bug is.] 10 | 11 | **How to Reproduce?** 12 | [What are the steps that would reproduce the bug that you encountered.] 13 | 14 | **What did you expect to happen:** 15 | 16 | **Console logs / stack traces** 17 | Please wrap in [triple backticks (```)](https://help.github.com/en/articles/creating-and-highlighting-code-blocks) to make it easier to read. 18 | 19 | **Screenshots** 20 | [If applicable, add screenshots to help explain your problem.] 21 | 22 | **Anything else you would like to add:** 23 | [Miscellaneous information that will assist in solving the issue.] 24 | 25 | **Environment:** 26 | 27 | - OS + version: [e.g. Ubuntu 20.04.1 LTS] 28 | - datasetinsights version 29 | - _Environment_: (which example environment you used to reproduce the error) 30 | - Other environment settings 31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | labels: enhancement 5 | 6 | --- 7 | 8 | **Why you need this feature:** 9 | [Is your feature request related to a problem? Please describe in details] 10 | 11 | 12 | **Describe the solution you'd like:** 13 | [A clear and concise description of what you want to happen.] 14 | 15 | 16 | **Anything else you would like to add:** 17 | [Miscellaneous information that will assist in solving the issue.] 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/questions-about-datasetinsights.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Questions about datasetinsights 3 | about: Ask your question or about any confusion that you have about this project 4 | labels: question 5 | 6 | --- 7 | 8 | **Question:** 9 | [You can ask any question about this project.] 10 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Peer Review Information 2 | 3 | Add information on any code, feature, documentation changes here. 4 | 5 | # Pull Request Check List 6 | 7 | 9 | 10 | - [ ] Added **tests** for changed code. 11 | - [ ] Updated **documentation** for changed code. 
12 | -------------------------------------------------------------------------------- /.github/workflows/linting-and-unittests.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | linting: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python 3.8 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: "3.8" 21 | - name: Linting 22 | run: | 23 | pip install pre-commit 24 | pre-commit run --all-files 25 | tests: 26 | # reference from https://github.com/python-poetry/poetry/blob/master/.github/workflows/main.yml 27 | runs-on: ubuntu-latest 28 | strategy: 29 | matrix: 30 | python-version: ["3.8", "3.9", "3.10"] 31 | 32 | steps: 33 | - uses: actions/checkout@v2 34 | - name: Set up Python ${{ matrix.python-version }} 35 | uses: actions/setup-python@v2 36 | with: 37 | python-version: ${{ matrix.python-version }} 38 | - name: Get full Python version 39 | id: full-python-version 40 | shell: bash 41 | run: echo ::set-output name=version::$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") 42 | - name: Install poetry 43 | shell: bash 44 | run: | 45 | curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | python - 46 | echo "$HOME/.poetry/bin" >> $GITHUB_PATH 47 | - name: Configure poetry 48 | shell: bash 49 | run: poetry config virtualenvs.in-project true 50 | - name: Set up cache 51 | uses: actions/cache@v2 52 | id: cache 53 | with: 54 | path: .venv 55 | key: venv-${{ steps.full-python-version.outputs.version }}-${{ hashFiles('**/poetry.lock') }} 56 | - name: Ensure cache is healthy 57 | if: steps.cache.outputs.cache-hit == 'true' 58 | shell: bash 59 | run: poetry run pip --version >/dev/null 2>&1 || rm -rf .venv 60 | - name: Install dependencies 61 | run: poetry install 62 | shell: bash 63 | - name: Run pytest 64 | run: poetry run pytest 65 | -------------------------------------------------------------------------------- /.github/workflows/publish-docker-hub.yaml: -------------------------------------------------------------------------------- 1 | name: Publish Docker image 2 | on: 3 | release: 4 | types: [published] 5 | jobs: 6 | push_to_registry: 7 | name: Push Docker image to Docker Hub 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Check out the repo 11 | uses: actions/checkout@v2 12 | - name: Push to Docker Hub 13 | uses: docker/build-push-action@v1 14 | with: 15 | username: ${{ secrets.DOCKERHUB_USERNAME }} 16 | password: ${{ secrets.DOCKERHUB_ACCESS_TOKEN }} 17 | repository: unitytechnologies/datasetinsights 18 | tags: latest 19 | tag_with_ref: true 20 | -------------------------------------------------------------------------------- /.github/workflows/publish-pypi.yaml: -------------------------------------------------------------------------------- 1 | name: Publish to pypi 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | env: 8 | PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} 9 | 10 | jobs: 11 | 12 | build-and-publish: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | 17 | - uses: actions/checkout@v2 18 | - name: Set up Python 3.8 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: "3.8" 22 | - name: Get full Python version 23 | id: full-python-version 24 | shell: bash 25 | run: echo ::set-output name=version::$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") 
26 | - name: Install poetry 27 | shell: bash 28 | run: | 29 | curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | python - 30 | echo "$HOME/.poetry/bin" >> $GITHUB_PATH 31 | - name: Set env 32 | run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV 33 | - name : Configure poetry 34 | shell: bash 35 | run: poetry config pypi-token.pypi $PYPI_TOKEN 36 | - name: Set poetry version 37 | shell: bash 38 | run: poetry version $RELEASE_VERSION 39 | - name: build 40 | shell: bash 41 | run: poetry build 42 | - name: publish 43 | shell: bash 44 | run: poetry publish 45 | -------------------------------------------------------------------------------- /.github/workflows/synk-scan.yaml: -------------------------------------------------------------------------------- 1 | name: Scan Python project using Snyk 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | security: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@master 16 | - name: Run Snyk to check for vulnerabilities 17 | uses: snyk/actions/python@master 18 | env: 19 | SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} 20 | with: 21 | command: monitor 22 | args: --all-projects --exclude=docs --command=python3 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled source # 2 | ################### 3 | *.com 4 | *.class 5 | *.dll 6 | *.exe 7 | *.o 8 | *.so 9 | 10 | # Compressed files # 11 | #################### 12 | # it's better to unpack these files and commit the raw source 13 | # git has its own built in compression methods 14 | *.7z 15 | *.dmg 16 | *.gz 17 | *.iso 18 | *.jar 19 | *.rar 20 | *.tar 21 | *.zip 22 | 23 | # Logs and databases # 24 | ###################### 25 | *.log 26 | *.sql 27 | *.sqlite 28 | 29 | # OS generated files # 30 | ###################### 31 | .DS_Store* 32 | ehthumbs.db 33 | Icon? 
34 | Thumbs.db 35 | *.bak* 36 | 37 | # IDE Project files # 38 | ###################### 39 | *.sublime-* 40 | *.Rproj 41 | .Rproj.user 42 | .Rhistory 43 | *.xcodeproj 44 | *.idea 45 | 46 | # Python # 47 | ########### 48 | # Byte-compiled / optimized / DLL files 49 | __pycache__/ 50 | *.py[cod] 51 | *$py.class 52 | 53 | # C extensions 54 | *.so 55 | 56 | # Distribution / packaging 57 | .Python 58 | build/ 59 | develop-eggs/ 60 | dist/ 61 | downloads/ 62 | eggs/ 63 | .eggs/ 64 | lib/ 65 | lib64/ 66 | parts/ 67 | sdist/ 68 | var/ 69 | wheels/ 70 | pip-wheel-metadata/ 71 | share/python-wheels/ 72 | *.egg-info/ 73 | .installed.cfg 74 | *.egg 75 | MANIFEST 76 | 77 | # Unit test / coverage reports 78 | htmlcov/ 79 | .tox/ 80 | .nox/ 81 | .coverage 82 | .coverage.* 83 | .cache 84 | nosetests.xml 85 | coverage.xml 86 | *.cover 87 | .hypothesis/ 88 | .pytest_cache/ 89 | 90 | # Jupyter Notebook 91 | .ipynb_checkpoints 92 | 93 | # IPython 94 | profile_default/ 95 | ipython_config.py 96 | 97 | # pyenv 98 | .python-version 99 | 100 | # Environments 101 | .env 102 | .venv 103 | env/ 104 | venv/ 105 | ENV/ 106 | env.bak/ 107 | venv.bak/ 108 | 109 | # Editor 110 | .vscode 111 | 112 | # For this Project # 113 | ###################### 114 | runs/ 115 | checkpoints/ 116 | metrics/ 117 | coco_data 118 | perception_data 119 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | exclude: > 4 | (?x)^( 5 | .*_pb2.py| 6 | .*_pb2_grpc.py 7 | )$ 8 | repos: 9 | - repo: https://github.com/pre-commit/pre-commit-hooks 10 | rev: v2.4.0 11 | hooks: 12 | - id: trailing-whitespace 13 | - id: end-of-file-fixer 14 | - id: check-yaml 15 | - id: check-added-large-files 16 | - id: check-merge-conflict 17 | - repo: https://github.com/psf/black 18 | rev: 22.3.0 19 | hooks: 20 | - id: black 21 | - repo: https://gitlab.com/pycqa/flake8 22 | rev: 3.8.1 23 | hooks: 24 | - id: flake8 25 | - repo: https://github.com/timothycrosley/isort 26 | rev: 5.1.0 27 | hooks: 28 | - id: isort 29 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | formats: all 3 | build: 4 | image: stable 5 | python: 6 | version: 3.8 7 | install: 8 | - requirements: docs/requirements.txt 9 | - method: pip 10 | path: . 11 | sphinx: 12 | builder: html 13 | configuration: docs/source/conf.py 14 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Table of contents 2 | 3 | - [Table of contents](#table-of-contents) 4 | - [Contributing to datasetinsights](#contributing-to-datasetinsights) 5 | - [Developing datasetinsights](#developing-datasetinsights) 6 | - [Add new dependencies](#add-new-dependencies) 7 | - [Codebase structure](#codebase-structure) 8 | - [Unit testing](#unit-testing) 9 | - [Style Guide](#style-guide) 10 | - [Writing documentation](#writing-documentation) 11 | - [Building documentation](#building-documentation) 12 | 13 | ## Contributing to datasetinsights 14 | 15 | We encourage contributions to the datasetinsights repo, including but not limited to the following categories: 16 | 17 | 1. 
You want to improve the documentation of an existing module. 18 | 2. You want to provide a bug fix for an outstanding issue. 19 | 3. You want to implement a new feature to support a new type of Perception package output. 20 | 21 | ## Developing datasetinsights 22 | 23 | Here are the steps to set up a datasetinsights virtual environment on your machine: 24 | 25 | 1. Install [poetry](https://python-poetry.org/), [git](https://git-scm.com/) and [pre-commit](https://pre-commit.com/) 26 | 2. Create a virtual environment. We recommend using [miniconda](https://docs.conda.io/en/latest/miniconda.html) 27 | 28 | ```bash 29 | conda create -n dins-dev python=3.8 30 | conda activate dins-dev 31 | ``` 32 | 33 | 3. Clone a copy of datasetinsights from source: 34 | 35 | ```bash 36 | git clone https://github.com/Unity-Technologies/datasetinsights.git 37 | cd datasetinsights 38 | ``` 39 | 40 | 4. Install datasetinsights in `develop` mode: 41 | 42 | ```bash 43 | poetry install 44 | ``` 45 | 46 | This will symlink the Python files from the current local source tree into the installed virtual environment. 47 | The `develop` mode also includes Python packages such as [pytest](https://docs.pytest.org/en/latest/) and [black](https://black.readthedocs.io/en/stable/). 48 | 49 | 5. Install the pre-commit [hook](https://pre-commit.com/#3-install-the-git-hook-scripts) into the `.git` folder. 50 | 51 | ```bash 52 | pre-commit install 53 | # pre-commit installed at .git/hooks/pre-commit 54 | ``` 55 | 56 | ### Add new dependencies 57 | 58 | Add new Python dependencies to the datasetinsights environment using poetry, for example: 59 | 60 | ```bash 61 | poetry add numpy@^1.18.4 62 | ``` 63 | 64 | Make sure you only add the packages you need instead of adding all dependencies. 65 | Let the package management system resolve transitive dependencies. 66 | See [poetry add](https://python-poetry.org/docs/cli/#add) for detailed instructions. 67 | 68 | ## Codebase structure 69 | 70 | The datasetinsights package contains the following modules: 71 | 72 | - [commands](datasetinsights/commands) This module contains the CLI commands. 73 | - [datasets](datasetinsights/datasets) This module contains different datasets. The dataset classes contain knowledge on how a dataset should be loaded into memory. 74 | - [io](datasetinsights/io) This module contains functionality related to writing/downloading/uploading to/from different sources. 75 | - [stats](datasetinsights/stats) This module contains code for visualizing and gathering statistics on the dataset. 76 | 77 | ## Unit testing 78 | 79 | We use [pytest](https://docs.pytest.org/en/latest/) to run tests located under `tests/`. Run the entire test suite with 80 | 81 | ```bash 82 | pytest 83 | ``` 84 | 85 | or run an individual test suite, like: 86 | 87 | ```bash 88 | pytest tests/test_visual.py 89 | ``` 90 | 91 | 92 | 93 | ## Style Guide 94 | 95 | We follow the Black code [style](https://black.readthedocs.io/en/stable/the_black_code_style.html) for this repository. 96 | The max line length is set at 80 characters. 97 | We enforce this code style using [Black](https://black.readthedocs.io/en/stable/) to format Python code. 98 | In addition to Black, we use [isort](https://github.com/timothycrosley/isort) to sort Python imports. 99 | 100 | Before submitting a pull request, run: 101 | 102 | ```bash
103 | pre-commit run --all-files 104 | ``` 105 | 106 | Fix all issues highlighted by flake8. If you want to skip exceptions such as long URL lines in docstrings, add `# noqa: E501` to the specific violating line. See [this](https://flake8.pycqa.org/en/3.1.1/user/ignoring-errors.html) to learn more about how to ignore flake8 errors.
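For illustration, a minimal sketch of such a per-line suppression (the URL is invented):

```python
# See https://example.com/a/very/long/reference/url/that/cannot/be/wrapped/in/a/docstring  # noqa: E501
```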
107 | 108 | Some editors support automatic formatting on save; see, for example, [vscode](https://code.visualstudio.com/docs/python/editing#_formatting). 109 | 110 | ## Writing documentation 111 | 112 | Datasetinsights uses [Google style](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) for formatting docstrings. 113 | Line length inside docstring blocks must be limited to 80 characters, with exceptions such as long URLs or tables. 114 | 115 | ### Building documentation 116 | 117 | Follow the instructions [here](docs/README.md). 118 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.0-cudnn7-runtime-ubuntu18.04 2 | 3 | RUN apt-get update \ 4 | && apt-get install -y \ 5 | build-essential \ 6 | curl \ 7 | libsm6 \ 8 | libxext6 \ 9 | libxrender-dev \ 10 | libgl1-mesa-dev \ 11 | libffi-dev \ 12 | libzmq3-dev \ 13 | python3.8-dev \ 14 | python3-pip \ 15 | && ln -s /usr/bin/python3.8 /usr/local/bin/python 16 | 17 | RUN python -m pip install --upgrade pip 18 | RUN python -m pip install setuptools==60.2.0 cryptography==36.0.1 poetry==1.1.12 notebook==6.4.8 19 | 20 | # Add Tini 21 | ENV TINI_VERSION v0.18.0 22 | ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /usr/local/bin/tini 23 | RUN chmod +x /usr/local/bin/tini 24 | 25 | WORKDIR /datasetinsights 26 | VOLUME /data /root/.config 27 | 28 | COPY poetry.lock pyproject.toml ./ 29 | RUN poetry config virtualenvs.create false \ 30 | && poetry install --no-root 31 | 32 | COPY . ./ 33 | # Run poetry install again to install datasetinsights 34 | RUN poetry config virtualenvs.create false \ 35 | && poetry install 36 | 37 | # Use -g to ensure all child processes receive SIGKILL 38 | ENTRYPOINT ["tini", "-g", "--"] 39 | 40 | CMD sh -c "jupyter notebook --notebook-dir=/ --ip=0.0.0.0 --no-browser --allow-root --port=8888 --NotebookApp.token='' --NotebookApp.password='' --NotebookApp.allow_origin='*' --NotebookApp.base_url=${NB_PREFIX}" 41 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help 2 | 3 | help: 4 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 5 | 6 | .DEFAULT_GOAL := help 7 | 8 | GCP_PROJECT_ID := unity-ai-thea-test 9 | TAG ?= latest 10 | 11 | build: ## Build datasetinsights docker image 12 | @echo "Building docker image for datasetinsights with tag: $(TAG)" 13 | @docker build -t datasetinsights:$(TAG) .
14 | 15 | push: ## Push datasetinsights docker image to registry 16 | @echo "Uploading docker image to GCS registry with tag: $(TAG)" 17 | @docker tag datasetinsights:$(TAG) gcr.io/$(GCP_PROJECT_ID)/datasetinsights:$(TAG) && \ 18 | docker push gcr.io/$(GCP_PROJECT_ID)/datasetinsights:$(TAG) 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dataset Insights 2 | 3 | [![PyPI python](https://img.shields.io/pypi/pyversions/datasetinsights)](https://pypi.org/project/datasetinsights) 4 | [![PyPI version](https://badge.fury.io/py/datasetinsights.svg)](https://pypi.org/project/datasetinsights) 5 | [![Downloads](https://pepy.tech/badge/datasetinsights)](https://pepy.tech/project/datasetinsights) 6 | [![Tests](https://github.com/Unity-Technologies/datasetinsights/actions/workflows/linting-and-unittests.yaml/badge.svg?branch=master&event=push)](https://github.com/Unity-Technologies/datasetinsights/actions/workflows/linting-and-unittests.yaml?query=branch%3Amaster+event%3Apush) 7 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) 8 | 9 | Unity Dataset Insights is a Python package for downloading, parsing and analyzing synthetic datasets generated using the Unity [Perception package](https://github.com/Unity-Technologies/com.unity.perception). 10 | 11 | ## Installation 12 | 13 | Datasetinsights is published to PyPI. You can install it by running the following command in a supported Python environment:
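```bash
pip install datasetinsights
```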
14 | 15 | ## Getting Started 16 | 17 | ### Dataset Statistics 18 | 19 | We provide a sample [notebook](notebooks/Perception_Statistics.ipynb) to help you load synthetic datasets generated using the [Perception package](https://github.com/Unity-Technologies/com.unity.perception) and visualize dataset statistics. We plan to support other sample Unity projects in the future. 20 | 21 | ### Load Datasets 22 | 23 | The [Unity Perception](https://datasetinsights.readthedocs.io/en/latest/datasetinsights.datasets.unity_perception.html#datasetinsights-datasets-unity-perception) package provides datasets under this [schema](https://datasetinsights.readthedocs.io/en/latest/Synthetic_Dataset_Schema.html#synthetic-dataset-schema). The datasetinsights package also provides convenient Python modules to parse datasets. 24 | 25 | For example, you can load `AnnotationDefinitions` into a Python dictionary by providing the corresponding annotation definition ID: 26 | 27 | ```python 28 | from datasetinsights.datasets.unity_perception import AnnotationDefinitions 29 | 30 | annotation_def = AnnotationDefinitions(data_root=dest, version="my_schema_version") 31 | definition_dict = annotation_def.get_definition(def_id="my_definition_id") 32 | ``` 33 | 34 | Similarly, for `MetricDefinitions`: 35 | ```python 36 | from datasetinsights.datasets.unity_perception import MetricDefinitions 37 | 38 | metric_def = MetricDefinitions(data_root=dest, version="my_schema_version") 39 | definition_dict = metric_def.get_definition(def_id="my_definition_id") 40 | ``` 41 | 42 | The `Captures` table provides the collection of simulation captures and annotations. You can load these records directly as a Pandas `DataFrame`: 43 | 44 | ```python 45 | from datasetinsights.datasets.unity_perception import Captures 46 | 47 | captures = Captures(data_root=dest, version="my_schema_version") 48 | captures_df = captures.filter(def_id="my_definition_id") 49 | ``` 50 | 51 | 52 | The `Metrics` table can store simulation metrics for a capture or annotation. You can also load these records as a Pandas `DataFrame`: 53 | 54 | ```python 55 | from datasetinsights.datasets.unity_perception import Metrics 56 | 57 | metrics = Metrics(data_root=dest, version="my_schema_version") 58 | metrics_df = metrics.filter_metrics(def_id="my_definition_id") 59 | ``` 60 | 61 | ### Download Datasets 62 | 63 | You can download the datasets using the [download](https://datasetinsights.readthedocs.io/en/latest/datasetinsights.commands.html#datasetinsights-commands-download) command: 64 | 65 | ```bash 66 | datasetinsights download --source-uri=<source-uri> --output=$HOME/data 67 | ``` 68 | 69 | The download command supports HTTP(S) and GCS sources. 70 | 71 | Alternatively, you can download datasets directly from the Python [interface](https://datasetinsights.readthedocs.io/en/latest/datasetinsights.io.downloader.html#module-datasetinsights.io.downloader). 72 | 73 | `GCSDatasetDownloader` can download a dataset from GCS locations. 74 | ```python 75 | from datasetinsights.io.downloader import GCSDatasetDownloader 76 | 77 | source_uri = "gs://url/to/file.zip"  # or "gs://url/to/folder" 78 | dest = "~/data" 79 | downloader = GCSDatasetDownloader() 80 | downloader.download(source_uri=source_uri, output=dest) 81 | ``` 82 | 83 | `HTTPDatasetDownloader` can download a dataset from any HTTP(S) URL. 84 | ```python 85 | from datasetinsights.io.downloader import HTTPDatasetDownloader 86 | 87 | source_uri = "http://url.to.file.zip" 88 | dest = "~/data" 89 | downloader = HTTPDatasetDownloader() 90 | downloader.download(source_uri=source_uri, output=dest) 91 | ``` 92 | 93 | ### Convert Datasets 94 | 95 | If you are interested in converting the synthetic dataset to COCO format for 96 | the annotation types that COCO supports, you can run the `convert` command: 97 | 98 | ```bash 99 | datasetinsights convert -i <input> -o <output> -f COCO-Instances 100 | ``` 101 | or 102 | ```bash 103 | datasetinsights convert -i <input> -o <output> -f COCO-Keypoints 104 | ``` 105 | 106 | You will need to provide the 2D bounding box definition ID in the synthetic dataset. We currently only support 2D bounding box and human keypoint annotations for the COCO format. 107 | 108 | ## Docker 109 | 110 | You can use the pre-built docker image [unitytechnologies/datasetinsights](https://hub.docker.com/r/unitytechnologies/datasetinsights) to interact with datasets; an illustrative `docker run` command is sketched below, after the License section. 111 | 112 | ## Documentation 113 | 114 | You can find the API documentation on [readthedocs](https://datasetinsights.readthedocs.io/en/latest/). 115 | 116 | ## Contributing 117 | 118 | Please let us know if you encounter a bug by filing an issue. To learn more about making a contribution to Dataset Insights, please see our Contribution [page](CONTRIBUTING.md). 119 | 120 | ## License 121 | 122 | Dataset Insights is licensed under the Apache License, Version 2.0. See [LICENSE](LICENCE) for the full license text.
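For illustration, one way to run the image mentioned in the Docker section above — the port and volume mapping are assumptions based on this repository's Dockerfile, which starts a Jupyter notebook server on port 8888 and declares a `/data` volume:

```bash
# Hypothetical invocation; adjust host paths and ports to your setup.
docker run -p 8888:8888 -v $HOME/data:/data unitytechnologies/datasetinsights:latest
```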
123 | 124 | ## Citation 125 | If you find this package useful, consider citing it using: 126 | ``` 127 | @misc{datasetinsights2020, 128 | title={Unity {D}ataset {I}nsights Package}, 129 | author={{Unity Technologies}}, 130 | howpublished={\url{https://github.com/Unity-Technologies/datasetinsights}}, 131 | year={2020} 132 | } 133 | ``` 134 | -------------------------------------------------------------------------------- /datasetinsights/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/datasetinsights/__init__.py -------------------------------------------------------------------------------- /datasetinsights/__main__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import click 4 | 5 | from datasetinsights.commands import Entrypoint 6 | from datasetinsights.constants import CONTEXT_SETTINGS 7 | 8 | logging.basicConfig( 9 | level=logging.INFO, 10 | format=( 11 | "%(levelname)s | %(asctime)s | %(name)s | %(threadName)s | " 12 | "%(message)s" 13 | ), 14 | datefmt="%Y-%m-%d %H:%M:%S", 15 | ) 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | @click.command( 20 | cls=Entrypoint, 21 | help="Dataset Insights.", 22 | context_settings=CONTEXT_SETTINGS, 23 | ) 24 | @click.option( 25 | "-v", 26 | "--verbose", 27 | is_flag=True, 28 | default=False, 29 | help="Enables verbose mode.", 30 | ) 31 | def entrypoint(verbose): 32 | if verbose: 33 | root_logger = logging.getLogger() 34 | root_logger.setLevel(logging.DEBUG) 35 | 36 | 37 | if __name__ == "__main__": 38 | entrypoint() 39 | -------------------------------------------------------------------------------- /datasetinsights/commands/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import click 4 | 5 | 6 | class Entrypoint(click.MultiCommand): 7 | """Click MultiCommand Entrypoint For Datasetinsights CLI""" 8 | 9 | def list_commands(self, ctx): 10 | """Dynamically get the list of commands.""" 11 | rv = [] 12 | for filename in os.listdir(os.path.dirname(__file__)): 13 | if filename.endswith(".py") and not filename.startswith("__init__"): 14 | rv.append(filename[:-3]) 15 | rv.sort() 16 | 17 | return rv 18 | 19 | def get_command(self, ctx, name): 20 | """Dynamically get the command.""" 21 | ns = {} 22 | fn = os.path.join(os.path.dirname(__file__), name + ".py") 23 | if not os.path.exists(fn): 24 | return None 25 | with open(fn) as f: 26 | code = compile(f.read(), fn, "exec") 27 | eval(code, ns, ns) 28 | 29 | return ns["cli"] 30 | -------------------------------------------------------------------------------- /datasetinsights/commands/convert.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import click 4 | 5 | import datasetinsights.constants as const 6 | from datasetinsights.datasets.transformers import get_dataset_transformer 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | @click.command(context_settings=const.CONTEXT_SETTINGS) 12 | @click.option( 13 | "-i", 14 | "--input", 15 | type=click.Path(exists=True, file_okay=False), 16 | required=True, 17 | help="Directory of the Synthetic dataset.", 18 | ) 19 | @click.option( 20 | "-o", 21 | "--output", 22 | type=click.Path(file_okay=False, writable=True), 23 | required=True, 24 | help="Directory of the converted dataset.", 25 | ) 26 | @click.option( 
27 | "-f", 28 | "--format", 29 | required=True, 30 | help=( 31 | "The output dataset format. " 32 | "Currently only 'COCO-Instances' and 'COCO-Keypoints' is supported." 33 | ), 34 | ) 35 | def cli(input, output, format): 36 | """Convert dataset from Perception format to target format.""" 37 | ctx = click.get_current_context() 38 | logger.debug(f"Called convert command with parameters: {ctx.params}") 39 | 40 | transformer = get_dataset_transformer(format=format, data_root=input) 41 | transformer.execute(output=output) 42 | -------------------------------------------------------------------------------- /datasetinsights/commands/download.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import re 3 | 4 | import click 5 | 6 | import datasetinsights.constants as const 7 | from datasetinsights.io.downloader.base import create_dataset_downloader 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class SourceURI(click.ParamType): 13 | """Represents the Source URI Parameter type. 14 | 15 | This extends click.ParamType that allows click framework to validates 16 | supported source URI according to the prefix pattern. 17 | 18 | Raises: 19 | click.BadParameter: if the validation failed. 20 | """ 21 | 22 | name = "source_uri" 23 | PREFIX_PATTERN = r"^gs://|^http(s)?://|^usim://" 24 | 25 | def convert(self, value, param, ctx): 26 | """Validate source URI and Converts the value.""" 27 | match = re.search(self.PREFIX_PATTERN, value) 28 | if not match: 29 | message = ( 30 | f"The source uri {value} is not supported. " 31 | f"Pattern: {self.PREFIX_PATTERN}" 32 | ) 33 | self.fail(message, param, ctx) 34 | 35 | return value 36 | 37 | 38 | @click.command( 39 | context_settings=const.CONTEXT_SETTINGS, 40 | ) 41 | @click.option( 42 | "-s", 43 | "--source-uri", 44 | type=SourceURI(), 45 | required=True, 46 | help=( 47 | "URI of where this data should be downloaded. " 48 | f"Supported source uri patterns {SourceURI.PREFIX_PATTERN}" 49 | ), 50 | ) 51 | @click.option( 52 | "-o", 53 | "--output", 54 | type=click.Path(exists=True, file_okay=False, writable=True), 55 | default=const.DEFAULT_DATA_ROOT, 56 | help="Directory on localhost where datasets should be downloaded.", 57 | ) 58 | @click.option( 59 | "-b", 60 | "--include-binary", 61 | is_flag=True, 62 | default=False, 63 | help=( 64 | "Whether to download binary files such as images or LIDAR point " 65 | "clouds. This flag applies to Datasets where metadata " 66 | "(e.g. annotation json, dataset catalog, ...) can be separated from " 67 | "binary files." 68 | ), 69 | ) 70 | @click.option( 71 | "--access-token", 72 | type=str, 73 | default=None, 74 | help="Unity Simulation access token. " 75 | "This will override synthetic datasets source-uri for Unity Simulation", 76 | ) 77 | @click.option( 78 | "--checksum-file", 79 | type=str, 80 | default=None, 81 | help="Dataset checksum text file path. " 82 | "Path can be a HTTP(S) url or a local file path. This will help check the " 83 | "integrity of the downloaded dataset.", 84 | ) 85 | def cli( 86 | source_uri, 87 | output, 88 | include_binary, 89 | access_token, 90 | checksum_file, 91 | ): 92 | """Download datasets to localhost from known locations. 93 | 94 | The download command can support downloading from 3 types of sources 95 | 96 | 1. 
97 | 98 | You can specify project_id, run_execution_id, access_token in source-uri: 99 | 100 | \b 101 | datasetinsights download \\ 102 | --source-uri=usim://<access_token>@<project_id>/<run_execution_id> \\ 103 | --output=$HOME/data 104 | 105 | Alternatively, you can also override access_token such as: 106 | 107 | \b 108 | datasetinsights download \\ 109 | --source-uri=usim://<project_id>/<run_execution_id> \\ 110 | --output=$HOME/data \\ 111 | --access-token=<access_token> 112 | 113 | 2. Downloading from a public HTTP(S) URL: 114 | 115 | \b 116 | datasetinsights download \\ 117 | --source-uri=http://url/to/file.zip \\ 118 | --output=$HOME/data 119 | 120 | 3. Downloading from a GCS URL: 121 | 122 | \b 123 | datasetinsights download \\ 124 | --source-uri=gs://url/to/file.zip \\ 125 | --output=$HOME/data 126 | 127 | or download all objects under the same directory: 128 | 129 | \b 130 | datasetinsights download \\ 131 | --source-uri=gs://url/to/directory \\ 132 | --output=$HOME/data 133 | """ 134 | ctx = click.get_current_context() 135 | logger.debug(f"Called download command with parameters: {ctx.params}") 136 | 137 | downloader = create_dataset_downloader( 138 | source_uri=source_uri, access_token=access_token 139 | ) 140 | downloader.download( 141 | source_uri=source_uri, 142 | output=output, 143 | include_binary=include_binary, 144 | checksum_file=checksum_file, 145 | ) 146 | -------------------------------------------------------------------------------- /datasetinsights/constants.py: -------------------------------------------------------------------------------- 1 | DEFAULT_DATA_ROOT = "/data" 2 | 3 | # Default perception schema version 4 | DEFAULT_PERCEPTION_VERSION = "0.0.1" 5 | 6 | 7 | # Default Timing text for codetiming.Timer decorator 8 | TIMING_TEXT = "[{name}] elapsed time: {:0.4f} seconds." 9 | 10 | # Click CLI context settings 11 | CONTEXT_SETTINGS = { 12 | "help_option_names": ["-h", "--help"], 13 | "show_default": True, 14 | "ignore_unknown_options": True, 15 | "allow_extra_args": True, 16 | } 17 | -------------------------------------------------------------------------------- /datasetinsights/dashboard.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | 5 | import dash_core_components as dcc 6 | import dash_html_components as html 7 | from dash.dependencies import Input, Output 8 | 9 | import datasetinsights.stats.visualization.overview as overview 10 | from datasetinsights.stats.visualization.app import get_app 11 | from datasetinsights.stats.visualization.object_detection import ( 12 | render_object_detection_layout, 13 | ) 14 | 15 | app = get_app() 16 | 17 | 18 | def main_layout(): 19 | """Method for generating main app layout. 20 | 21 | Returns: 22 | html layout: main layout design with tabs for overview statistics 23 | and object detection. 24 | """ 25 | app_layout = html.Div( 26 | [ 27 | html.H1( 28 | children="Dataset Insights", 29 | style={ 30 | "textAlign": "center", 31 | "padding": 20, 32 | "background": "lightgrey", 33 | }, 34 | ), 35 | html.Div( 36 | [ 37 | dcc.Tabs( 38 | id="page_tabs", 39 | value="dataset_overview", 40 | children=[ 41 | dcc.Tab( 42 | label="Overview", 43 | value="dataset_overview", 44 | ), 45 | dcc.Tab( 46 | label="Object Detection", 47 | value="object_detection", 48 | ), 49 | ], 50 | ), 51 | html.Div(id="main_page_tabs"), 52 | ] 53 | ), 54 | # Sharing data between callbacks using a hidden division. 55 | # These hidden dcc and html components are for storing data-root 56 | # into the division.
This is further used in callbacks made in the 57 | # object_detection module. This is a temporary hack, based on example 1 58 | # of the Dash tutorial on sharing data between callbacks. 59 | # ref: https://dash.plotly.com/sharing-data-between-callbacks 60 | # TODO: Fix this using a better solution to share data. 61 | dcc.Dropdown(id="dropdown", style={"display": "none"}), 62 | html.Div(id="data_root_value", style={"display": "none"}), 63 | ] 64 | ) 65 | return app_layout 66 | 67 | 68 | @app.callback( 69 | Output("data_root_value", "children"), [Input("dropdown", "value")] 70 | ) 71 | def store_data_root(value): 72 | """Method for storing the data-root value in a hidden division. 73 | 74 | Returns: 75 | json : data-root encoded in json to be stored in the data_root_value div. 76 | """ 77 | json_data_root = json.dumps(data_root) 78 | 79 | return json_data_root 80 | 81 | 82 | @app.callback( 83 | Output("main_page_tabs", "children"), 84 | [Input("page_tabs", "value"), Input("data_root_value", "children")], 85 | ) 86 | def render_content(value, json_data_root): 87 | """Method for rendering the dashboard layout based 88 | on the selected tab value. 89 | 90 | Args: 91 | value (str): selected tab value 92 | json_data_root: data root stored in the hidden div in json format. 93 | 94 | Returns: 95 | html layout: layout for the selected tab. 96 | """ 97 | # read the data root value from the data_root_value division 98 | data_root = json.loads(json_data_root) 99 | if value == "dataset_overview": 100 | return overview.html_overview(data_root) 101 | elif value == "object_detection": 102 | return render_object_detection_layout(data_root) 103 | 104 | 105 | def check_path(path): 106 | """Method for checking if the given data-root path is valid or not.""" 107 | if os.path.isdir(path): 108 | return path 109 | else: 110 | raise ValueError(f"Path {path} not found") 111 | 112 | 113 | if __name__ == "__main__": 114 | parser = argparse.ArgumentParser() 115 | parser.add_argument("--data-root", help="Path to the data root") 116 | args = parser.parse_args() 117 | data_root = check_path(args.data_root) 118 | app.layout = main_layout() 119 | app.run_server(debug=True) 120 |
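For orientation, a sketch of launching this dashboard — the module invocation is an assumption based on this file's location in the package; `--data-root` is the only flag defined above:

```bash
# Hypothetical launch command; point --data-root at a Perception dataset.
python -m datasetinsights.dashboard --data-root=$HOME/data
```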
-------------------------------------------------------------------------------- /datasetinsights/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/datasetinsights/datasets/__init__.py -------------------------------------------------------------------------------- /datasetinsights/datasets/exceptions.py: -------------------------------------------------------------------------------- 1 | class DatasetNotFoundError(Exception): 2 | """Raise when a dataset file can't be found.""" 3 | -------------------------------------------------------------------------------- /datasetinsights/datasets/synthetic.py: -------------------------------------------------------------------------------- 1 | """ Simulation Dataset Catalog 2 | """ 3 | 4 | 5 | import logging 6 | 7 | from pyquaternion import Quaternion 8 | 9 | from datasetinsights.io.bbox import BBox2D, BBox3D 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | def read_bounding_box_3d(annotation, label_mappings=None): 15 | """Convert dictionary representations of 3d bounding boxes into objects 16 | of the BBox3D class 17 | 18 | Args: 19 | annotation (List[dict]): 3D bounding box annotation 20 | label_mappings (dict): a dict of {label_id: label_name} mapping 21 | 22 | Returns: 23 | A list of 3d bounding box objects 24 | """ 25 | 26 | bboxes = [] 27 | 28 | for b in annotation: 29 | label_id = b["label_id"] 30 | translation = ( 31 | b["translation"]["x"], 32 | b["translation"]["y"], 33 | b["translation"]["z"], 34 | ) 35 | size = (b["size"]["x"], b["size"]["y"], b["size"]["z"]) 36 | rotation = b["rotation"] 37 | rotation = Quaternion( 38 | x=rotation["x"], y=rotation["y"], z=rotation["z"], w=rotation["w"] 39 | ) 40 | 41 | if label_mappings and label_id not in label_mappings: 42 | continue 43 | box = BBox3D( 44 | translation=translation, 45 | size=size, 46 | label=label_id, 47 | sample_token=0, 48 | score=1, 49 | rotation=rotation, 50 | ) 51 | bboxes.append(box) 52 | 53 | return bboxes 54 | 55 | 56 | def read_bounding_box_2d(annotation, label_mappings=None): 57 | """Convert dictionary representations of 2d bounding boxes into objects 58 | of the BBox2D class 59 | 60 | Args: 61 | annotation (List[dict]): 2D bounding box annotation 62 | label_mappings (dict): a dict of {label_id: label_name} mapping 63 | 64 | Returns: 65 | A list of 2D bounding box objects 66 | """ 67 | bboxes = [] 68 | for b in annotation: 69 | label_id = b["label_id"] 70 | x = b["x"] 71 | y = b["y"] 72 | w = b["width"] 73 | h = b["height"] 74 | if label_mappings and label_id not in label_mappings: 75 | continue 76 | box = BBox2D(label=label_id, x=x, y=y, w=w, h=h) 77 | bboxes.append(box) 78 | 79 | return bboxes 80 |
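A hypothetical call to `read_bounding_box_2d` above — the dictionary keys mirror what the parser reads, while the values and label mapping are invented for illustration:

```python
from datasetinsights.datasets.synthetic import read_bounding_box_2d

# Two fake annotation records, shaped like the keys parsed above.
annotation = [
    {"label_id": 1, "x": 10.0, "y": 20.0, "width": 30.0, "height": 40.0},
    {"label_id": 99, "x": 0.0, "y": 0.0, "width": 5.0, "height": 5.0},
]
# Records whose label_id is absent from label_mappings are skipped,
# so only the first box is returned here.
bboxes = read_bounding_box_2d(annotation, label_mappings={1: "car"})
```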
-------------------------------------------------------------------------------- /datasetinsights/datasets/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import get_dataset_transformer 2 | from .coco import COCOInstancesTransformer, COCOKeypointsTransformer 3 | 4 | __all__ = [ 5 | "COCOInstancesTransformer", 6 | "COCOKeypointsTransformer", 7 | "get_dataset_transformer", 8 | ] 9 | -------------------------------------------------------------------------------- /datasetinsights/datasets/transformers/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | def get_dataset_transformer(format, **kwargs): 5 | """ 6 | Returns an instantiated transformer object from the registry, based on 7 | the provided conversion format. 8 | 9 | Args: 10 | format (str): Conversion format to be used for dataset transformation. 11 | 12 | Returns: Transformer object instance. 13 | 14 | """ 15 | if format in DatasetTransformer.REGISTRY.keys(): 16 | transformer = DatasetTransformer.REGISTRY[format] 17 | else: 18 | raise ValueError( 19 | f"Transformer not found for conversion format '{format}'" 20 | ) 21 | 22 | return transformer(**kwargs) 23 | 24 | 25 | class DatasetTransformer(ABC): 26 | """Base class for all dataset transformers.""" 27 | 28 | REGISTRY = {} 29 | 30 | @classmethod 31 | def __init_subclass__(cls, format=None, **kwargs): 32 | if format: 33 | cls.REGISTRY[format] = cls 34 | else: 35 | raise NotImplementedError( 36 | f"Subclass needs to have a class keyword argument named " 37 | f"'format'." 38 | ) 39 | super().__init_subclass__(**kwargs) 40 | 41 | @abstractmethod 42 | def execute(self, output, **kwargs): 43 | raise NotImplementedError("Subclass needs to implement this method")
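To make the registry mechanism above concrete, here is a hypothetical subclass — the class name and format string are invented; registration happens through the `format` class keyword consumed by `__init_subclass__`:

```python
# Illustrative only: a custom transformer registering itself in REGISTRY.
class MyTransformer(DatasetTransformer, format="My-Format"):
    def __init__(self, data_root=None, **kwargs):
        self._data_root = data_root

    def execute(self, output, **kwargs):
        # Write the converted dataset under `output`.
        ...

# get_dataset_transformer(format="My-Format", data_root="/data")
# would now return a MyTransformer instance.
```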
-------------------------------------------------------------------------------- /datasetinsights/datasets/unity_perception/__init__.py: -------------------------------------------------------------------------------- 1 | from .captures import Captures 2 | from .metrics import Metrics 3 | from .references import AnnotationDefinitions, Egos, MetricDefinitions, Sensors 4 | 5 | __all__ = [ 6 | "AnnotationDefinitions", 7 | "Captures", 8 | "Egos", 9 | "Metrics", 10 | "MetricDefinitions", 11 | "Sensors", 12 | ] 13 | -------------------------------------------------------------------------------- /datasetinsights/datasets/unity_perception/exceptions.py: -------------------------------------------------------------------------------- 1 | class DefinitionIDError(Exception): 2 | """Raise when a given definition id can't be found.""" 3 | -------------------------------------------------------------------------------- /datasetinsights/datasets/unity_perception/metrics.py: -------------------------------------------------------------------------------- 1 | """Load Synthetic dataset Metrics 2 | """ 3 | import json 4 | 5 | import dask.bag as db 6 | 7 | from datasetinsights.constants import DEFAULT_DATA_ROOT 8 | 9 | from .exceptions import DefinitionIDError 10 | from .tables import DATASET_TABLES, SCHEMA_VERSION, glob 11 | from .validation import verify_version 12 | 13 | 14 | class Metrics: 15 | """Load metrics table 16 | 17 | Metrics store extra metadata that can be used to describe a particular 18 | sequence, capture or annotation. Metric records are stored as an arbitrary 19 | number (M) of key-value pairs. 20 | For more detail, see the schema design doc: 21 | :ref:`metrics` 22 | 23 | Attributes: 24 | metrics (dask.bag.core.Bag): a collection of metrics records 25 | Examples: 26 | >>> metrics = Metrics(data_root="/data") 27 | >>> metrics_df = metrics.filter_metrics(def_id="my_definition_id") 28 | # metrics_df now contains all the metrics data corresponding to 29 | "my_definition_id" 30 | 31 | One example of metrics_df (first row shown below): 32 | 33 | +---------------+------------------+---------------------+ 34 | | label_id(int) | instance_id(int) | visible_pixels(int) | 35 | +===============+==================+=====================+ 36 | | 2 | 2 | 2231 | 37 | +---------------+------------------+---------------------+ 38 | 39 | """ 40 | 41 | TABLE_NAME = "metrics" 42 | FILE_PATTERN = DATASET_TABLES[TABLE_NAME].file 43 | 44 | def __init__(self, data_root=DEFAULT_DATA_ROOT, version=SCHEMA_VERSION): 45 | """Initialize Metrics 46 | 47 | Args: 48 | data_root (str): the root directory of the dataset containing 49 | metrics 50 | version (str): desired schema version 51 | """ 52 | self.metrics = self._load_metrics(data_root, version) 53 | 54 | def _load_metrics(self, data_root, version): 55 | """Load metrics records from json files into a dask bag. 56 | See :ref:`metrics`. 57 | 58 | 59 | Args: 60 | data_root (str): the root directory of the dataset containing 61 | metrics 62 | version (str): desired schema version 63 | 64 | Returns: 65 | dask.bag.core.Bag 66 | """ 67 | metrics_files = db.from_sequence(glob(data_root, self.FILE_PATTERN)) 68 | metrics = metrics_files.map( 69 | lambda path: Metrics._load_json(path, self.TABLE_NAME, version) 70 | ).flatten() 71 | 72 | return metrics 73 | 74 | @staticmethod 75 | def _normalize_values(metric): 76 | """Filter unnecessary info from a metric record. 77 | One-level flatten of the metrics.values column. 78 | """ 79 | values = metric["values"] 80 | for value in values: 81 | value["capture_id"] = metric["capture_id"] 82 | value["annotation_id"] = metric["annotation_id"] 83 | value["sequence_id"] = metric["sequence_id"] 84 | value["step"] = metric["step"] 85 | 86 | return values 87 | 88 | def filter_metrics(self, def_id): 89 | """Get all metrics filtered by a given metric definition id 90 | 91 | Args: 92 | def_id (str): metric definition id used to filter results 93 | Raises: 94 | DefinitionIDError: raised if no metrics records match the given 95 | def_id 96 | Returns (pd.DataFrame): 97 | Columns: "label_id", "capture_id", "annotation_id", "sequence_id", 98 | "step" 99 | """ 100 | metrics = ( 101 | self.metrics.filter( 102 | lambda metric: metric["metric_definition"] == def_id 103 | ) 104 | .map(Metrics._normalize_values) 105 | .flatten() 106 | ) 107 | if metrics.count().compute() == 0: 108 | msg = ( 109 | f"Can't find metrics records associated with the given " 110 | f"definition id {def_id}."
111 | ) 112 | raise DefinitionIDError(msg) 113 | 114 | return metrics.to_dataframe().compute() 115 | 116 | @staticmethod 117 | def _load_json(filename, table_name, version): 118 | """Load records from json files into a dict""" 119 | with open(filename, "r", encoding="utf8") as file: 120 | data = json.load(file) 121 | verify_version(data, version) 122 | 123 | return data[table_name] 124 | -------------------------------------------------------------------------------- /datasetinsights/datasets/unity_perception/tables.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import pathlib 4 | from collections import namedtuple 5 | from enum import Enum 6 | 7 | import pandas as pd 8 | 9 | from .validation import verify_version 10 | 11 | logger = logging.getLogger(__name__) 12 | SCHEMA_VERSION = "0.0.1" # Synthetic dataset schema version 13 | 14 | 15 | class FileType(Enum): 16 | BINARY = "binary" 17 | REFERENCE = "reference" 18 | METRIC = "metric" 19 | CAPTURE = "capture" 20 | 21 | 22 | Table = namedtuple("Table", "file pattern filetype") 23 | DATASET_TABLES = { 24 | "annotation_definitions": Table( 25 | "**/annotation_definitions.json", 26 | r"(?:\w|-|/)*annotation_definitions.json", 27 | FileType.REFERENCE, 28 | ), 29 | "captures": Table( 30 | "**/captures_*.json", 31 | r"(?:\w|-|/)*captures_[0-9]+.json", 32 | FileType.CAPTURE, 33 | ), 34 | "egos": Table("**/egos.json", r"(?:\w|-|/)*egos.json", FileType.REFERENCE), 35 | "metric_definitions": Table( 36 | "**/metric_definitions.json", 37 | r"(?:\w|-|/)*metric_definitions.json", 38 | FileType.REFERENCE, 39 | ), 40 | "metrics": Table( 41 | "**/metrics_*.json", r"(?:\w|-|/)*metrics_[0-9]+.json", FileType.METRIC 42 | ), 43 | "sensors": Table( 44 | "**/sensors.json", r"(?:\w|-|/)*sensors.json", FileType.REFERENCE 45 | ), 46 | } 47 | 48 | 49 | def glob(data_root, pattern): 50 | """Find all matching files in a directory. 51 | 52 | Args: 53 | data_root (str): directory containing capture files 54 | pattern (str): Unix file pattern 55 | 56 | Yields: 57 | str: matched filenames in a directory 58 | """ 59 | path = pathlib.Path(data_root) 60 | for fp in path.glob(pattern): 61 | yield fp 62 | 63 | 64 | def load_table(json_file, table_name, version, **kwargs): 65 | """Load records from json files into a pandas table 66 | 67 | Args: 68 | json_file (str): filename to json. 69 | table_name (str): table name in the json file to be loaded 70 | version (str): requested version of this table 71 | **kwargs: arbitrary keyword arguments to be passed to pandas' 72 | json_normalize method. 73 | 74 | Returns: 75 | a pandas dataframe of the loaded table. 76 | 77 | Raises: 78 | VersionError: If the version in json file does not match the requested 79 | version. 
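Example (illustrative; assumes an `egos.json` file that matches the requested
schema version):

    >>> table = load_table("Dataset/egos.json", "egos", "0.0.1")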
80 | """ 81 | logger.debug(f"Loading table {table_name} from {json_file}") 82 | data = json.load(open(json_file, "r", encoding="utf8")) 83 | verify_version(data, version) 84 | table = pd.json_normalize(data[table_name], **kwargs) 85 | 86 | return table 87 | -------------------------------------------------------------------------------- /datasetinsights/datasets/unity_perception/validation.py: -------------------------------------------------------------------------------- 1 | """ Validate Simulation Data 2 | """ 3 | 4 | 5 | class VersionError(Exception): 6 | """Raise when the data file version does not match""" 7 | 8 | pass 9 | 10 | 11 | class DuplicateRecordError(Exception): 12 | """Raise when the definition file has duplicate definition id""" 13 | 14 | pass 15 | 16 | 17 | class NoRecordError(Exception): 18 | """Raise when no record is found matching a given definition id""" 19 | 20 | pass 21 | 22 | 23 | def verify_version(json_data, version): 24 | """Verify json schema version 25 | 26 | Args: 27 | json_data (json): a json object loaded from file. 28 | version (str): string of the requested version. 29 | 30 | Raises: 31 | VersionError: If the version in json file does not match the requested 32 | version. 33 | """ 34 | loaded = json_data["version"] 35 | if loaded != version: 36 | raise VersionError(f"Version mismatch. Expected version: {version}") 37 | 38 | 39 | def check_duplicate_records(table, column, table_name): 40 | """Check if table has duplicate records for a given column 41 | 42 | Args: 43 | table (pd.DataFrame): a pandas dataframe 44 | column (str): the column where no duplication is allowed 45 | table_name (str): table name 46 | 47 | Raises: 48 | DuplicateRecordError: If duplicate records are found in a column 49 | """ 50 | if table[column].nunique() != len(table): 51 | raise DuplicateRecordError( 52 | f"Duplicate record was found in {column} of table {table_name}. " 53 | f"This column is expected to be unique. Violating this requirement " 54 | f"might cause ambiguity when the records are loaded." 
55 | ) 56 | -------------------------------------------------------------------------------- /datasetinsights/io/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox import BBox2D 2 | from .downloader import create_dataset_downloader 3 | 4 | __all__ = [ 5 | "BBox2D", 6 | "create_dataset_downloader", 7 | ] 8 | -------------------------------------------------------------------------------- /datasetinsights/io/download.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import logging 3 | import os 4 | import re 5 | import tempfile 6 | import zlib 7 | from pathlib import Path 8 | 9 | import requests 10 | from requests.adapters import HTTPAdapter 11 | from requests.packages.urllib3.util.retry import Retry 12 | 13 | from .exceptions import ChecksumError, DownloadError 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | # Timeout of requests (in seconds) 18 | DEFAULT_TIMEOUT = 1800 19 | # Number of retries after a failed request 20 | DEFAULT_MAX_RETRIES = 5 21 | 22 | 23 | class TimeoutHTTPAdapter(HTTPAdapter): 24 | def __init__(self, timeout, *args, **kwargs): 25 | self.timeout = timeout 26 | super().__init__(*args, **kwargs) 27 | 28 | def send(self, request, **kwargs): 29 | kwargs["timeout"] = self.timeout 30 | return super().send(request, **kwargs) 31 | 32 | 33 | def download_file(source_uri: str, dest_path: str, file_name: str = None): 34 | """Download a file specified by a source uri 35 | 36 | Args: 37 | source_uri (str): source url where the file should be downloaded 38 | dest_path (str): destination path of the file 39 | file_name (str): file name of the file to be downloaded 40 | 41 | Returns: 42 | Path of the destination file. 43 | """ 44 | logger.debug(f"Trying to download file from {source_uri} -> {dest_path}") 45 | adapter = TimeoutHTTPAdapter( 46 | timeout=DEFAULT_TIMEOUT, max_retries=Retry(total=DEFAULT_MAX_RETRIES) 47 | ) 48 | with requests.Session() as http: 49 | http.mount("https://", adapter) 50 | try: 51 | response = http.get(source_uri) 52 | response.raise_for_status() 53 | except requests.exceptions.RequestException as ex: 54 | logger.error(ex) 55 | err_msg = ( 56 | f"The request download from {source_uri} -> {dest_path} can't " 57 | f"be completed." 58 | ) 59 | 60 | raise DownloadError(err_msg) 61 | else: 62 | dest_path = Path(dest_path) 63 | if not file_name: 64 | file_name = _parse_filename(response, source_uri) 65 | dest_path = dest_path / file_name 66 | dest_path.parent.mkdir(parents=True, exist_ok=True) 67 | with open(dest_path, "wb") as f: 68 | f.write(response.content) 69 | 70 | return dest_path 71 | 72 | 73 | def checksum_matches(filepath, expected_checksum, algorithm="CRC32"): 74 | """Check if the checksum matches 75 | 76 | Args: 77 | filepath (str): the downloaded file path 78 | expected_checksum (int): expected checksum of the file 79 | algorithm (str): checksum algorithm. Defaults to CRC32 80 | 81 | Returns: 82 | True if the file checksum matches. 83 | """ 84 | computed = compute_checksum(filepath, algorithm) 85 | return computed == expected_checksum 86 | 87 | 88 | def validate_checksum(filepath, expected_checksum, algorithm="CRC32"): 89 | """Validate checksum of the downloaded file. 90 | 91 | Args: 92 | filepath (str): the downloaded file path 93 | expected_checksum (int): expected checksum of the file 94 | algorithm (str): checksum algorithm. Defaults to CRC32 95 | 96 | Raises: 97 | ChecksumError if the file checksum does not match.
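A minimal usage sketch; the file path and the expected CRC32 value below
are illustrative:

    >>> validate_checksum("/data/dataset.zip", 912345678)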
98 | """ 99 | if not checksum_matches(filepath, expected_checksum, algorithm): 100 | raise ChecksumError 101 | 102 | 103 | def compute_checksum(filepath, algorithm="CRC32"): 104 | """Compute the checksum of a file. 105 | 106 | Args: 107 | filepath (str): the doaloaded file path 108 | algorithm (str): checksum algorithm. Defaults to CRC32 109 | 110 | Returns: 111 | int: the checksum value 112 | """ 113 | if algorithm == "CRC32": 114 | chs = _crc32_checksum(filepath) 115 | elif algorithm == "MD5": 116 | chs = _md5_checksum(filepath) 117 | else: 118 | raise ValueError("Unsupported checksum algorithm!") 119 | 120 | return chs 121 | 122 | 123 | def _crc32_checksum(filepath): 124 | """Calculate the checksum of a file using CRC32.""" 125 | with open(filepath, "rb") as f: 126 | checksum = zlib.crc32(f.read()) 127 | 128 | return checksum 129 | 130 | 131 | def _md5_checksum(filename): 132 | """Calculate the checksum of a file using MD5.""" 133 | md5 = hashlib.md5() 134 | with open(filename, "rb") as f: 135 | for chunk in iter(lambda: f.read(4096), b""): 136 | md5.update(chunk) 137 | return md5.hexdigest() 138 | 139 | 140 | def get_checksum_from_file(filepath): 141 | """This method return checksum of the file whose filepath is given. 142 | 143 | Args: 144 | filepath (str): Path of the checksum file. 145 | Path can be HTTP(s) url or local path. 146 | 147 | Raises: 148 | ValueError: Raises this error if filepath is not local or not 149 | HTTP or HTTPS url. 150 | 151 | """ 152 | 153 | if filepath.startswith(("http://", "https://")): 154 | with tempfile.TemporaryDirectory() as tmp: 155 | checksum_file_path = os.path.join(tmp, "checksum.txt") 156 | file_path = download_file( 157 | source_uri=filepath, dest_path=checksum_file_path 158 | ) 159 | return _read_checksum_from_txt(file_path) 160 | 161 | elif os.path.isfile(filepath): 162 | return _read_checksum_from_txt(filepath) 163 | 164 | else: 165 | raise ValueError(f"Can not get checksum from path: {filepath}") 166 | 167 | 168 | def _read_checksum_from_txt(filepath): 169 | """This method reads checksum from a txt file and returns it. 170 | 171 | Args: 172 | filepath (str): Local filepath of the checksum file. 173 | 174 | Returns: 175 | str: checksum value from the checksum file. 176 | 177 | """ 178 | with open(filepath) as file: 179 | checksum = file.read() 180 | return checksum 181 | 182 | 183 | def _parse_filename(response, uri): 184 | file_name = _get_filename_from_response(response) 185 | if file_name is None: 186 | file_name = _get_file_name_from_uri(uri) 187 | return file_name 188 | 189 | 190 | def _get_filename_from_response(response): 191 | """Gets filename from requests response object 192 | 193 | Args: 194 | response: requests.Response() object that contains the server's 195 | response to the HTTP request. 
196 | 197 | Returns: 198 | filename (str): Name of the file to be downloaded 199 | """ 200 | cd = response.headers.get("content-disposition") 201 | if not cd: 202 | return None 203 | file_name = re.findall("filename=(.+)", cd) 204 | if len(file_name) == 0: 205 | return None 206 | return file_name[0] 207 | 208 | 209 | def _get_file_name_from_uri(uri): 210 | """Gets filename from URI 211 | 212 | Args: 213 | uri (str): URI 214 | 215 | """ 216 | return uri.split("/")[-1] 217 | -------------------------------------------------------------------------------- /datasetinsights/io/downloader/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import create_dataset_downloader 2 | from .gcs_downloader import GCSDatasetDownloader 3 | from .http_downloader import HTTPDatasetDownloader 4 | 5 | __all__ = [ 6 | "HTTPDatasetDownloader", 7 | "create_dataset_downloader", 8 | "GCSDatasetDownloader", 9 | ] 10 | -------------------------------------------------------------------------------- /datasetinsights/io/downloader/base.py: -------------------------------------------------------------------------------- 1 | import re 2 | from abc import ABC, abstractmethod 3 | 4 | _registry = {} 5 | 6 | 7 | def _find_downloader(source_uri): 8 | """ 9 | This function returns the correct DatasetDownloader 10 | from a registry based on the source-uri provided 11 | 12 | Args: 13 | source_uri: URI of where this data should be downloaded. 14 | 15 | Returns: The dataset downloader class that is registered with the 16 | source-uri protocol. 17 | 18 | """ 19 | protocols = "|".join(_registry.keys()) 20 | pattern = re.compile(f"({protocols})") 21 | 22 | protocol = pattern.findall(source_uri) 23 | 24 | if source_uri.startswith(("https://", "http://")): 25 | protocol = "http://" 26 | elif protocol: 27 | protocol = protocol[0] 28 | else: 29 | raise ValueError(f"Downloader not found for source-uri '{source_uri}'") 30 | 31 | return _registry.get(protocol) 32 | 33 | 34 | def create_dataset_downloader(source_uri, **kwargs): 35 | """ 36 | This function instantiates the dataset downloader 37 | after finding it with the source-uri provided 38 | 39 | Args: 40 | source_uri: URI used to look up the correct dataset downloader 41 | **kwargs: keyword arguments passed to the downloader's constructor 42 | 43 | Returns: The dataset downloader instance matching the source-uri. 44 | 45 | """ 46 | downloader_class = _find_downloader(source_uri=source_uri) 47 | return downloader_class(**kwargs) 48 | 49 | 50 | class DatasetDownloader(ABC): 51 | """This is the base class for all dataset downloaders 52 | The DatasetDownloader can be subclassed in the following way 53 | 54 | class NewDatasetDownloader(DatasetDownloader, protocol="protocol://") 55 | 56 | Here the 'protocol://' should match the prefix of the source_uri that the 57 | download method supports, e.g. http:// or gs:// 58 | 59 | """ 60 | 61 | def __init__(self, **kwargs): 62 | pass 63 | 64 | @classmethod 65 | def __init_subclass__(cls, protocol=None, **kwargs): 66 | if protocol: 67 | _registry[protocol] = cls 68 | else: 69 | raise NotImplementedError( 70 | f"Subclass needs to have a class keyword argument named protocol."
71 | ) 72 | super().__init_subclass__(**kwargs) 73 | 74 | @abstractmethod 75 | def download(self, source_uri, output, **kwargs): 76 | """This method downloads a dataset stored at the source_uri and stores it 77 | in the output directory 78 | 79 | Args: 80 | source_uri: URI that points to the dataset that should be downloaded 81 | output: path to local folder where the dataset should be stored 82 | """ 83 | raise NotImplementedError("Subclass needs to implement this method") 84 | -------------------------------------------------------------------------------- /datasetinsights/io/downloader/gcs_downloader.py: -------------------------------------------------------------------------------- 1 | from datasetinsights.io.downloader.base import DatasetDownloader 2 | from datasetinsights.io.gcs import GCSClient 3 | 4 | 5 | class GCSDatasetDownloader(DatasetDownloader, protocol="gs://"): 6 | """This class is used to download data from GCS""" 7 | 8 | def __init__(self, **kwargs): 9 | """Initializes GCSDatasetDownloader.""" 10 | self.client = GCSClient() 11 | 12 | def download(self, source_uri=None, output=None, **kwargs): 13 | """ 14 | Downloads a dataset from GCS to the output directory. 15 | Args: 16 | source_uri: This is the downloader-uri that indicates where on 17 | GCS the dataset should be downloaded from. 18 | The expected source-uri follows these patterns 19 | gs://bucket/folder or gs://bucket/folder/data.zip 20 | 21 | output: This is the path to the directory 22 | where the download will store the dataset. 23 | """ 24 | self.client.download(local_path=output, url=source_uri) 25 | -------------------------------------------------------------------------------- /datasetinsights/io/downloader/http_downloader.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from datasetinsights.io.download import ( 5 | download_file, 6 | get_checksum_from_file, 7 | validate_checksum, 8 | ) 9 | from datasetinsights.io.downloader.base import DatasetDownloader 10 | from datasetinsights.io.exceptions import ChecksumError 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | class HTTPDatasetDownloader(DatasetDownloader, protocol="http://"): 16 | """This class is used to download data from any HTTP or HTTPS public url 17 | and perform functions such as downloading the dataset and checksum 18 | validation if a checksum file path is provided. 19 | """ 20 | 21 | def download(self, source_uri, output, checksum_file=None, **kwargs): 22 | """This method is used to download the dataset from HTTP or HTTPS url. 23 | 24 | Args: 25 | source_uri (str): This is the downloader-uri that indicates where 26 | the dataset should be downloaded from. 27 | 28 | output (str): This is the path to the directory where the download 29 | will store the dataset. 30 | 31 | checksum_file (str): This is the path of the txt file that contains 32 | the checksum of the dataset to be downloaded. It 33 | can be an HTTP or HTTPS url or a local path. 34 | 35 | Raises: 36 | ChecksumError: Raised if the checksum doesn't 37 | match 38 | 39 | """ 40 | dataset_path = download_file(source_uri, output) 41 | 42 | if checksum_file: 43 | logger.debug("Reading checksum from checksum file.") 44 | checksum = get_checksum_from_file(checksum_file) 45 | try: 46 | logger.debug("Validating checksum.") 47 | validate_checksum(dataset_path, int(checksum)) 48 | except ChecksumError as e: 49 | logger.info("Checksum mismatch.
Deleting the downloaded file.") 50 | os.remove(dataset_path) 51 | raise e 52 | -------------------------------------------------------------------------------- /datasetinsights/io/exceptions.py: -------------------------------------------------------------------------------- 1 | class DownloadError(Exception): 2 | """Raise when a file download fails.""" 3 | 4 | 5 | class ChecksumError(Exception): 6 | """Raise when the downloaded file checksum is not correct.""" 7 | 8 | 9 | class InvalidTrackerError(Exception): 10 | """Raise when an unknown tracker is requested.""" 11 | -------------------------------------------------------------------------------- /datasetinsights/stats/__init__.py: -------------------------------------------------------------------------------- 1 | from datasetinsights.stats.keypoints_stats import ( 2 | get_average_skeleton, 3 | get_scale_keypoints, 4 | get_visible_keypoints_dict, 5 | ) 6 | from datasetinsights.stats.object_detection_stats import ( 7 | convert_coco_annotations_to_df, 8 | get_bbox_heatmap, 9 | get_bbox_per_img_dict, 10 | get_bbox_relative_size_list, 11 | ) 12 | 13 | from .statistics import RenderedObjectInfo 14 | from .visualization.plots import ( 15 | bar_plot, 16 | grid_plot, 17 | histogram_plot, 18 | model_performance_box_plot, 19 | model_performance_comparison_box_plot, 20 | plot_bboxes, 21 | plot_keypoints, 22 | rotation_plot, 23 | ) 24 | 25 | __all__ = [ 26 | "bar_plot", 27 | "grid_plot", 28 | "histogram_plot", 29 | "plot_bboxes", 30 | "model_performance_box_plot", 31 | "model_performance_comparison_box_plot", 32 | "rotation_plot", 33 | "RenderedObjectInfo", 34 | "plot_keypoints", 35 | "convert_coco_annotations_to_df", 36 | "get_bbox_heatmap", 37 | "get_bbox_per_img_dict", 38 | "get_bbox_relative_size_list", 39 | "get_average_skeleton", 40 | "get_scale_keypoints", 41 | "get_visible_keypoints_dict", 42 | ] 43 | -------------------------------------------------------------------------------- /datasetinsights/stats/constants.py: -------------------------------------------------------------------------------- 1 | # Human pose skeleton 2 | COCO_SKELETON = [ 3 | [16, 14], 4 | [14, 12], 5 | [17, 15], 6 | [15, 13], 7 | [12, 13], 8 | [6, 12], 9 | [7, 13], 10 | [6, 7], 11 | [6, 8], 12 | [7, 9], 13 | [8, 10], 14 | [9, 11], 15 | [2, 3], 16 | [1, 2], 17 | [1, 3], 18 | [2, 4], 19 | [3, 5], 20 | [4, 6], 21 | [5, 7], 22 | ] 23 | 24 | # Human pose keypoints 25 | COCO_KEYPOINTS = [ 26 | "nose", 27 | "left_eye", 28 | "right_eye", 29 | "left_ear", 30 | "right_ear", 31 | "left_shoulder", 32 | "right_shoulder", 33 | "left_elbow", 34 | "right_elbow", 35 | "left_wrist", 36 | "right_wrist", 37 | "left_hip", 38 | "right_hip", 39 | "left_knee", 40 | "right_knee", 41 | "left_ankle", 42 | "right_ankle", 43 | ] 44 | -------------------------------------------------------------------------------- /datasetinsights/stats/image_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | from .laplacian import ( 2 | get_bbox_fg_bg_var_laplacian, 3 | get_bbox_var_laplacian, 4 | get_final_mask, 5 | get_seg_fg_bg_var_laplacian, 6 | laplacian_img, 7 | ) 8 | from .spectral_analysis import get_average_psd_1d, get_psd1d, get_psd2d 9 | from .wavelet import get_wt_coeffs_var 10 | 11 | __all__ = [ 12 | "get_bbox_var_laplacian", 13 | "get_bbox_fg_bg_var_laplacian", 14 | "laplacian_img", 15 | "get_seg_fg_bg_var_laplacian", 16 | "get_final_mask", 17 | "get_average_psd_1d", 18 | "get_psd1d", 19 | "get_psd2d", 20 | "get_wt_coeffs_var", 21 | ] 22 |
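# A minimal usage sketch of the functions exported above (not part of the
# library); the image directory path is an illustrative assumption:
#
#   from datasetinsights.stats.image_analysis import (
#       get_average_psd_1d,
#       get_wt_coeffs_var,
#   )
#   avg_psd_1d, std_psd_1d = get_average_psd_1d("/data/images", img_type="png")
#   h_var, v_var, d_var = get_wt_coeffs_var("/data/images", num_img=100)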
-------------------------------------------------------------------------------- /datasetinsights/stats/image_analysis/laplacian.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Tuple 2 | 3 | import cv2 4 | import numpy as np 5 | 6 | 7 | def laplacian_img(img_path: str) -> np.ndarray: 8 | """ 9 | Converts image to grayscale, computes laplacian and returns it. 10 | Args: 11 | img_path (str): Path of image 12 | 13 | Returns: 14 | np.ndarray: numpy array of Laplacian of the image 15 | """ 16 | image = cv2.imread(img_path) 17 | gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 18 | laplacian = cv2.Laplacian(gray, cv2.CV_64F) 19 | laplacian = laplacian.astype("float") 20 | return laplacian 21 | 22 | 23 | def get_bbox_var_laplacian( 24 | laplacian: np.ndarray, x: int, y: int, w: int, h: int 25 | ) -> np.ndarray: 26 | """ 27 | Calculates bbox's variance of Laplacian 28 | Args: 29 | laplacian (np.ndarray): Laplacian of the image 30 | x (int): the upper-left coordinate of the bounding box 31 | y (int): the upper-left coordinate of the bounding box 32 | w (int): width of bbox 33 | h (int): height of bbox 34 | 35 | Returns: 36 | Variance of Laplacian of bbox 37 | """ 38 | bbox_var = laplacian[y : y + h, x : x + w] 39 | return np.nanvar(bbox_var) 40 | 41 | 42 | def get_bbox_fg_bg_var_laplacian( 43 | laplacian: np.ndarray, annotations: List[Dict] 44 | ) -> Tuple[List, np.ndarray]: 45 | """ 46 | Calculates foreground and background variance of laplacian of an image 47 | based on bounding boxes 48 | Args: 49 | laplacian (np.ndarray): Laplacian of the image 50 | annotations (List): List of dictionary of annotations containing bbox 51 | information of the given image laplacian 52 | 53 | Returns: 54 | bbox_var_lap (List): List of variance of laplacian of all bbox in the 55 | image 56 | img_var_laplacian (np.ndarray): Variance of Laplacian of background 57 | of the image 58 | 59 | """ 60 | bbox_var_lap = [] 61 | img_laplacian = laplacian.copy()  # copy to avoid mutating the caller's array 62 | 63 | for ann in annotations: 64 | x, y, w, h = ann["bbox"] 65 | bbox_area = w * h 66 | if bbox_area >= 1200: # ignoring small bbox sizes 67 | bbox_var = get_bbox_var_laplacian( 68 | img_laplacian, int(x), int(y), int(w), int(h) 69 | ) 70 | img_laplacian[int(y) : int(y + h), int(x) : int(x + w)] = np.nan 71 | bbox_var_lap.append(bbox_var) 72 | 73 | img_var_laplacian = np.nanvar(img_laplacian) 74 | 75 | return bbox_var_lap, img_var_laplacian 76 | 77 | 78 | def get_final_mask(masks: List[np.ndarray]) -> np.ndarray: 79 | """ 80 | Get one mask from multiple masks of an image 81 | Args: 82 | masks (List[np.ndarray]): List of binary masks of an image 83 | 84 | Returns: 85 | final_mask = Final binary mask representing union of all masks of an 86 | image 87 | """ 88 | final_mask = np.zeros_like(masks[0]) 89 | for mask in masks: 90 | final_mask = np.bitwise_or(final_mask, mask) 91 | return final_mask 92 | 93 | 94 | def get_seg_fg_bg_var_laplacian( 95 | laplacian: np.ndarray, final_mask: np.ndarray 96 | ) -> Tuple[np.ndarray, np.ndarray]: 97 | """ 98 | Calculates foreground and background variance of laplacian of an image 99 | based on segmentation information 100 | Args: 101 | laplacian (np.ndarray): Laplacian of the image 102 | final_mask (np.ndarray): Binary mask of the image in which 1 is 103 | instances of the image 104 | 105 | Returns: 106 | fg_var_lap = Foreground var of laplacian 107 | bg_var_lap = Background var of laplacian 108 | 109 | """ 110 | fg = np.where(final_mask == 1, laplacian, np.nan)  # foreground: instance pixels 111 | bg
= np.where(final_mask == 0, laplacian, np.nan)  # background: everything else 112 | fg_var_lap = np.nanvar(fg) 113 | bg_var_lap = np.nanvar(bg) 114 | 115 | return fg_var_lap, bg_var_lap 116 | -------------------------------------------------------------------------------- /datasetinsights/stats/image_analysis/spectral_analysis.py: -------------------------------------------------------------------------------- 1 | import glob 2 | 3 | import numpy as np 4 | from PIL import Image 5 | from scipy import ndimage 6 | from tqdm import tqdm 7 | 8 | 9 | def get_psd2d(image: np.ndarray) -> np.ndarray: 10 | """ 11 | Args: 12 | image (np.ndarray): Grayscale Image 13 | 14 | Returns: 15 | np.ndarray: 2D PSD of the image 16 | """ 17 | h, w = image.shape 18 | fourier_image = np.fft.fft2(image) 19 | N = h * w * 2 20 | psd2d = (1 / N) * np.abs(fourier_image) ** 2 21 | psd2d = np.fft.fftshift(psd2d) 22 | return psd2d 23 | 24 | 25 | def get_psd1d(psd_2d: np.ndarray) -> np.ndarray: 26 | """ 27 | Args: 28 | psd_2d (np.ndarray): 2D PSD of the image 29 | 30 | Returns: 31 | np.ndarray: 1D PSD of the given 2D PSD 32 | """ 33 | h = psd_2d.shape[0] 34 | w = psd_2d.shape[1] 35 | wc = w // 2 36 | hc = h // 2 37 | 38 | # create an array of integer radial distances from the center 39 | y, x = np.ogrid[-h // 2 : h // 2, -w // 2 : w // 2] 40 | r = np.hypot(x, y).astype(int) 41 | idx = np.arange(0, min(wc, hc)) 42 | psd_1d = ndimage.sum(psd_2d, r, index=idx) 43 | return psd_1d 44 | 45 | 46 | def _load_img(img_path: str): 47 | img = Image.open(img_path) 48 | img = img.convert("RGB") 49 | img = img.convert("L") 50 | return np.array(img) 51 | 52 | 53 | def _load_images_from_dir(img_dir: str, img_type: str = "png"): 54 | image_paths = glob.glob(img_dir + f"/*.{img_type}") 55 | img_array = [] 56 | for img_path in image_paths: 57 | img = _load_img(img_path) 58 | img_array.append(img) 59 | return img_array 60 | 61 | 62 | def get_average_psd_1d(img_dir: str, img_type: str = "png"): 63 | """ 64 | Get the average PSD of the entire dataset.
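Per-image 1D PSDs can have different lengths, so shorter ones are padded
with NaNs before the NaN-aware mean and standard deviation are computed.

Example (illustrative path):

    >>> avg_psd_1d, std_psd_1d = get_average_psd_1d("/data/images", "png")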
65 | Args: 66 | img_dir (str): Path of image directory 67 | img_type (str): Image type (PNG, JPG, etc.) 68 | 69 | Returns: 70 | avg_psd_1d (np.ndarray): Avg PSD 1D 71 | std_psd_1d (np.ndarray): Standard deviation of PSD 72 | 73 | """ 74 | images = _load_images_from_dir(img_dir, img_type) 75 | total_psd_1d = [] 76 | max_len = float("-inf") 77 | 78 | for image in tqdm(images): 79 | psd_2d = get_psd2d(image) 80 | psd_1d = get_psd1d(psd_2d) 81 | max_len = max(max_len, len(psd_1d)) 82 | total_psd_1d.append(psd_1d) 83 | 84 | for i in range(len(total_psd_1d)): 85 | if len(total_psd_1d[i]) < max_len: 86 | _len = max_len - len(total_psd_1d[i]) 87 | nan_arr = np.empty(_len) 88 | nan_arr[:] = np.nan 89 | total_psd_1d[i] = np.append(total_psd_1d[i], nan_arr) 90 | 91 | total_psd_1d = np.asarray(total_psd_1d, dtype=float) 92 | 93 | avg_psd_1d = np.nanmean(total_psd_1d, axis=0) 94 | std_psd_1d = np.nanstd(total_psd_1d, axis=0) 95 | 96 | return avg_psd_1d, std_psd_1d 97 | -------------------------------------------------------------------------------- /datasetinsights/stats/image_analysis/wavelet.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | 4 | import numpy as np 5 | import pywt 6 | from PIL import Image 7 | from tqdm import tqdm 8 | 9 | 10 | def get_wt_coeffs_var(img_dir: str, img_type: str = "png", num_img=None): 11 | """ 12 | Compute the variance of the wavelet detail coefficients for each image. 13 | Args: 14 | img_dir (str): Path of image directory 15 | img_type (str): Image type (PNG, JPG, etc.) 16 | num_img (int): Number of images to use for the calculation 17 | 18 | Returns: 19 | Lists of variances of Horizontal, Vertical and Diagonal details 20 | 21 | """ 22 | images = glob.glob(img_dir + f"/*.{img_type}") 23 | 24 | if num_img and num_img < len(images): 25 | images = random.sample(images, num_img) 26 | 27 | horizontal_coeff, vertical_coeff, diagonal_coeff = [], [], [] 28 | 29 | for img in tqdm(images): 30 | im = Image.open(img).convert("L") 31 | _, (cH, cV, cD) = pywt.dwt2(im, "haar", mode="periodization") 32 | horizontal_coeff.append(np.array(cH).var()) 33 | vertical_coeff.append(np.array(cV).var()) 34 | diagonal_coeff.append(np.array(cD).var()) 35 | 36 | return horizontal_coeff, vertical_coeff, diagonal_coeff 37 | -------------------------------------------------------------------------------- /datasetinsights/stats/keypoints_stats.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import Any, Dict, List, Tuple 3 | 4 | import numpy as np 5 | 6 | from datasetinsights.stats.constants import COCO_KEYPOINTS, COCO_SKELETON 7 | 8 | 9 | def _is_torso_visible_or_labeled(kp: List) -> bool: 10 | """ 11 | True if torso (left hip, right hip, left shoulder, 12 | right shoulder) is visible else False 13 | """ 14 | if len(kp) != 51: 15 | raise ValueError( 16 | "keypoint list doesn't fit the format of " 17 | "COCO human keypoints (17 keypoints)" 18 | ) 19 | return ( 20 | (kp[17] == 1 or kp[17] == 2) 21 | and (kp[20] == 1 or kp[20] == 2) 22 | and (kp[41] == 1 or kp[41] == 2) 23 | and (kp[38] == 1 or kp[38] == 2) 24 | ) 25 | 26 | 27 | def _get_kp_where_torso_visible(annotations: List) -> List: 28 | """ 29 | List of keypoints where torso is visible or labeled 30 | """ 31 | keypoints = [] 32 | for ann in annotations: 33 | if _is_torso_visible_or_labeled(ann): 34 | keypoints.append(ann) 35 | return keypoints 36 | 37 | 38 | def _calc_mid(p1: Tuple[Any, Any], p2: Tuple[Any, Any]): 39 | """ 40 | Calculate mid point of two points 41 | """ 42 | return
(p1[0] + p2[0]) / 2, (p1[1] + p2[1]) / 2 43 | 44 | 45 | def _calc_dist(p1: Tuple[Any, Any], p2: Tuple[Any, Any]) -> float: 46 | """ 47 | Calculate distance between two points 48 | """ 49 | return math.sqrt(((p1[0] - p2[0]) ** 2) + ((p1[1] - p2[1]) ** 2)) 50 | 51 | 52 | def _translate_and_scale_xy(X: np.ndarray, Y: np.ndarray): 53 | """ 54 | Return keypoints axis list X and Y after performing translation and scaling. 55 | """ 56 | left_hip, right_hip = (X[11], Y[11]), (X[12], Y[12]) 57 | left_shoulder, right_shoulder = (X[5], Y[5]), (X[6], Y[6]) 58 | 59 | # Translate all points according to mid_hip being at 0,0 60 | mid_hip = _calc_mid(right_hip, left_hip) 61 | X = np.where(X > 0.0, X - mid_hip[0], 0.0) 62 | Y = np.where(Y > 0.0, Y - mid_hip[1], 0.0) 63 | 64 | # Calculate scale factor 65 | scale = ( 66 | _calc_dist(left_shoulder, left_hip) 67 | + _calc_dist(right_shoulder, right_hip) 68 | ) / 2 69 | 70 | return X / scale, Y / scale 71 | 72 | 73 | def get_scale_keypoints(annotations: List) -> Dict: 74 | """ 75 | Process keypoints annotations to extract information for pose plots. 76 | Args: 77 | annotations (list): List of keypoints lists with format 78 | [x1, y1, v1, x2, y2, v2, ...] with the order of COCO_KEYPOINTS 79 | Returns: 80 | Dict: Processed key-value pair of keypoints name -> (x,y) list. 81 | """ 82 | keypoints = _get_kp_where_torso_visible(annotations) 83 | 84 | processed_kp_dict = {} 85 | for name in COCO_KEYPOINTS: 86 | processed_kp_dict[name] = {"x": [], "y": []} 87 | 88 | for kp in keypoints: 89 | # Separate x and y keypoints 90 | x_kp, y_kp = np.array(kp[0::3]), np.array(kp[1::3]) 91 | x_kp, y_kp = _translate_and_scale_xy(x_kp, y_kp) 92 | 93 | # save keypoints to dict 94 | idx = 0 95 | for xi, yi in zip(x_kp, y_kp): 96 | if xi == 0 and yi == 0: 97 | pass 98 | elif xi > 2.5 or xi < -2.5 or yi > 2.5 or yi < -2.5: 99 | pass 100 | else: 101 | processed_kp_dict[COCO_KEYPOINTS[idx]]["x"].append(xi) 102 | processed_kp_dict[COCO_KEYPOINTS[idx]]["y"].append(yi) 103 | idx += 1 104 | 105 | return processed_kp_dict 106 | 107 | 108 | def _get_avg_kp(kp_dict: Dict): 109 | """ 110 | Return average value of keypoints axis list X and Y. 111 | """ 112 | x_avg, y_avg = [], [] 113 | for key in COCO_KEYPOINTS: 114 | kp_x = np.array(kp_dict[key]["x"]) 115 | kp_y = np.array(kp_dict[key]["y"]) 116 | x_avg.append(np.mean(kp_x)) 117 | y_avg.append(np.mean(kp_y)) 118 | return x_avg, y_avg 119 | 120 | 121 | def get_average_skeleton(kp_dict: Dict, skeleton=COCO_SKELETON) -> List: 122 | """ 123 | Return skeleton (a list of connected human joints) of 124 | average keypoints values. 125 | Args: 126 | kp_dict (dict): key-value pair of keypoints name -> (x,y) list 127 | Returns: 128 | list: list of skeleton connections. 129 | """ 130 | x, y = _get_avg_kp(kp_dict) 131 | s = [] 132 | for p1, p2 in skeleton: 133 | s.append([(x[p1 - 1], y[p1 - 1]), (x[p2 - 1], y[p2 - 1])]) 134 | return s 135 | 136 | 137 | def get_visible_keypoints_dict(keypoint_list: List) -> Dict: 138 | """ 139 | Args: 140 | keypoint_list (List): List of keypoints lists with format 141 | [x1, y1, v1, x2, y2, v2, ...] with the order of COCO_KEYPOINTS 142 | Returns: 143 | labeled_kpt_dict (Dict): Labeled keypoints dictionary where 144 | key is the keypoint and val is the probability of that 145 | keypoint to occur in the bbox given that kp is labeled.
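Example (two synthetic annotations; the values are illustrative):

    >>> kps = [[10, 20, 2] * 17, [0, 0, 0] * 17]
    >>> get_visible_keypoints_dict(kps)["nose"]
    0.5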
146 | """ 147 | total_instances = len(keypoint_list) 148 | keypoints = COCO_KEYPOINTS 149 | 150 | kp_visibility_list = np.array(keypoint_list)[:, 2::3] 151 | kp_visibility_list = np.where(kp_visibility_list == 0.0, 0.0, 1.0) 152 | 153 | labeled_kpt_dict = {} 154 | for i, key in enumerate(keypoints): 155 | labeled_kpt_dict[key] = sum(kp_visibility_list[:, i]) / total_instances 156 | 157 | return labeled_kpt_dict 158 | -------------------------------------------------------------------------------- /datasetinsights/stats/object_detection_stats.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Dict 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | 8 | def convert_coco_annotations_to_df(filename: str) -> pd.DataFrame: 9 | """ 10 | Converts coco annotation file to pandas df for processing. 11 | Args: 12 | filename (str): Annotation file path 13 | Returns: 14 | coco dataframe (pd.DataFrame): dataframe of annotation info. 15 | """ 16 | coco_json = json.load(open(filename, "r")) 17 | 18 | df_image = pd.DataFrame(coco_json["images"]) 19 | df_annotation = pd.DataFrame(coco_json["annotations"]) 20 | 21 | df_coco = df_annotation.merge(df_image, left_on="image_id", right_on="id") 22 | 23 | return df_coco 24 | 25 | 26 | def get_bbox_relative_size_list(annotation_df: pd.DataFrame) -> np.ndarray: 27 | """ 28 | Args: 29 | annotation_df (pd.DataFrame): dataframe with image and 30 | bbox_annotation in each row,(columns include: width 31 | (image width), height (image height), area (bbox size)) 32 | Returns: 33 | bbox_relative_size_list (np.ndarray): List of all bbox 34 | sizes relative to its image size 35 | """ 36 | bbox_size = annotation_df["area"] 37 | image_size = annotation_df["width"] * annotation_df["height"] 38 | bbox_relative_size = np.sqrt(bbox_size / image_size) 39 | 40 | return bbox_relative_size 41 | 42 | 43 | def get_bbox_heatmap(annotation_df: pd.DataFrame) -> np.ndarray: 44 | """ 45 | Args: 46 | annotation_df (pd.DataFrame): dataframe with image 47 | and bbox_annotation in each row, (columns include: 48 | width (image width), height (image height), 49 | bbox ([top_left_x, top_left_y, width, height])) 50 | Returns: 51 | bbox_heatmap (np.ndarray): numpy array of size of 52 | the max sized image in the dataset with values describing 53 | bbox intensity over the entire dataset images 54 | at a particular pixel. 55 | """ 56 | max_width = max(annotation_df["width"]) 57 | max_height = max(annotation_df["height"]) 58 | bbox_heatmap = np.zeros([max_height, max_width, 1]) 59 | 60 | for bbox in annotation_df["bbox"]: 61 | bbox = np.array(bbox).astype(int) 62 | bbox_heatmap[ 63 | bbox[1] : bbox[1] + bbox[3], bbox[0] : bbox[0] + bbox[2], : 64 | ] += 1 65 | 66 | return bbox_heatmap 67 | 68 | 69 | def get_bbox_per_img_dict(annotation_df: pd.DataFrame) -> Dict: 70 | """ 71 | Args: 72 | annotation_df (pd.DataFrame): dataframe with each annotation 73 | in each row, (columns include: iscrowd (bool), image_id (image id)) 74 | Returns: 75 | Dict: Dictionary of number of bbox per image where key is the number 76 | of bbox and val is the probability of that number of bbox images in 77 | the dataset. 
78 | """ 79 | annotated_persons_df = annotation_df[(annotation_df["iscrowd"] == 0)] 80 | 81 | persons_in_img_df = pd.DataFrame( 82 | {"cnt": annotated_persons_df[["image_id"]].value_counts()} 83 | ) 84 | persons_in_img_df.reset_index(level=[0], inplace=True) 85 | 86 | # group by counter so we will get the dataframe with number of 87 | # annotated people in a single image 88 | 89 | persons_in_img_cnt_df = persons_in_img_df.groupby(["cnt"]).count() 90 | 91 | # extract arrays 92 | x_occurences = persons_in_img_cnt_df.index.values 93 | y_images = persons_in_img_cnt_df["image_id"].values 94 | total_images = sum(y_images) 95 | 96 | bbox_num_dict = {} 97 | for key, value in zip(x_occurences, y_images): 98 | bbox_num_dict[key] = value / total_images 99 | return bbox_num_dict 100 | -------------------------------------------------------------------------------- /datasetinsights/stats/statistics.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import datasetinsights.constants as const 4 | from datasetinsights.datasets.unity_perception import MetricDefinitions, Metrics 5 | from datasetinsights.datasets.unity_perception.tables import SCHEMA_VERSION 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class RenderedObjectInfo: 11 | """Rendered Object Info in Captures 12 | 13 | This metric stores common object info captured by a sensor in the simulation 14 | environment. It can be used to calculate object statistics such as 15 | object count, object rotation and visible pixels. 16 | 17 | Attributes: 18 | raw_table (pd.DataFrame): rendered object info stored with a tidy 19 | pandas dataframe. Columns "label_id", "instance_id", "visible_pixels", 20 | "capture_id, "label_name". 21 | 22 | Examples: 23 | 24 | .. code-block:: python 25 | 26 | >>> # set the data root path to where data was stored 27 | >>> data_root = "$HOME/data" 28 | >>> # use rendered object info definition id 29 | >>> definition_id = "659c6e36-f9f8-4dd6-9651-4a80e51eabc4" 30 | >>> roinfo = RenderedObjectInfo(data_root, definition_id) 31 | #total object count per label dataframe 32 | >>> roinfo.total_counts() 33 | label_id label_name count 34 | 1 object1 10 35 | 2 object2 21 36 | #object count per capture dataframe 37 | >>> roinfo.per_capture_counts() 38 | capture_id count 39 | qwerty 10 40 | asdfgh 21 41 | """ 42 | 43 | LABEL = "label_id" 44 | LABEL_READABLE = "label_name" 45 | INDEX_COLUMN = "capture_id" 46 | VALUE_COLUMN = "values" 47 | COUNT_COLUMN = "count" 48 | 49 | def __init__( 50 | self, 51 | data_root=const.DEFAULT_DATA_ROOT, 52 | version=SCHEMA_VERSION, 53 | def_id=None, 54 | ): 55 | """Initialize RenderedObjectInfo 56 | 57 | Args: 58 | data_root (str): root directory where the dataset was stored 59 | version (str): synthetic dataset schema version 60 | def_id (str): rendered object info definition id 61 | """ 62 | filtered_metrics = Metrics(data_root, version).filter_metrics(def_id) 63 | label_mappings = self._read_label_mappings(data_root, version, def_id) 64 | self.raw_table = self._read_filtered_metrics( 65 | filtered_metrics, label_mappings 66 | ) 67 | 68 | def num_captures(self): 69 | """Total number of captures 70 | 71 | Returns: 72 | integer: Total number of captures 73 | """ 74 | return self.raw_table[self.INDEX_COLUMN].nunique() 75 | 76 | @staticmethod 77 | def _read_label_mappings(data_root, version, def_id): 78 | """Read label_mappings from a metric_definition record. 
79 | 80 | Args: 81 | data_root (str): root directory where the dataset was stored 82 | version (str): synthetic dataset schema version 83 | def_id (str): rendered object info definition id 84 | 85 | Returns: 86 | dict: The mappings of {label_id: label_name} 87 | """ 88 | definition = MetricDefinitions(data_root, version).get_definition( 89 | def_id 90 | ) 91 | name = RenderedObjectInfo.LABEL 92 | readable_name = RenderedObjectInfo.LABEL_READABLE 93 | 94 | return {d[name]: d[readable_name] for d in definition["spec"]} 95 | 96 | @staticmethod 97 | def _read_filtered_metrics(filtered_metrics, label_mappings): 98 | """Join filtered metrics with label mappings. 99 | 100 | Args: 101 | filtered_metrics (pd.DataFrame): A pandas dataframe for metrics 102 | filtered by definition id. 103 | label_mappings (dict): the mappings of {label_id: label_name} 104 | 105 | Returns: 106 | pd.DataFrame: rendered object info stored with a tidy 107 | pandas dataframe. Columns "label_id", "instance_id", 108 | "visible_pixels", "capture_id", "label_name". 109 | """ 110 | filtered_metrics[RenderedObjectInfo.LABEL_READABLE] = filtered_metrics[ 111 | RenderedObjectInfo.LABEL 112 | ].map(label_mappings) 113 | # Remove metrics data not defined in label_mappings 114 | filtered_metrics.dropna( 115 | subset=[RenderedObjectInfo.LABEL_READABLE], inplace=True 116 | ) 117 | 118 | return filtered_metrics 119 | 120 | def total_counts(self): 121 | """Aggregate Total Object Counts Per Label 122 | 123 | Returns: 124 | pd.DataFrame: Total object counts table. 125 | Columns "label_id", "label_name", "count" 126 | """ 127 | agg = ( 128 | self.raw_table.groupby([self.LABEL, self.LABEL_READABLE]) 129 | .size() 130 | .to_frame(name=self.COUNT_COLUMN) 131 | .reset_index() 132 | ) 133 | 134 | return agg 135 | 136 | def per_capture_counts(self): 137 | """Aggregate Object Counts Per Capture 138 | 139 | Returns: 140 | pd.DataFrame: Per-capture object counts table. 141 | Columns "capture_id", "count" 142 | """ 143 | agg = ( 144 | self.raw_table.groupby(self.INDEX_COLUMN) 145 | .size() 146 | .to_frame(name=self.COUNT_COLUMN) 147 | .reset_index() 148 | ) 149 | 150 | return agg 151 | -------------------------------------------------------------------------------- /datasetinsights/stats/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | from .plots import grid_plot, plot_bboxes 2 | 3 | __all__ = ["plot_bboxes", "grid_plot"] 4 | -------------------------------------------------------------------------------- /datasetinsights/stats/visualization/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import dash 4 | 5 | 6 | def _init_app(): 7 | """Initializes the dash app.""" 8 | 9 | this_dir = os.path.dirname(os.path.abspath(__file__)) 10 | css_file = os.path.join(this_dir, "stylesheet.css") 11 | app = dash.Dash( 12 | __name__, 13 | external_stylesheets=[css_file], 14 | suppress_callback_exceptions=True, 15 | ) 16 | return app 17 | 18 | 19 | _app = _init_app() 20 | 21 | 22 | def get_app(): 23 | return _app 24 | -------------------------------------------------------------------------------- /datasetinsights/stats/visualization/bbox2d_plot.py: -------------------------------------------------------------------------------- 1 | """ Use a bounding box library to plot pretty bounding boxes 2 | with a simple Python API. This library helps to display bounding boxes 3 | with a chosen set of colors.
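A typical call looks like the sketch below (names are illustrative; `image`
is a numpy array and `box` a BBox2D):

    add_single_bbox_on_image(image, box, label="car", color="green")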
4 | Reference: https://github.com/nalepae/bounding-box 5 | """ 6 | import os as _os 7 | import pathlib 8 | import random 9 | from hashlib import md5 as _md5 10 | 11 | import cv2 as _cv2 12 | import numpy as _np 13 | from PIL import ImageFont 14 | 15 | FONT_PATH = _os.path.join( 16 | pathlib.Path(__file__).parent.absolute(), "font", "DroidSansFallback.ttf" 17 | ) 18 | _COLOR_NAME_TO_RGB = dict( 19 | navy=((0, 38, 63), (119, 193, 250)), 20 | blue=((0, 120, 210), (173, 220, 252)), 21 | aqua=((115, 221, 252), (0, 76, 100)), 22 | teal=((15, 205, 202), (0, 0, 0)), 23 | olive=((52, 153, 114), (25, 58, 45)), 24 | green=((0, 204, 84), (15, 64, 31)), 25 | lime=((1, 255, 127), (0, 102, 53)), 26 | yellow=((255, 216, 70), (103, 87, 28)), 27 | orange=((255, 125, 57), (104, 48, 19)), 28 | red=((255, 47, 65), (131, 0, 17)), 29 | maroon=((135, 13, 75), (239, 117, 173)), 30 | fuchsia=((246, 0, 184), (103, 0, 78)), 31 | purple=((179, 17, 193), (241, 167, 244)), 32 | gray=((168, 168, 168), (0, 0, 0)), 33 | silver=((220, 220, 220), (0, 0, 0)), 34 | ) 35 | _COLOR_NAMES = list(_COLOR_NAME_TO_RGB) 36 | _DEFAULT_COLOR_NAME = "green" 37 | 38 | 39 | def add_single_bbox_on_image( 40 | image, bbox, label, color, font_size=100, box_line_width=15 41 | ): 42 | """Add single bounding box with label on a given image. 43 | 44 | Args: 45 | image (numpy array): a numpy array for an image. 46 | bbox (BBox2D): a canonical bounding box. 47 | color (str): a color name for one bounding box. 48 | If color = None, it will randomly assign a color for each box. 49 | font_size (int): font size for each label. Defaults to 100. 50 | box_line_width (int): line width of the bounding boxes. Defaults to 15. 51 | """ 52 | left, top = (bbox.x, bbox.y) 53 | right, bottom = (bbox.x + bbox.w, bbox.y + bbox.h) 54 | 55 | _add_single_bbox_on_image( 56 | image, 57 | left, 58 | top, 59 | right, 60 | bottom, 61 | label=label, 62 | color=color, 63 | font_size=font_size, 64 | box_line_width=box_line_width, 65 | ) 66 | 67 | 68 | def _rgb_to_bgr(color): 69 | return list(reversed(color)) 70 | 71 | 72 | def _color_image(image, font_color, background_color): 73 | return background_color + (font_color - background_color) * image / 255 74 | 75 | 76 | def _get_label_image( 77 | text, font_color_tuple_bgr, background_color_tuple_bgr, font_size=100 78 | ): 79 | """Add text and background color for one label. 80 | 81 | Args: 82 | text (str): label name. 83 | font_color_tuple_bgr (tuple): font RGB color. 84 | background_color_tuple_bgr (tuple): background RGB color. 85 | font_size (int): font size for the label text. 86 | 87 | Returns: 88 | numpy array: a numpy array for a rendered label. 89 | """ 90 | _FONT = ImageFont.truetype(FONT_PATH, font_size) 91 | text_image = _FONT.getmask(text) 92 | shape = list(reversed(text_image.size)) 93 | bw_image = _np.array(text_image).reshape(shape) 94 | 95 | image = [ 96 | _color_image(bw_image, font_color, background_color)[None, ...] 97 | for font_color, background_color in zip( 98 | font_color_tuple_bgr, background_color_tuple_bgr 99 | ) 100 | ] 101 | 102 | return _np.concatenate(image).transpose(1, 2, 0) 103 | 104 | 105 | def _add_single_bbox_on_image( 106 | image, 107 | left, 108 | top, 109 | right, 110 | bottom, 111 | label=None, 112 | color=None, 113 | font_size=100, 114 | box_line_width=15, 115 | ): 116 | """Add single bounding box with label on a given image. 117 | 118 | Add single bounding box and a label text with label on a given image. If the 119 | label text exceeds the original image border, it would be cropped. 
120 | """ 121 | try: 122 | left, top, right, bottom = int(left), int(top), int(right), int(bottom) 123 | except ValueError: 124 | raise TypeError("'left', 'top', 'right' & 'bottom' must be a number") 125 | 126 | if label and not color: 127 | hex_digest = _md5(label.encode()).hexdigest() 128 | color_index = int(hex_digest, 16) % len(_COLOR_NAME_TO_RGB) 129 | color = _COLOR_NAMES[color_index] 130 | elif not label: 131 | color = random.choice(_COLOR_NAMES) 132 | 133 | colors = [list(item) for item in _COLOR_NAME_TO_RGB[color]] 134 | color, color_text = colors 135 | 136 | _cv2.rectangle(image, (left, top), (right, bottom), color, box_line_width) 137 | 138 | if label: 139 | label_image = _get_label_image(label, color_text, color, font_size) 140 | _add_label_on_image(label_image, image, left, top, color) 141 | 142 | 143 | def _add_label_on_image(label_image, image, left, top, color): 144 | """Add a label on a bounding box. 145 | 146 | Add a label on a bounding box. Crop the label image if it cross the image 147 | border. 148 | """ 149 | image_height, image_width, _ = image.shape 150 | label_height, label_width, _ = label_image.shape 151 | rectangle_height, rectangle_width = 1 + label_height, 1 + label_width 152 | 153 | rectangle_bottom = top 154 | rectangle_left = max(0, min(left - 1, image_width - rectangle_width)) 155 | 156 | rectangle_top = rectangle_bottom - rectangle_height 157 | rectangle_right = rectangle_left + rectangle_width 158 | 159 | label_top = rectangle_top + 1 160 | 161 | if rectangle_top < 0: 162 | rectangle_top = top 163 | rectangle_bottom = rectangle_top + label_height + 1 164 | 165 | label_top = rectangle_top 166 | 167 | label_left = rectangle_left + 1 168 | label_bottom = label_top + label_height 169 | label_right = label_left + label_width 170 | 171 | rec_left_top = (rectangle_left, rectangle_top) 172 | rec_right_bottom = (rectangle_right, rectangle_bottom) 173 | 174 | _cv2.rectangle(image, rec_left_top, rec_right_bottom, color, -1) 175 | _fix_label_at_image_edge( 176 | label_image, label_left, label_top, label_right, label_bottom, image 177 | ) 178 | 179 | 180 | def _fix_label_at_image_edge( 181 | label_image, label_left, label_top, label_right, label_bottom, image 182 | ): 183 | """Fix the label at image edge. 184 | 185 | Crop the label image if it cross the image border. 
186 | """ 187 | image_height, image_width, _ = image.shape 188 | label_height, label_width, _ = label_image.shape 189 | label_top = max(0, label_top) 190 | label_bottom = min(image_height, label_bottom) 191 | label_left = max(0, label_left) 192 | label_right = min(image_width, label_right) 193 | label_actual_width = label_right - label_left 194 | label_actual_height = label_bottom - label_top 195 | label_actual_size = label_actual_width * label_actual_height 196 | if label_actual_size < label_height * label_width: 197 | image[label_top:label_bottom, label_left:label_right, :] = label_image[ 198 | : (label_bottom - label_top), : (label_right - label_left), : 199 | ] 200 | else: 201 | image[label_top:label_bottom, label_left:label_right, :] = label_image 202 | -------------------------------------------------------------------------------- /datasetinsights/stats/visualization/constants.py: -------------------------------------------------------------------------------- 1 | MAX_SAMPLES = 10000 2 | RENDERED_OBJECT_INFO_DEFINITION_ID = "659c6e36-f9f8-4dd6-9651-4a80e51eabc4" 3 | USER_PARAMETERS_DEFINITION_ID = "3f06bcec-1f23-4387-a1fd-5af54ee29c16" 4 | FOREGROUND_PLACEMENT_INFO_DEFINITION_ID = "061e08cc-4428-4926-9933-a6732524b52b" 5 | LIGHTING_INFO_DEFINITION_ID = "939248ee-668a-4e98-8e79-e7909f034a47" 6 | BOUNDING_BOX_2D_DEFINITION_ID = "c31620e3-55ff-4af6-ae86-884aa0daa9b2" 7 | -------------------------------------------------------------------------------- /datasetinsights/stats/visualization/font/DroidSansFallback.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/datasetinsights/stats/visualization/font/DroidSansFallback.ttf -------------------------------------------------------------------------------- /datasetinsights/stats/visualization/keypoints_plot.py: -------------------------------------------------------------------------------- 1 | """ Helper keypoints library to plot keypoint joints and skeletons with a 2 | simple Python API. 3 | """ 4 | 5 | 6 | def _get_color_from_color_node(color): 7 | """Gets the color from the color node in the template. 8 | 9 | Args: 10 | color (tuple): The color's channel values expressed in a range from 0..1 11 | 12 | Returns: The color for the node. 13 | 14 | """ 15 | r = int(color["r"] * 255) 16 | g = int(color["g"] * 255) 17 | b = int(color["b"] * 255) 18 | a = int(color["a"] * 255) 19 | return r, g, b, a 20 | 21 | 22 | def _get_color_for_bone(bone): 23 | """Gets the color for the bone from the template. A bone is a visual 24 | connection between two keypoints in the keypoint list of the figure. 25 | 26 | bone 27 | { 28 | joint1: Index into the keypoint list for the first joint. 29 | joint2: Index into the keypoint list for the second joint. 30 | color { 31 | r: Value (0..1) of the red channel. 32 | g: Value (0..1) of the green channel. 33 | b: Value (0..1) of the blue channel. 34 | a: Value (0..1) of the alpha channel. 35 | } 36 | } 37 | 38 | Args: 39 | bone: The active bone. 40 | 41 | Returns: The color of the bone. 42 | 43 | """ 44 | if "color" in bone: 45 | return _get_color_from_color_node(bone["color"]) 46 | else: 47 | return 255, 0, 255, 255 48 | 49 | 50 | def _get_color_for_keypoint(template, keypoint): 51 | """Gets the color for the keypoint from the template. A keypoint is a 52 | location of interest inside of a figure. Keypoints are connected 53 | together with bones. 
The configuration of keypoint locations and bone 54 | connections is defined in a template file. 55 | 56 | keypoint_template { 57 | template_id: The UUID of the template. 58 | template_name: Human readable name of the template. 59 | key_points [ List of joints defined in this template 60 | { 61 | label: The label of the joint. 62 | index: The index of the joint. 63 | color { 64 | r: Value (0..1) for the red channel. 65 | g: Value (0..1) for the green channel. 66 | b: Value (0..1) for the blue channel. 67 | a: Value (0..1) for the alpha channel. 68 | } 69 | }, ... 70 | ] 71 | skeleton [ List of skeletal connections 72 | { 73 | joint1: The first joint of the connection. 74 | joint2: The second joint of the connection. 75 | color { 76 | r: Value (0..1) for the red channel. 77 | g: Value (0..1) for the green channel. 78 | b: Value (0..1) for the blue channel. 79 | a: Value (0..1) for the alpha channel. 80 | } 81 | }, ... 82 | ] 83 | } 84 | 85 | Args: 86 | template: The active template. 87 | keypoint: The active keypoint. 88 | 89 | Returns: The color for the keypoint. 90 | 91 | """ 92 | node = template["key_points"][keypoint["index"]] 93 | 94 | if "color" in node: 95 | return _get_color_from_color_node(node["color"]) 96 | else: 97 | return 0, 0, 255, 255 98 | 99 | 100 | def draw_keypoints_for_figure(image, figure, draw, templates, visual_width=6): 101 | """Draws keypoints for a figure on an image. 102 | 103 | keypoints { 104 | label_id: Integer identifier of the label. 105 | instance_id: UUID of the instance. 106 | template_guid: UUID of the keypoint template. 107 | pose: String label for current pose. 108 | keypoints [ 109 | { 110 | index: Index of keypoint in template. 111 | x: X subpixel coordinate of keypoint. 112 | y: Y subpixel coordinate of keypoint. 113 | state: 0: keypoint does not exist, 114 | 1: keypoint exists but is not visible, 115 | 2: keypoint exists and is visible. 116 | }, ... 117 | ] 118 | } 119 | 120 | Args: 121 | image (PIL Image): a PIL image. 122 | figure: The figure to draw. 123 | draw (PIL ImageDraw): PIL image draw interface. 124 | templates (list): a list of keypoint templates. 125 | visual_width (int): the visual width of the joints. 126 | 127 | Returns: a PIL image with keypoints for a figure drawn on it.
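Example (illustrative; `figure` and `templates` are assumed to be loaded
from dataset annotations elsewhere):

    >>> from PIL import Image, ImageDraw
    >>> image = Image.open("captures/camera_000.png")
    >>> draw = ImageDraw.Draw(image)
    >>> image = draw_keypoints_for_figure(image, figure, draw, templates)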
128 | 129 | """ 130 | # find the keypoint template for this figure 131 | for template in templates: 132 | if template["template_id"] == figure["template_guid"]: 133 | break 134 | else: 135 | return image 136 | 137 | # load the spec 138 | skeleton = template["skeleton"] 139 | 140 | for bone in skeleton: 141 | j1 = figure["keypoints"][bone["joint1"]] 142 | j2 = figure["keypoints"][bone["joint2"]] 143 | 144 | if j1["state"] == 2 and j2["state"] == 2: 145 | x1 = int(j1["x"]) 146 | y1 = int(j1["y"]) 147 | x2 = int(j2["x"]) 148 | y2 = int(j2["y"]) 149 | 150 | color = _get_color_for_bone(bone) 151 | draw.line((x1, y1, x2, y2), fill=color, width=visual_width) 152 | 153 | for k in figure["keypoints"]: 154 | state = k["state"] 155 | if state == 2: 156 | x = k["x"] 157 | y = k["y"] 158 | 159 | color = _get_color_for_keypoint(template, k) 160 | 161 | half_width = visual_width / 2 162 | 163 | draw.ellipse( 164 | ( 165 | x - half_width, 166 | y - half_width, 167 | x + half_width, 168 | y + half_width, 169 | ), 170 | fill=color, 171 | outline=color, 172 | ) 173 | 174 | return image 175 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | 3 | BUILDDIR = build 4 | SOURCEDIR = source 5 | TEMPLATEDIR = $(SOURCEDIR)/_templates 6 | 7 | .PHONY: help clean html 8 | 9 | help: 10 | @echo "Please use \`make <target>' where <target> is one of" 11 | @echo " html to make standalone HTML files" 12 | 13 | clean: 14 | rm -rf $(BUILDDIR) 15 | 16 | apidoc: 17 | sphinx-apidoc --templatedir=$(TEMPLATEDIR) -o $(SOURCEDIR) -d 2 ../datasetinsights/ \ 18 | ../datasetinsights/commands \ 19 | ../datasetinsights/dashboard.py \ 20 | ../datasetinsights/constants.py 21 | 22 | html: 23 | sphinx-build -b html $(SOURCEDIR) $(BUILDDIR)/html 24 | @echo 25 | @echo "Build finished. The HTML documentation pages are in $(BUILDDIR)/html." 26 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | Building documentation 2 | ====================== 3 | 4 | Run the following commands from the `docs` directory. 5 | 6 | Automatically generate Sphinx sources using [sphinx-apidoc](https://www.sphinx-doc.org/en/master/man/sphinx-apidoc.html) 7 | 8 | ```bash 9 | make apidoc 10 | ``` 11 | 12 | This command only applies to newly created modules. It will not update modules that already exist. You will have to modify `docs/datasetinsights.module_name` manually. 13 | 14 | To build HTML files, run 15 | 16 | ```bash 17 | make html 18 | ``` 19 | 20 | You can browse the documentation by opening the `build/html/index.html` file directly in any web browser. 21 | 22 | Clean up built HTML files 23 | 24 | ```bash 25 | make clean 26 | ``` 27 | 28 | Known issues 29 | ------------ 30 | 31 | 1. Some of the documents are written in markdown format. We use [recommonmark](https://github.com/readthedocs/recommonmark) to generate documentation. It uses [CommonMark](http://commonmark.org/) to convert markdown files to rst files. Due to its limitations, links to headers cannot have `_` or `.`. If the header has either of those characters, they should be replaced by dashes `-`. e.g. if you have a header `#### annotation_definitions.json` in the markdown file, to link to that header the markdown needs to be `[click link](#annotation-definitions-json)` 32 | 33 | 2.
`Readthedocs.org` does not currently support [poetry](https://python-poetry.org/) officially. Until it does, we have to manually generate a `docs/requirements.txt` file whenever new requirements are added to the repo. This file can be generated using the command: 34 | 35 | ```bash 36 | poetry export --dev --without-hashes -f requirements.txt > docs/requirements.txt 37 | ``` 38 | -------------------------------------------------------------------------------- /docs/source/_images/captures_steps_timestamps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/captures_steps_timestamps.png -------------------------------------------------------------------------------- /docs/source/_images/image_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/image_0.png -------------------------------------------------------------------------------- /docs/source/_images/image_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/image_2.png -------------------------------------------------------------------------------- /docs/source/_images/image_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/image_3.png -------------------------------------------------------------------------------- /docs/source/_images/image_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/image_4.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/evaluate_pipeline_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/evaluate_pipeline_graph.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/evaluate_the_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/evaluate_the_model.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/notebook.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/notebook_docker_cpu_memory.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/notebook_docker_cpu_memory.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/notebook_gpu_volume.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/notebook_gpu_volume.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/train_on_real_world_dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/train_on_real_world_dataset.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/train_on_synthdet_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/train_on_synthdet_sample.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/train_on_synthetic_and_real_world_dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/train_on_synthetic_and_real_world_dataset.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/train_on_synthetic_dataset_unity_simulation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/train_on_synthetic_dataset_unity_simulation.png -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/train_pipeline_graph.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/train_pipeline_graph.jpg -------------------------------------------------------------------------------- /docs/source/_images/kubeflow/upload_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/kubeflow/upload_pipeline.png -------------------------------------------------------------------------------- /docs/source/_images/synthetic_data_pipeline_dataset_evaluation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/docs/source/_images/synthetic_data_pipeline_dataset_evaluation.png -------------------------------------------------------------------------------- /docs/source/_templates/module.rst_t: -------------------------------------------------------------------------------- 1 | 
{%- if show_headings %} 2 | {{- basename | e | heading }} 3 | 4 | {% endif -%} 5 | .. automodule:: {{ qualname }} 6 | {%- for option in automodule_options %} 7 | :{{ option }}: 8 | {%- endfor %} 9 | -------------------------------------------------------------------------------- /docs/source/_templates/package.rst_t: -------------------------------------------------------------------------------- 1 | {%- macro automodule(modname, options) -%} 2 | .. automodule:: {{ modname }} 3 | {%- for option in options %} 4 | :{{ option }}: 5 | {%- endfor %} 6 | {%- endmacro %} 7 | 8 | {%- macro toctree(docnames) -%} 9 | .. toctree:: 10 | :maxdepth: {{ maxdepth }} 11 | {% for docname in docnames %} 12 | {{ docname }} 13 | {%- endfor %} 14 | {%- endmacro %} 15 | 16 | {%- if is_namespace %} 17 | {{- pkgname | e | heading }} 18 | {% else %} 19 | {{- pkgname | e | heading }} 20 | {% endif %} 21 | 22 | {%- if modulefirst and not is_namespace %} 23 | {{ automodule(pkgname, automodule_options) }} 24 | {% endif %} 25 | 26 | {%- if subpackages %} 27 | 28 | {{ toctree(subpackages) }} 29 | {% endif %} 30 | 31 | {%- if submodules %} 32 | {% if separatemodules %} 33 | {{ toctree(submodules) }} 34 | {%- else %} 35 | {%- for submodule in submodules %} 36 | {% if show_headings %} 37 | {{- submodule | e | heading(2) }} 38 | {% endif %} 39 | {{ automodule(submodule, automodule_options) }} 40 | {% endfor %} 41 | {%- endif %} 42 | {% endif %} 43 | 44 | {%- if not modulefirst and not is_namespace %} 45 | 46 | {{ automodule(pkgname, automodule_options) }} 47 | {% endif %} 48 | -------------------------------------------------------------------------------- /docs/source/_templates/toc.rst_t: -------------------------------------------------------------------------------- 1 | {{ header | heading }} 2 | 3 | .. toctree:: 4 | :maxdepth: {{ maxdepth }} 5 | {% for docname in docnames %} 6 | {{ docname }} 7 | {%- endfor %} 8 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | import os 7 | import sys 8 | 9 | import pkg_resources 10 | 11 | sys.path.insert(0, os.path.abspath("../..")) 12 | 13 | 14 | # -- Project information ----------------------------------------------------- 15 | 16 | project = "datasetinsights" 17 | copyright = "2020, Unity Technologies" 18 | author = "Unity Technologies" 19 | 20 | # The full version, including alpha/beta/rc tags 21 | release = pkg_resources.get_distribution(project).version 22 | napoleon_google_docstring = True 23 | 24 | # -- General configuration --------------------------------------------------- 25 | 26 | master_doc = "index" 27 | 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | "recommonmark", 34 | "sphinx.ext.autosectionlabel", 35 | "sphinx_rtd_theme", 36 | "sphinx.ext.napoleon", 37 | "sphinx_click", 38 | ] 39 | 40 | source_suffix = { 41 | ".rst": "restructuredtext", 42 | ".txt": "markdown", 43 | ".md": "markdown", 44 | } 45 | 46 | 47 | # Add any paths that contain templates here, relative to this directory. 
48 | templates_path = ["_templates"] 49 | 50 | # List of patterns, relative to source directory, that match files and 51 | # directories to ignore when looking for source files. 52 | # This pattern also affects html_static_path and html_extra_path. 53 | exclude_patterns = [] 54 | 55 | 56 | # -- Options for HTML output ------------------------------------------------- 57 | 58 | # The theme to use for HTML and HTML Help pages. See the documentation for 59 | # a list of builtin themes. 60 | # 61 | html_theme = "sphinx_rtd_theme" 62 | 63 | # Add any paths that contain custom static files (such as style sheets) here, 64 | # relative to this directory. They are copied after the builtin static files, 65 | # so a file named "default.css" will overwrite the builtin "default.css". 66 | -------------------------------------------------------------------------------- /docs/source/datasetinsights.datasets.rst: -------------------------------------------------------------------------------- 1 | datasetinsights.datasets 2 | ======================== 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 2 7 | 8 | datasetinsights.datasets.unity_perception 9 | 10 | 11 | datasetinsights.datasets.exceptions 12 | ----------------------------------- 13 | 14 | .. automodule:: datasetinsights.datasets.exceptions 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | 19 | datasetinsights.datasets.synthetic 20 | ---------------------------------- 21 | 22 | .. automodule:: datasetinsights.datasets.synthetic 23 | :members: 24 | :undoc-members: 25 | :show-inheritance: 26 | 27 | 28 | 29 | .. automodule:: datasetinsights.datasets 30 | :members: 31 | :undoc-members: 32 | :show-inheritance: 33 | -------------------------------------------------------------------------------- /docs/source/datasetinsights.datasets.transformers.rst: -------------------------------------------------------------------------------- 1 | datasetinsights.datasets.transformers 2 | ===================================== 3 | 4 | 5 | datasetinsights.datasets.transformers.coco 6 | ------------------------------------------ 7 | 8 | .. automodule:: datasetinsights.datasets.transformers.coco 9 | :members: 10 | :undoc-members: 11 | :show-inheritance: 12 | 13 | 14 | 15 | .. automodule:: datasetinsights.datasets.transformers 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | -------------------------------------------------------------------------------- /docs/source/datasetinsights.datasets.unity_perception.rst: -------------------------------------------------------------------------------- 1 | datasetinsights.datasets.unity\_perception 2 | ========================================== 3 | 4 | 5 | datasetinsights.datasets.unity\_perception.captures 6 | --------------------------------------------------- 7 | 8 | .. automodule:: datasetinsights.datasets.unity_perception.captures 9 | :members: 10 | :undoc-members: 11 | :show-inheritance: 12 | 13 | datasetinsights.datasets.unity\_perception.exceptions 14 | ----------------------------------------------------- 15 | 16 | .. automodule:: datasetinsights.datasets.unity_perception.exceptions 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | 21 | datasetinsights.datasets.unity\_perception.metrics 22 | -------------------------------------------------- 23 | 24 | .. 
automodule:: datasetinsights.datasets.unity_perception.metrics 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | datasetinsights.datasets.unity\_perception.references 30 | ----------------------------------------------------- 31 | 32 | .. automodule:: datasetinsights.datasets.unity_perception.references 33 | :members: 34 | :undoc-members: 35 | :show-inheritance: 36 | 37 | datasetinsights.datasets.unity\_perception.tables 38 | ------------------------------------------------- 39 | 40 | .. automodule:: datasetinsights.datasets.unity_perception.tables 41 | :members: 42 | :undoc-members: 43 | :show-inheritance: 44 | 45 | datasetinsights.datasets.unity\_perception.validation 46 | ----------------------------------------------------- 47 | 48 | .. automodule:: datasetinsights.datasets.unity_perception.validation 49 | :members: 50 | :undoc-members: 51 | :show-inheritance: 52 | 53 | 54 | 55 | .. automodule:: datasetinsights.datasets.unity_perception 56 | :members: 57 | :undoc-members: 58 | :show-inheritance: 59 | -------------------------------------------------------------------------------- /docs/source/datasetinsights.io.downloader.rst: -------------------------------------------------------------------------------- 1 | datasetinsights.io.downloader 2 | ============================= 3 | 4 | 5 | datasetinsights.io.downloader.base 6 | ---------------------------------- 7 | 8 | .. automodule:: datasetinsights.io.downloader.base 9 | :members: 10 | :undoc-members: 11 | :show-inheritance: 12 | 13 | datasetinsights.io.downloader.gcs\_downloader 14 | --------------------------------------------- 15 | 16 | .. automodule:: datasetinsights.io.downloader.gcs_downloader 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | 21 | datasetinsights.io.downloader.http\_downloader 22 | ---------------------------------------------- 23 | 24 | .. automodule:: datasetinsights.io.downloader.http_downloader 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | 30 | 31 | .. automodule:: datasetinsights.io.downloader 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | -------------------------------------------------------------------------------- /docs/source/datasetinsights.io.rst: -------------------------------------------------------------------------------- 1 | datasetinsights.io 2 | ================== 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 2 7 | 8 | datasetinsights.io.downloader 9 | 10 | 11 | datasetinsights.io.bbox 12 | ----------------------- 13 | 14 | .. automodule:: datasetinsights.io.bbox 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | 19 | datasetinsights.io.download 20 | --------------------------- 21 | 22 | .. automodule:: datasetinsights.io.download 23 | :members: 24 | :undoc-members: 25 | :show-inheritance: 26 | 27 | datasetinsights.io.exceptions 28 | ----------------------------- 29 | 30 | .. automodule:: datasetinsights.io.exceptions 31 | :members: 32 | :undoc-members: 33 | :show-inheritance: 34 | 35 | datasetinsights.io.gcs 36 | ---------------------- 37 | 38 | .. automodule:: datasetinsights.io.gcs 39 | :members: 40 | :undoc-members: 41 | :show-inheritance: 42 | 43 | 44 | 45 | .. automodule:: datasetinsights.io 46 | :members: 47 | :undoc-members: 48 | :show-inheritance: 49 | -------------------------------------------------------------------------------- /docs/source/datasetinsights.rst: -------------------------------------------------------------------------------- 1 | datasetinsights 2 | =============== 3 | 4 | 5 | .. 
toctree:: 6 | :maxdepth: 2 7 | 8 | datasetinsights.datasets 9 | datasetinsights.io 10 | datasetinsights.stats 11 | 12 | 13 | .. automodule:: datasetinsights 14 | :members: 15 | :undoc-members: 16 | :show-inheritance: 17 | -------------------------------------------------------------------------------- /docs/source/datasetinsights.stats.rst: -------------------------------------------------------------------------------- 1 | datasetinsights.stats 2 | ===================== 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 2 7 | 8 | datasetinsights.stats.visualization 9 | 10 | 11 | datasetinsights.stats.statistics 12 | -------------------------------- 13 | 14 | .. automodule:: datasetinsights.stats.statistics 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | 19 | 20 | 21 | .. automodule:: datasetinsights.stats 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | -------------------------------------------------------------------------------- /docs/source/datasetinsights.stats.visualization.rst: -------------------------------------------------------------------------------- 1 | datasetinsights.stats.visualization 2 | =================================== 3 | 4 | 5 | datasetinsights.stats.visualization.app 6 | --------------------------------------- 7 | 8 | .. automodule:: datasetinsights.stats.visualization.app 9 | :members: 10 | :undoc-members: 11 | :show-inheritance: 12 | 13 | datasetinsights.stats.visualization.bbox2d\_plot 14 | ------------------------------------------------ 15 | 16 | .. automodule:: datasetinsights.stats.visualization.bbox2d_plot 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | 21 | datasetinsights.stats.visualization.bbox3d\_plot 22 | ------------------------------------------------ 23 | 24 | .. automodule:: datasetinsights.stats.visualization.bbox3d_plot 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | datasetinsights.stats.visualization.constants 30 | --------------------------------------------- 31 | 32 | .. automodule:: datasetinsights.stats.visualization.constants 33 | :members: 34 | :undoc-members: 35 | :show-inheritance: 36 | 37 | datasetinsights.stats.visualization.keypoints\_plot 38 | --------------------------------------------------- 39 | 40 | .. automodule:: datasetinsights.stats.visualization.keypoints_plot 41 | :members: 42 | :undoc-members: 43 | :show-inheritance: 44 | 45 | datasetinsights.stats.visualization.object\_detection 46 | ----------------------------------------------------- 47 | 48 | .. automodule:: datasetinsights.stats.visualization.object_detection 49 | :members: 50 | :undoc-members: 51 | :show-inheritance: 52 | 53 | datasetinsights.stats.visualization.overview 54 | -------------------------------------------- 55 | 56 | .. automodule:: datasetinsights.stats.visualization.overview 57 | :members: 58 | :undoc-members: 59 | :show-inheritance: 60 | 61 | datasetinsights.stats.visualization.plots 62 | ----------------------------------------- 63 | 64 | .. automodule:: datasetinsights.stats.visualization.plots 65 | :members: 66 | :undoc-members: 67 | :show-inheritance: 68 | 69 | 70 | 71 | .. automodule:: datasetinsights.stats.visualization 72 | :members: 73 | :undoc-members: 74 | :show-inheritance: 75 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. Thea documentation master file, created by 2 | sphinx-quickstart on Mon Apr 27 17:25:16 2020. 
3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Dataset Insights 7 | ================ 8 | 9 | Unity Dataset Insights is a python package for downloading, parsing and analyzing synthetic datasets generated using the Unity `Perception SDK `_. 10 | 11 | Installation 12 | ------------ 13 | 14 | Dataset Insights maintains a pip package for easy installation. It can work in any standard Python environment using the :code:`pip install datasetinsights` command. We support Python 3 (3.8, 3.9 and 3.10). 15 | 16 | Getting Started 17 | --------------- 18 | 19 | Dataset Statistics 20 | ~~~~~~~~~~~~~~~~~~ 21 | We provide a sample `notebook `_ to help you load synthetic datasets generated using the `Perception package `_ and visualize dataset statistics. We plan to support other sample Unity projects in the future. 22 | 23 | Dataset Download 24 | ~~~~~~~~~~~~~~~~~~ 25 | 26 | You can download datasets from HTTP(S), GCS, and Unity simulation projects using the download command from the `CLI` or `API`. 27 | 28 | `CLI `_ 29 | 30 | .. code-block:: bash 31 | 32 | datasetinsights download \ 33 | --source-uri=<source-uri> \ 34 | --output=$HOME/data 35 | 36 | `API `_ 37 | 38 | 39 | GCSDatasetDownloader downloads a dataset from a GCS location. 40 | 41 | .. code-block:: python3 42 | 43 | from datasetinsights.io.downloader import GCSDatasetDownloader 44 | 45 | source_uri = "gs://url/to/file.zip" # or "gs://url/to/folder" 46 | dest = "~/data" 47 | downloader = GCSDatasetDownloader() 48 | downloader.download(source_uri=source_uri, output=dest) 49 | 50 | HTTPDatasetDownloader downloads a dataset from any HTTP(S) location. 51 | 52 | .. code-block:: python3 53 | 54 | from datasetinsights.io.downloader import HTTPDatasetDownloader 55 | 56 | source_uri = "http://url.to.file.zip" 57 | dest = "~/data" 58 | downloader = HTTPDatasetDownloader() 59 | downloader.download(source_uri=source_uri, output=dest) 60 | 61 | Dataset Explore 62 | ~~~~~~~~~~~~~~~~~~ 63 | 64 | You can explore the dataset `schema `_ by using the following API: 65 | 66 | `Unity Perception `_ 67 | 68 | AnnotationDefinitions and MetricDefinitions load the synthetic dataset definition tables and return a dictionary containing the definitions. 69 | 70 | .. code-block:: python3 71 | 72 | from datasetinsights.datasets.unity_perception import ( 73 | AnnotationDefinitions, MetricDefinitions) 74 | annotation_def = AnnotationDefinitions(data_root=dest, version="my_schema_version") 75 | definition_dict = annotation_def.get_definition(def_id="my_definition_id") 76 | 77 | metric_def = MetricDefinitions(data_root=dest, version="my_schema_version") 78 | definition_dict = metric_def.get_definition(def_id="my_definition_id") 79 | 80 | Captures loads the synthetic dataset captures tables and returns a pandas dataframe with captures and annotations columns. 81 | 82 | .. code-block:: python3 83 | 84 | from datasetinsights.datasets.unity_perception import Captures 85 | captures = Captures(data_root=dest, version="my_schema_version") 86 | captures_df = captures.filter(def_id="my_definition_id") 87 | 88 | Metrics loads the synthetic dataset metrics table, which holds extra metadata that can be used to describe a particular sequence, capture or annotation, and returns a pandas dataframe with captures and metrics columns. 89 | 90 | ..
code-block:: python3 91 | 92 | from datasetinsights.datasets.unity_perception import Metrics 93 | metrics = Metrics(data_root=dest, version="my_schema_version") 94 | metrics_df = metrics.filter_metrics(def_id="my_definition_id") 95 | 96 | Contents 97 | ======== 98 | 99 | .. toctree:: 100 | :maxdepth: 3 101 | 102 | modules 103 | 104 | 105 | .. toctree:: 106 | :maxdepth: 1 107 | :hidden: 108 | :caption: Getting Started 109 | 110 | SynthDet Guide 111 | 112 | 113 | .. toctree:: 114 | :maxdepth: 1 115 | :hidden: 116 | :caption: Synthetic Dataset 117 | 118 | Synthetic_Dataset_Schema 119 | 120 | 121 | Indices and tables 122 | ================== 123 | 124 | * :ref:`genindex` 125 | * :ref:`modindex` 126 | * :ref:`search` 127 | 128 | Citation 129 | ================== 130 | If you find this package useful, consider citing it using: 131 | 132 | :: 133 | 134 | @misc{datasetinsights2020, 135 | title={Unity {D}ataset {I}nsights Package}, 136 | author={{Unity Technologies}}, 137 | howpublished={\url{https://github.com/Unity-Technologies/datasetinsights}}, 138 | year={2020} 139 | } 140 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | datasetinsights 2 | =============== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | datasetinsights 8 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "datasetinsights" 3 | version = "1.0.0" 4 | description = "Synthetic dataset insights." 5 | license = "Apache-2.0" 6 | authors = [ 7 | "Unity AI Perception Team " 8 | ] 9 | readme = "README.md" 10 | homepage = "https://github.com/Unity-Technologies/datasetinsights" 11 | repository = "https://github.com/Unity-Technologies/datasetinsights" 12 | documentation = "https://datasetinsights.readthedocs.io/en/latest/" 13 | classifiers = [ 14 | "Development Status :: 3 - Alpha", 15 | "Environment :: Console", 16 | "Framework :: Jupyter", 17 | "Operating System :: OS Independent", 18 | "Programming Language :: Python :: 3.8", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 22 | "Topic :: Scientific/Engineering :: Visualization", 23 | "Topic :: Software Development :: Libraries :: Python Modules", 24 | "Topic :: Utilities" 25 | ] 26 | include = [ 27 | "LICENSE", 28 | ] 29 | 30 | 31 | [tool.poetry.dependencies] 32 | python = ">=3.8 <3.11" 33 | cython = "^0.29.14" 34 | google-cloud-storage = "^1.24.1" 35 | numpy = "^1.17" 36 | plotly = ">=5.0.0" 37 | pyquaternion = "^0.9.5" 38 | codetiming = "^1.2.0" 39 | pandas = "^1.0.1" 40 | tqdm = "^4.45.0" 41 | dask = {extras = ["complete"], version = "^2.14.0"} 42 | dash = "^2.3.1" 43 | click = "8.0.4" 44 | opencv-python = "^4.4.0.42" 45 | matplotlib = "^3.3.1" 46 | scipy = "^1.8.0" 47 | PyWavelets = "^1.3.0" 48 | pycocotools = "^2.0.4" 49 | seaborn = "^0.11.2" 50 | 51 | 52 | [tool.poetry.dev-dependencies] 53 | black = "22.3.0" 54 | flake8 = "^3.7.9" 55 | pytest = "^6.0.2" 56 | pytest-cov = "^2.8.1" 57 | responses = "^0.10.9" 58 | isort = "^4.3.21" 59 | sphinx-rtd-theme = "^0.5.0" 60 | recommonmark = "^0.6.0" 61 | sphinx-click = "^2.5.0" 62 | 63 | 64 | [tool.isort] 65 | multi_line_output = 3 66 | include_trailing_comma = true 67 | force_grid_wrap = 0 68 | use_parentheses = true 69 | line_length 
= 80 70 | 71 | [tool.black] 72 | line-length = 80 73 | target-version = ["py37"] 74 | include = '\.pyi?$' 75 | exclude = ''' 76 | 77 | ( 78 | /( 79 | \.eggs # exclude a few common directories in the 80 | | \.git # root of the project 81 | | \.hg 82 | | \.mypy_cache 83 | | \.tox 84 | | \.venv 85 | | _build 86 | | buck-out 87 | | build 88 | | dist 89 | | protos 90 | )/ 91 | ) 92 | ''' 93 | 94 | [tool.pytest.ini_options] 95 | addopts = "--cov=datasetinsights -rxXs --verbose" 96 | testpaths = [ 97 | "tests" 98 | ] 99 | 100 | [tool.poetry.scripts] 101 | datasetinsights = "datasetinsights.__main__:entrypoint" 102 | 103 | [build-system] 104 | requires = ["poetry>=1.0.5"] 105 | build-backend = "poetry.masonry.api" 106 | -------------------------------------------------------------------------------- /tests/datasets/test_coco_transformers.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tempfile 3 | from pathlib import Path 4 | 5 | from datasetinsights.datasets.transformers import ( 6 | COCOInstancesTransformer, 7 | COCOKeypointsTransformer, 8 | ) 9 | 10 | 11 | def assert_json_equals(file1, file2): 12 | with open(file1, "r") as f1: 13 | j1 = json.dumps(json.load(f1), sort_keys=True, indent=4) 14 | with open(file2, "r") as f2: 15 | j2 = json.dumps(json.load(f2), sort_keys=True, indent=4) 16 | 17 | assert j1 == j2 18 | 19 | 20 | def test_coco_instances_transformer(): 21 | parent_dir = Path(__file__).parent.parent.absolute() 22 | mock_data_dir = parent_dir / "mock_data" / "simrun" 23 | mock_coco_dir = parent_dir / "mock_data" / "coco" 24 | 25 | transformer = COCOInstancesTransformer(str(mock_data_dir)) 26 | 27 | with tempfile.TemporaryDirectory() as tmp_dir: 28 | transformer.execute(tmp_dir) 29 | output_file = Path(tmp_dir) / "annotations" / "instances.json" 30 | expected_file = mock_coco_dir / "annotations" / "instances.json" 31 | output_image_folder = Path(tmp_dir) / "images" 32 | 33 | assert output_file.exists() 34 | assert output_image_folder.exists() 35 | assert list(output_image_folder.glob("*")) 36 | assert_json_equals(expected_file, output_file) 37 | 38 | 39 | def test_coco_keypoints_transformer(): 40 | parent_dir = Path(__file__).parent.parent.absolute() 41 | mock_data_dir = parent_dir / "mock_data" / "simrun_keypoint_dataset" 42 | mock_coco_dir = parent_dir / "mock_data" / "coco" 43 | 44 | transformer = COCOKeypointsTransformer(str(mock_data_dir)) 45 | 46 | with tempfile.TemporaryDirectory() as tmp_dir: 47 | transformer.execute(tmp_dir) 48 | output_file = Path(tmp_dir) / "annotations" / "keypoints.json" 49 | expected_file = mock_coco_dir / "annotations" / "keypoints.json" 50 | output_image_folder = Path(tmp_dir) / "images" 51 | 52 | assert output_file.exists() 53 | assert output_image_folder.exists() 54 | assert list(output_image_folder.glob("*")) 55 | assert_json_equals(expected_file, output_file) 56 | -------------------------------------------------------------------------------- /tests/datasets/test_statistics.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from datasetinsights.stats.statistics import RenderedObjectInfo 4 | 5 | 6 | def test_read_filtered_metrics(): 7 | metrics = pd.DataFrame( 8 | { 9 | "capture_id": [ 10 | "", 11 | "1231", 12 | "1231", 13 | "1231", 14 | "2324", 15 | "323523", 16 | "323523", 17 | ], 18 | "label_id": [0, 1, 2, 3, 1, 2, 3], 19 | "label_name": ["", "car", "bike", "child", "car", "bike", "child"], 20 | "value": [0, 2, 3, 1, 1, 1, 
4], 21 | } 22 | ) 23 | mappings = {1: "car", 2: "bike", 3: "child"} 24 | expected = pd.DataFrame( 25 | { 26 | "capture_id": ["1231", "1231", "1231", "2324", "323523", "323523"], 27 | "label_id": [1, 2, 3, 1, 2, 3], 28 | "label_name": ["car", "bike", "child", "car", "bike", "child"], 29 | "value": [2, 3, 1, 1, 1, 4], 30 | } 31 | ) 32 | 33 | agg = RenderedObjectInfo._read_filtered_metrics(metrics, mappings) 34 | agg = agg.reset_index(drop=True) 35 | pd.testing.assert_frame_equal(agg, expected, check_like=True) 36 | -------------------------------------------------------------------------------- /tests/datasets/test_synthetic.py: -------------------------------------------------------------------------------- 1 | from datasetinsights.datasets.synthetic import read_bounding_box_2d 2 | from datasetinsights.io.bbox import BBox2D 3 | 4 | 5 | def test_read_bounding_box_2d(): 6 | annotation = [ 7 | { 8 | "instance_id": "...", 9 | "label_id": 27, 10 | "label_name": "car", 11 | "x": 30, 12 | "y": 50, 13 | "width": 100, 14 | "height": 100, 15 | } 16 | ] 17 | definition = { 18 | "id": 1243, 19 | "name": "...", 20 | "description": "...", 21 | "format": "JSON", 22 | "spec": [{"label_id": 27, "label_name": "car"}], 23 | } 24 | label_mappings = { 25 | m["label_id"]: m["label_name"] for m in definition["spec"] 26 | } 27 | bbox = read_bounding_box_2d(annotation, label_mappings) 28 | 29 | assert bbox == [BBox2D(27, 30, 50, 100, 100)] 30 | -------------------------------------------------------------------------------- /tests/mock_data/calib000000.txt: -------------------------------------------------------------------------------- 1 | P0: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 0.000000000000e+00 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 -3.797842000000e+02 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 4.575831000000e+01 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 -3.454157000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 4.981016000000e-03 4 | P3: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 -3.341081000000e+02 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 2.330660000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 3.201153000000e-03 5 | R0_rect: 9.999128000000e-01 1.009263000000e-02 -8.511932000000e-03 -1.012729000000e-02 9.999406000000e-01 -4.037671000000e-03 8.470675000000e-03 4.123522000000e-03 9.999556000000e-01 6 | Tr_velo_to_cam: 6.927964000000e-03 -9.999722000000e-01 -2.757829000000e-03 -2.457729000000e-02 -1.162982000000e-03 2.749836000000e-03 -9.999955000000e-01 -6.127237000000e-02 9.999753000000e-01 6.931141000000e-03 -1.143899000000e-03 -3.321029000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | -------------------------------------------------------------------------------- /tests/mock_data/coco/annotations/instances.json: -------------------------------------------------------------------------------- 1 | { 2 | "info": { 3 | "description": "COCO compatible 
Synthetic Dataset" 4 | }, 5 | "licences": [ 6 | { 7 | "url": "", 8 | "id": 1, 9 | "name": "default" 10 | } 11 | ], 12 | "images": [ 13 | { 14 | "file_name": "camera_91891091516384550185081373185892902457.png", 15 | "height": 240, 16 | "width": 320, 17 | "id": 91891091516384550185081373185892902457 18 | } 19 | ], 20 | "annotations": [ 21 | { 22 | "segmentation": [], 23 | "area": 10000.0, 24 | "iscrowd": 0, 25 | "image_id": 91891091516384550185081373185892902457, 26 | "bbox": [ 27 | 30.0, 28 | 50.0, 29 | 100.0, 30 | 100.0 31 | ], 32 | "category_id": 27, 33 | "id": 244409769007218865362436775986662996774 34 | }, 35 | { 36 | "segmentation": [], 37 | "area": 1000.0, 38 | "iscrowd": 0, 39 | "image_id": 91891091516384550185081373185892902457, 40 | "bbox": [ 41 | 120.0, 42 | 231.0, 43 | 50.0, 44 | 20.0 45 | ], 46 | "category_id": 34, 47 | "id": 328316353567376980370842232520647311162 48 | }, 49 | { 50 | "segmentation": [], 51 | "area": 200.0, 52 | "iscrowd": 0, 53 | "image_id": 91891091516384550185081373185892902457, 54 | "bbox": [ 55 | 132.0, 56 | 83.0, 57 | 10.0, 58 | 20.0 59 | ], 60 | "category_id": 25, 61 | "id": 244577869532652886288531412869200144247 62 | } 63 | ], 64 | "categories": [ 65 | { 66 | "id": 27, 67 | "name": "car", 68 | "supercategory": "default" 69 | }, 70 | { 71 | "id": 34, 72 | "name": "bicycle", 73 | "supercategory": "default" 74 | }, 75 | { 76 | "id": 25, 77 | "name": "person", 78 | "supercategory": "default" 79 | } 80 | ] 81 | } 82 | -------------------------------------------------------------------------------- /tests/mock_data/coco/annotations/keypoints.json: -------------------------------------------------------------------------------- 1 | {"info": {"description": "COCO compatible Synthetic Dataset"}, "licences": [{"url": "", "id": 1, "name": "default"}], "images": [{"file_name": "camera_61855733451949387398181790757513827492.png", "height": 640, "width": 640, "id": 61855733451949387398181790757513827492}, {"file_name": "camera_125709864006893838062514269195103918838.png", "height": 640, "width": 640, "id": 125709864006893838062514269195103918838}], "annotations": [{"segmentation": [], "area": 1035.0, "iscrowd": 0, "image_id": 61855733451949387398181790757513827492, "bbox": [72.0, 0.0, 45.0, 23.0], "keypoints": [442, 66, 1, 438, 61, 1, 441, 60, 1, 429, 59, 1, 439, 53, 2, 417, 75, 1, 442, 63, 2, 419, 92, 1, 468, 52, 2, 434, 112, 1, 489, 53, 2, 414, 119, 2, 423, 119, 2, 435, 145, 2, 425, 152, 2, 425, 185, 2, 396, 181, 2], "num_keypoints": 17, "category_id": 1, "id": 334610669898986761222408873459836779863}, {"segmentation": [], "area": 2064.0, "iscrowd": 0, "image_id": 61855733451949387398181790757513827492, "bbox": [263.0, 0.0, 48.0, 43.0], "keypoints": [570, 308, 2, 573, 304, 2, 567, 304, 2, 577, 304, 2, 562, 302, 2, 587, 318, 2, 557, 321, 2, 608, 327, 2, 549, 338, 2, 617, 339, 2, 550, 358, 2, 579, 353, 2, 567, 354, 2, 578, 390, 2, 591, 378, 2, 565, 410, 2, 594, 413, 2], "num_keypoints": 17, "category_id": 1, "id": 339285116325029240400003306295766986617}, {"segmentation": [], "area": 2240.0, "iscrowd": 0, "image_id": 61855733451949387398181790757513827492, "bbox": [626.0, 18.0, 14.0, 160.0], "keypoints": [308, 236, 2, 312, 236, 2, 304, 235, 2, 319, 245, 2, 296, 242, 2, 325, 266, 2, 288, 268, 2, 342, 267, 2, 259, 270, 2, 329, 250, 2, 265, 250, 2, 317, 320, 2, 292, 320, 2, 319, 375, 2, 283, 374, 2, 323, 418, 2, 281, 419, 2], "num_keypoints": 17, "category_id": 1, "id": 340280744268748444979888511031625297271}, {"segmentation": [], "area": 16830.0, "iscrowd": 0, 
"image_id": 61855733451949387398181790757513827492, "bbox": [389.0, 44.0, 110.0, 153.0], "keypoints": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 638, 37, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "num_keypoints": 1, "category_id": 1, "id": 333968447710968161883062887274436145021}, {"segmentation": [], "area": 4312.0, "iscrowd": 0, "image_id": 61855733451949387398181790757513827492, "bbox": [97.0, 129.0, 49.0, 88.0], "keypoints": [116, 142, 2, 119, 139, 2, 116, 139, 1, 123, 137, 2, 116, 138, 1, 131, 152, 2, 114, 148, 2, 129, 169, 2, 102, 166, 2, 125, 185, 2, 100, 176, 2, 139, 169, 1, 128, 167, 1, 129, 177, 2, 104, 174, 2, 129, 205, 2, 110, 202, 2], "num_keypoints": 17, "category_id": 1, "id": 333947764713911287440370339802593095505}, {"segmentation": [], "area": 10108.0, "iscrowd": 0, "image_id": 61855733451949387398181790757513827492, "bbox": [546.0, 291.0, 76.0, 133.0], "keypoints": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 304, 32, 2, 274, 34, 2], "num_keypoints": 2, "category_id": 1, "id": 339595274893044431302040368385860419549}, {"segmentation": [], "area": 18900.0, "iscrowd": 0, "image_id": 61855733451949387398181790757513827492, "bbox": [255.0, 227.0, 90.0, 210.0], "keypoints": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 13, 2, 112, 12, 2], "num_keypoints": 2, "category_id": 1, "id": 334944599550083874293864550321790824317}, {"segmentation": [], "area": 28400.0, "iscrowd": 0, "image_id": 61855733451949387398181790757513827492, "bbox": [143.0, 228.0, 100.0, 284.0], "keypoints": [174, 258, 2, 182, 250, 2, 174, 251, 2, 199, 247, 2, 178, 248, 1, 224, 281, 2, 174, 285, 2, 232, 320, 2, 163, 327, 2, 215, 362, 2, 150, 367, 2, 222, 372, 2, 190, 373, 1, 209, 432, 2, 165, 427, 1, 220, 493, 2, 173, 495, 1], "num_keypoints": 17, "category_id": 1, "id": 334943387676138995482526044752816615411}, {"segmentation": [], "area": 5900.0, "iscrowd": 0, "image_id": 125709864006893838062514269195103918838, "bbox": [41.0, 0.0, 59.0, 100.0], "keypoints": [62, 235, 2, 47, 224, 1, 48, 223, 2, 34, 225, 1, 34, 225, 2, 25, 287, 1, 23, 303, 2, 11, 375, 1, 9, 401, 2, 28, 448, 1, 39, 486, 2, 26, 425, 1, 29, 423, 2, 63, 566, 1, 46, 577, 2, 0, 0, 0, 0, 0, 0], "num_keypoints": 15, "category_id": 1, "id": 318336697634370290125178793847372611071}, {"segmentation": [], "area": 7743.0, "iscrowd": 0, "image_id": 125709864006893838062514269195103918838, "bbox": [225.0, 95.0, 87.0, 89.0], "keypoints": [267, 102, 1, 266, 100, 1, 269, 100, 1, 265, 100, 1, 272, 101, 1, 261, 111, 2, 277, 111, 2, 246, 109, 2, 291, 109, 2, 234, 108, 2, 304, 106, 2, 262, 131, 2, 271, 133, 2, 259, 147, 2, 267, 157, 2, 257, 167, 2, 264, 178, 2], "num_keypoints": 17, "category_id": 1, "id": 315691618573741103626618763093389048572}, {"segmentation": [], "area": 12152.0, "iscrowd": 0, "image_id": 125709864006893838062514269195103918838, "bbox": [37.0, 265.0, 56.0, 217.0], "keypoints": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68, 38, 2, 0, 0, 0, 65, 86, 2, 60, 16, 2], "num_keypoints": 3, "category_id": 1, "id": 339604711293634020202778213587130907389}, {"segmentation": [], "area": 3731.0, "iscrowd": 0, "image_id": 125709864006893838062514269195103918838, "bbox": [305.0, 426.0, 41.0, 91.0], "keypoints": [586, 439, 2, 583, 434, 1, 
584, 435, 2, 577, 433, 1, 579, 437, 2, 566, 450, 1, 570, 460, 2, 565, 466, 1, 561, 480, 2, 579, 482, 2, 563, 501, 2, 558, 485, 1, 558, 494, 2, 578, 513, 2, 558, 531, 2, 575, 546, 2, 523, 539, 2], "num_keypoints": 17, "category_id": 1, "id": 336948836902231858787782549365331982589}, {"segmentation": [], "area": 10472.0, "iscrowd": 0, "image_id": 125709864006893838062514269195103918838, "bbox": [515.0, 421.0, 77.0, 136.0], "keypoints": [319, 439, 2, 319, 438, 1, 320, 435, 1, 323, 439, 2, 327, 431, 1, 326, 452, 2, 334, 442, 2, 314, 456, 2, 337, 444, 1, 309, 456, 1, 337, 448, 1, 338, 467, 2, 340, 461, 2, 327, 486, 2, 329, 464, 1, 328, 508, 2, 341, 480, 2], "num_keypoints": 17, "category_id": 1, "id": 296747512066848616081022417879095443452}, {"segmentation": [], "area": 4851.0, "iscrowd": 0, "image_id": 125709864006893838062514269195103918838, "bbox": [312.0, 563.0, 63.0, 77.0], "keypoints": [359, 582, 2, 356, 576, 1, 355, 578, 2, 350, 572, 1, 347, 575, 2, 333, 595, 1, 344, 599, 2, 328, 613, 1, 341, 617, 2, 329, 633, 1, 356, 617, 2, 324, 635, 1, 0, 0, 0, 351, 624, 2, 0, 0, 0, 362, 638, 2, 0, 0, 0], "num_keypoints": 14, "category_id": 1, "id": 340279443083261501468039155580732538109}, {"segmentation": [], "area": 38700.0, "iscrowd": 0, "image_id": 125709864006893838062514269195103918838, "bbox": [0.0, 190.0, 86.0, 450.0], "keypoints": [43, 285, 1, 37, 280, 1, 45, 280, 1, 27, 280, 1, 49, 281, 2, 12, 313, 1, 63, 314, 1, 0, 0, 0, 81, 343, 2, 14, 363, 1, 86, 369, 2, 32, 378, 1, 52, 373, 1, 36, 431, 1, 59, 414, 1, 63, 469, 1, 73, 469, 1], "num_keypoints": 16, "category_id": 1, "id": 273817934909826610959195516144229154047}], "categories": [{"id": 1, "name": "person", "supercategory": "default", "keypoints": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_shoulder", "right_shoulder", "left_elbow", "right_elbow", "left_wrist", "right_wrist", "left_hip", "right_hip", "left_knee", "right_knee", "left_ankle", "right_ankle"], "skeleton": [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]}]} 2 | -------------------------------------------------------------------------------- /tests/mock_data/coco/images/camera_001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/coco/images/camera_001.png -------------------------------------------------------------------------------- /tests/mock_data/coco/images/camera_125709864006893838062514269195103918838.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/coco/images/camera_125709864006893838062514269195103918838.png -------------------------------------------------------------------------------- /tests/mock_data/coco/images/camera_61855733451949387398181790757513827492.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/coco/images/camera_61855733451949387398181790757513827492.png -------------------------------------------------------------------------------- /tests/mock_data/no_annotations_or_metrics/Dataset/annotation_definitions.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "annotation_definitions": [ 4 | { 5 | "id": 1, 6 | "name": "semantic segmentation", 7 | "description": "pixel-wise semantic segmentation label", 8 | "format": "PNG", 9 | "spec": [ 10 | {"label_id": 8, "label_name": "road", "pixel_value": 0}, 11 | {"label_id": 9, "label_name": "sidewalk", "pixel_value": 1}, 12 | {"label_id": 12, "label_name": "building", "pixel_value": 2}, 13 | {"label_id": 13, "label_name": "wall", "pixel_value": 3}, 14 | {"label_id": 14, "label_name": "fence", "pixel_value": 4}, 15 | {"label_id": 18, "label_name": "pole", "pixel_value": 5}, 16 | {"label_id": 20, "label_name": "traffic light", "pixel_value": 6}, 17 | {"label_id": 21, "label_name": "traffic sign", "pixel_value": 7}, 18 | {"label_id": 22, "label_name": "vegetation", "pixel_value": 8}, 19 | {"label_id": 23, "label_name": "terrain", "pixel_value": 9}, 20 | {"label_id": 24, "label_name": "sky", "pixel_value": 10}, 21 | {"label_id": 25, "label_name": "person", "pixel_value": 11}, 22 | {"label_id": 26, "label_name": "rider", "pixel_value": 12}, 23 | {"label_id": 27, "label_name": "car", "pixel_value": 13}, 24 | {"label_id": 28, "label_name": "truck", "pixel_value": 14}, 25 | {"label_id": 29, "label_name": "bus", "pixel_value": 15}, 26 | {"label_id": 32, "label_name": "train", "pixel_value": 16}, 27 | {"label_id": 33, "label_name": "motorcycle", "pixel_value": 17}, 28 | {"label_id": 34, "label_name": "bicycle", "pixel_value": 18} 29 | ] 30 | }, 31 | { 32 | "id": 2, 33 | "name": "3d bounding box", 34 | "description": "3d bounding box annotation of object instances", 35 | "format": "JSON", 36 | "spec": [ 37 | {"label_id": 27, "label_name": "car"}, 38 | {"label_id": 34, "label_name": "bicycle"}, 39 | {"label_id": 25, "label_name": "person"} 40 | ] 41 | }, 42 | { 43 | "id": 3, 44 | "name": "lidar semantic segmention", 45 | "description": "3d point cloud semantic segmentation", 46 | "format": "PCD", 47 | "spec": [ 48 | {"label_id": 27, "label_name": "car", "point_value": 0}, 49 | {"label_id": 34, "label_name": "bicycle", "point_value": 1}, 50 | {"label_id": 25, "label_name": "person", "point_value": 2} 51 | ] 52 | }, 53 | { 54 | "id": 4, 55 | "name": "2d bounding box", 56 | "description": "2d bounding box annotation", 57 | "format": "JSON", 58 | "spec": [ 59 | {"label_id": 27, "label_name": "car"}, 60 | {"label_id": 34, "label_name": "bicycle"}, 61 | {"label_id": 25, "label_name": "person"} 62 | ] 63 | } 64 | ] 65 | } 66 | -------------------------------------------------------------------------------- /tests/mock_data/no_annotations_or_metrics/Dataset/captures_000.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "captures": [ 4 | { 5 | "id": "e8b44709-dddf-439d-94d2-975460924903", 6 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 7 | "step": 1, 8 | "timestamp": 1, 9 | "sensor": { 10 | "sensor_id": "b4f6a75e-12de-4b4c-9574-5b135cecac6f", 11 | "ego_id": "4f80234d-4342-420f-9187-07004613cd1f", 12 | "modality": "camera", 13 | "translation": [0.2, 1.1, 0.3], 14 | "rotation": [0.3, 0.2, 0.1, 0.5], 15 | "camera_intrinsic": [ 16 | [0.1, 0, 0], 17 | [3.0, 0.1, 0], 18 | [0.5, 0.45, 1] 19 | ] 20 | }, 21 | "ego": { 22 | "ego_id": 1, 23 | "translation": [0.02, 0.0, 0.0], 24 | "rotation": [0.1, 0.1, 0.3, 0.0], 25 | "velocity": [0.1, 0.1, 0.0], 26 | "acceleration": null 27 | }, 28 | "filename": "captures/camera_000.png", 29 | "format": 
"PNG", 30 | "annotations": [ 31 | ] 32 | } 33 | ] 34 | } 35 | -------------------------------------------------------------------------------- /tests/mock_data/no_annotations_or_metrics/Dataset/captures_001.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "captures": [ 4 | { 5 | "id": "4521949a-2a71-4c03-beb0-4f6362676639", 6 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 7 | "step": 2, 8 | "timestamp": 2, 9 | "sensor": { 10 | "sensor_id": 1, 11 | "ego_id": 1, 12 | "modality": "camera", 13 | "translation": [0.2, 1.1, 0.3], 14 | "rotation": [0.3, 0.2, 0.1, 0.5], 15 | "camera_intrinsic": [ 16 | [0.1, 0, 0], 17 | [3.0, 0.1, 0], 18 | [0.5, 0.45, 1] 19 | ] 20 | }, 21 | "ego": { 22 | "ego_id": 1, 23 | "translation": [0.12, 0.1, 0.0], 24 | "rotation": [0.0, 0.15, 0.24, 0.0], 25 | "velocity": [0.0, 0.0, 0.0], 26 | "acceleration": null 27 | }, 28 | "filename": "captures/camera_001.png", 29 | "format": "PNG", 30 | "annotations": [ 31 | ] 32 | }, 33 | { 34 | "id": "4b35a47a-3f63-4af3-b0e8-e68cb384ad75", 35 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 36 | "step": 2, 37 | "timestamp": 2, 38 | "sensor": { 39 | "sensor_id": 2, 40 | "ego_id": 1, 41 | "modality": "lidar", 42 | "translation": [0.0, 0.0, 0.0], 43 | "rotation": [0.0, 0.0, 0.0, 0.0], 44 | "camera_intrinsic": null 45 | }, 46 | "ego": { 47 | "ego_id": 1, 48 | "translation": [0.12, 0.1, 0.0], 49 | "rotation": [0.0, 0.15, 0.24, 0.0], 50 | "velocity": [0.0, 0.0, 0.0], 51 | "acceleration": null 52 | }, 53 | "filename": "captures/lidar_000.pcd", 54 | "format": "PCD", 55 | "annotations": [ 56 | ] 57 | } 58 | ] 59 | } 60 | -------------------------------------------------------------------------------- /tests/mock_data/no_annotations_or_metrics/Dataset/egos.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "egos": [ 4 | { 5 | "id": "4f80234d-4342-420f-9187-07004613cd1f", 6 | "description": "the main car driving in simulation" 7 | } 8 | ] 9 | } 10 | -------------------------------------------------------------------------------- /tests/mock_data/no_annotations_or_metrics/Dataset/metric_definitions.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "metric_definitions": [ 4 | { 5 | "id": 1, 6 | "name": "object count", 7 | "description": "count number of objects observed", 8 | "spec": [ 9 | {"label_id": 27, "label_name": "car"}, 10 | {"label_id": 34, "label_name": "bicycle"}, 11 | {"label_id": 25, "label_name": "person"} 12 | ] 13 | }, 14 | { 15 | "id": 2, 16 | "name": "visible pixel", 17 | "description": "visible pixel", 18 | "spec": [ 19 | {"label_id": 21, "label_name": "watch"}, 20 | {"label_id": 28, "label_name": "book"} 21 | ] 22 | } 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /tests/mock_data/no_annotations_or_metrics/Dataset/metrics_000.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "metrics": [ 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /tests/mock_data/no_annotations_or_metrics/Dataset/sensors.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "sensors": [ 4 | { 5 | "id": "b4f6a75e-12de-4b4c-9574-5b135cecac6f", 6 | "ego_id": "4f80234d-4342-420f-9187-07004613cd1f", 7 
| "modality": "camera", 8 | "description": "Point Grey Flea 2 (FL2-14S3M-C)" 9 | }, 10 | { 11 | "id": "6fb1a823-5b83-4a79-b566-fe4435ec1942", 12 | "ego_id": "4f80234d-4342-420f-9187-07004613cd1f", 13 | "modality": "lidar", 14 | "description": "Velodyne HDL-64E" 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /tests/mock_data/simrun/Dataset/annotation_definitions.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "annotation_definitions": [ 4 | { 5 | "id": "1", 6 | "name": "semantic segmentation", 7 | "description": "pixel-wise semantic segmentation label", 8 | "format": "PNG", 9 | "spec": [ 10 | {"label_id": 8, "label_name": "road", "pixel_value": 0}, 11 | {"label_id": 9, "label_name": "sidewalk", "pixel_value": 1}, 12 | {"label_id": 12, "label_name": "building", "pixel_value": 2}, 13 | {"label_id": 13, "label_name": "wall", "pixel_value": 3}, 14 | {"label_id": 14, "label_name": "fence", "pixel_value": 4}, 15 | {"label_id": 18, "label_name": "pole", "pixel_value": 5}, 16 | {"label_id": 20, "label_name": "traffic light", "pixel_value": 6}, 17 | {"label_id": 21, "label_name": "traffic sign", "pixel_value": 7}, 18 | {"label_id": 22, "label_name": "vegetation", "pixel_value": 8}, 19 | {"label_id": 23, "label_name": "terrain", "pixel_value": 9}, 20 | {"label_id": 24, "label_name": "sky", "pixel_value": 10}, 21 | {"label_id": 25, "label_name": "person", "pixel_value": 11}, 22 | {"label_id": 26, "label_name": "rider", "pixel_value": 12}, 23 | {"label_id": 27, "label_name": "car", "pixel_value": 13}, 24 | {"label_id": 28, "label_name": "truck", "pixel_value": 14}, 25 | {"label_id": 29, "label_name": "bus", "pixel_value": 15}, 26 | {"label_id": 32, "label_name": "train", "pixel_value": 16}, 27 | {"label_id": 33, "label_name": "motorcycle", "pixel_value": 17}, 28 | {"label_id": 34, "label_name": "bicycle", "pixel_value": 18} 29 | ] 30 | }, 31 | { 32 | "id": "2", 33 | "name": "3d bounding box", 34 | "description": "3d bounding box annotation of object instances", 35 | "format": "JSON", 36 | "spec": [ 37 | {"label_id": 27, "label_name": "car"}, 38 | {"label_id": 34, "label_name": "bicycle"}, 39 | {"label_id": 25, "label_name": "person"} 40 | ] 41 | }, 42 | { 43 | "id": "3", 44 | "name": "lidar semantic segmention", 45 | "description": "3d point cloud semantic segmentation", 46 | "format": "PCD", 47 | "spec": [ 48 | {"label_id": 27, "label_name": "car", "point_value": 0}, 49 | {"label_id": 34, "label_name": "bicycle", "point_value": 1}, 50 | {"label_id": 25, "label_name": "person", "point_value": 2} 51 | ] 52 | }, 53 | { 54 | "id": "4", 55 | "name": "2d bounding box", 56 | "description": "2d bounding box annotation", 57 | "format": "JSON", 58 | "spec": [ 59 | {"label_id": 27, "label_name": "car"}, 60 | {"label_id": 34, "label_name": "bicycle"}, 61 | {"label_id": 25, "label_name": "person"} 62 | ] 63 | } 64 | ] 65 | } 66 | -------------------------------------------------------------------------------- /tests/mock_data/simrun/Dataset/captures_000.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "captures": [ 4 | { 5 | "id": "e8b44709-dddf-439d-94d2-975460924903", 6 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 7 | "step": 1, 8 | "timestamp": 1, 9 | "sensor": { 10 | "sensor_id": "b4f6a75e-12de-4b4c-9574-5b135cecac6f", 11 | "ego_id": "4f80234d-4342-420f-9187-07004613cd1f", 12 | "modality": 
"camera", 13 | "translation": [0.2, 1.1, 0.3], 14 | "rotation": [0.3, 0.2, 0.1, 0.5], 15 | "camera_intrinsic": [ 16 | [0.1, 0, 0], 17 | [3.0, 0.1, 0], 18 | [0.5, 0.45, 1] 19 | ] 20 | }, 21 | "ego": { 22 | "ego_id": 1, 23 | "translation": [0.02, 0.0, 0.0], 24 | "rotation": [0.1, 0.1, 0.3, 0.0], 25 | "velocity": [0.1, 0.1, 0.0], 26 | "acceleration": null 27 | }, 28 | "filename": "captures/camera_000.png", 29 | "format": "PNG", 30 | "annotations": [ 31 | { 32 | "id": "35cbdf6e-96e5-446e-852d-fe40be79ce77", 33 | "annotation_definition": "1", 34 | "filename": "annotations/semantic_segmentation_000.png", 35 | "values": null 36 | } 37 | ] 38 | } 39 | ] 40 | } 41 | -------------------------------------------------------------------------------- /tests/mock_data/simrun/Dataset/captures_001.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "captures": [ 4 | { 5 | "id": "4521949a-2a71-4c03-beb0-4f6362676639", 6 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 7 | "step": 2, 8 | "timestamp": 2, 9 | "sensor": { 10 | "sensor_id": 1, 11 | "ego_id": 1, 12 | "modality": "camera", 13 | "translation": [0.2, 1.1, 0.3], 14 | "rotation": [0.3, 0.2, 0.1, 0.5], 15 | "camera_intrinsic": [ 16 | [0.1, 0, 0], 17 | [3.0, 0.1, 0], 18 | [0.5, 0.45, 1] 19 | ] 20 | }, 21 | "ego": { 22 | "ego_id": 1, 23 | "translation": [0.12, 0.1, 0.0], 24 | "rotation": [0.0, 0.15, 0.24, 0.0], 25 | "velocity": [0.0, 0.0, 0.0], 26 | "acceleration": null 27 | }, 28 | "filename": "captures/camera_001.png", 29 | "format": "PNG", 30 | "annotations": [ 31 | { 32 | "id": "a79ab4fb-acf3-47ad-8a6f-20af795e23e1", 33 | "annotation_definition": "1", 34 | "filename": "annotations/semantic_segmentation_001.png", 35 | "values": null 36 | }, 37 | { 38 | "id": "36db01f8-e322-4c81-a650-bec89a7e6100", 39 | "annotation_definition": "2", 40 | "filename": null, 41 | "values": [ 42 | { 43 | "instance_id": "85149ab1-3b75-443b-8540-773b31559a26", 44 | "label_id": 27, 45 | "label_name": "car", 46 | "translation": [24.0, 12.1, 0.0], 47 | "size": [2.0, 3.0, 1.0], 48 | "rotation": [0.0, 1.0, 2.0, 0.0], 49 | "velocity": [0.5, 0.0, 0.0], 50 | "acceleration": null 51 | }, 52 | { 53 | "instance_id": "f2e56dad-9bfd-4930-9dca-bfe08672de3a", 54 | "label_id": 34, 55 | "label_name": "bicycle", 56 | "translation": [5.2, 7.9, 0.0], 57 | "size": [0.3, 0.5, 1.0], 58 | "rotation": [0.0, 1.0, 2.0, 0.0], 59 | "velocity": [0.0, 0.1, 0.0], 60 | "acceleration": null 61 | }, 62 | { 63 | "instance_id": "a52dfb48-e5a4-4008-96b6-80da91caa777", 64 | "label_id": 25, 65 | "label_name": "person", 66 | "translation": [41.2, 1.5, 0.0], 67 | "size": [0.3, 0.3, 1.8], 68 | "rotation": [0.0, 1.0, 2.0, 0.0], 69 | "velocity": [0.05, 0.0, 0.0], 70 | "acceleration": null 71 | } 72 | ] 73 | }, 74 | { 75 | "id": "36db01f8-e322-4c81-a650-bec89a7e6100", 76 | "annotation_definition": "4", 77 | "filename": null, 78 | "values": [ 79 | { 80 | "instance_id": "85149ab1-3b75-443b-8540-773b31559a26", 81 | "label_id": 27, 82 | "label_name": "car", 83 | "x": 30.0, 84 | "y": 50.0, 85 | "width": 100.0, 86 | "height": 100.0 87 | }, 88 | { 89 | "instance_id": "f2e56dad-9bfd-4930-9dca-bfe08672de3a", 90 | "label_id": 34, 91 | "label_name": "bicycle", 92 | "x": 120.0, 93 | "y": 231.0, 94 | "width": 50.0, 95 | "height": 20.0 96 | }, 97 | { 98 | "instance_id": "a52dfb48-e5a4-4008-96b6-80da91caa777", 99 | "label_id": 25, 100 | "label_name": "person", 101 | "x": 132.0, 102 | "y": 83.0, 103 | "width": 10.0, 104 | "height": 20.0 105 | } 106 | ] 107 | } 
108 | ] 109 | }, 110 | { 111 | "id": "4b35a47a-3f63-4af3-b0e8-e68cb384ad75", 112 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 113 | "step": 2, 114 | "timestamp": 2, 115 | "sensor": { 116 | "sensor_id": 2, 117 | "ego_id": 1, 118 | "modality": "lidar", 119 | "translation": [0.0, 0.0, 0.0], 120 | "rotation": [0.0, 0.0, 0.0, 0.0], 121 | "camera_intrinsic": null 122 | }, 123 | "ego": { 124 | "ego_id": 1, 125 | "translation": [0.12, 0.1, 0.0], 126 | "rotation": [0.0, 0.15, 0.24, 0.0], 127 | "velocity": [0.0, 0.0, 0.0], 128 | "acceleration": null 129 | }, 130 | "filename": "captures/lidar_000.pcd", 131 | "format": "PCD", 132 | "annotations": [ 133 | { 134 | "id": "3b7b2af7-4d9f-4f1d-a9f5-32365c5896c8", 135 | "annotation_definition": "3", 136 | "filename": "annotations/lidar_semantic_segmentation_000.pcd" 137 | } 138 | ] 139 | } 140 | ] 141 | } 142 | -------------------------------------------------------------------------------- /tests/mock_data/simrun/Dataset/egos.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "egos": [ 4 | { 5 | "id": "4f80234d-4342-420f-9187-07004613cd1f", 6 | "description": "the main car driving in simulation" 7 | } 8 | ] 9 | } 10 | -------------------------------------------------------------------------------- /tests/mock_data/simrun/Dataset/metric_definitions.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "metric_definitions": [ 4 | { 5 | "id": 1, 6 | "name": "object count", 7 | "description": "count number of objects observed", 8 | "spec": [ 9 | {"label_id": 27, "label_name": "car"}, 10 | {"label_id": 34, "label_name": "bicycle"}, 11 | {"label_id": 25, "label_name": "person"} 12 | ] 13 | }, 14 | { 15 | "id": 2, 16 | "name": "visible pixel", 17 | "description": "visible pixel", 18 | "spec": [ 19 | {"label_id": 21, "label_name": "watch"}, 20 | {"label_id": 28, "label_name": "book"} 21 | ] 22 | } 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /tests/mock_data/simrun/Dataset/metrics_000.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "metrics": [ 4 | { 5 | "capture_id": "e8b44709-dddf-439d-94d2-975460924903", 6 | "annotation_id": null, 7 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 8 | "step": 1, 9 | "metric_definition": 1, 10 | "values": [ 11 | {"label_id": 27, "label_name": "car", "count": 5}, 12 | {"label_id": 34, "label_name": "bicycle", "count": 1}, 13 | {"label_id": 25, "label_name": "person", "count": 7} 14 | ] 15 | }, 16 | { 17 | "capture_id": "4b35a47a-3f63-4af3-b0e8-e68cb384ad75", 18 | "annotation_id": "35cbdf6e-96e5-446e-852d-fe40be79ce77", 19 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 20 | "step": 1, 21 | "metric_definition": 1, 22 | "values": [ 23 | {"label_id": 27, "label_name": "car", "count": 3}, 24 | {"label_id": 25, "label_name": "person", "count": 2} 25 | ] 26 | }, 27 | { 28 | "capture_id": "3d09bbce-7f7b-4d9c-8c8a-2f75158e0c8e", 29 | "annotation_id": null, 30 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 31 | "step": 1, 32 | "metric_definition": 1, 33 | "values": [ 34 | {"label_id": 27, "label_name": "car", "count": 1}, 35 | {"label_id": 34, "label_name": "bicycle", "count": 2}, 36 | {"label_id": 25, "label_name": "person", "count": 2} 37 | ] 38 | }, 39 | { 40 | "capture_id": "3d09bbce-7f7b-4d9c-8c8a-2f75158e0c8e", 41 | "annotation_id": 
null, 42 | "sequence_id": "e96b97cd-8130-4ab4-a105-1b911a6d912b", 43 | "step": 1, 44 | "metric_definition": 2, 45 | "values": [ 46 | {"label_id": 21, "visible_pixels": 1}, 47 | {"label_id": 28, "visible_pixels": 2} 48 | ] 49 | } 50 | ] 51 | } 52 | -------------------------------------------------------------------------------- /tests/mock_data/simrun/Dataset/sensors.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "sensors": [ 4 | { 5 | "id": "b4f6a75e-12de-4b4c-9574-5b135cecac6f", 6 | "ego_id": "4f80234d-4342-420f-9187-07004613cd1f", 7 | "modality": "camera", 8 | "description": "Point Grey Flea 2 (FL2-14S3M-C)" 9 | }, 10 | { 11 | "id": "6fb1a823-5b83-4a79-b566-fe4435ec1942", 12 | "ego_id": "4f80234d-4342-420f-9187-07004613cd1f", 13 | "modality": "lidar", 14 | "description": "Velodyne HDL-64E" 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /tests/mock_data/simrun/README.md: -------------------------------------------------------------------------------- 1 | Mockup of Synthetic Dataset 2 | 3 | This is a mock dataset created according to this schema [design](https://docs.google.com/document/d/1lKPm06z09uX9gZIbmBUMO6WKlIGXiv3hgXb_taPOnU0) 4 | 5 | Included in this mockup: 6 | 7 | - 1 ego car 8 | - 2 sensors: 1 camera and 1 LIDAR 9 | - 19 labels 10 | - 3 captures, 2 metrics, 1 sequence, 2 steps 11 | - the first capture includes 1 camera capture and 1 semantic segmentation annotation. 12 | - the other two captures, 1 camera capture and 1 LIDAR capture, are triggered at the same time. For the camera, semantic segmentation, instance segmentation and 3d bounding box annotations are provided. For the LIDAR sensor, a semantic segmentation annotation of the point cloud is included. 13 | - one metric event is emitted at the capture level; the other is emitted at the annotation level. 14 | - 4 types of annotations: semantic segmentation, 2d bounding box, 3d bounding box and LIDAR semantic segmentation.
15 | - 2 types of metrics: object count and visible pixel 16 | -------------------------------------------------------------------------------- /tests/mock_data/simrun/annotations/instance_segmantation_000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/simrun/annotations/instance_segmantation_000.png -------------------------------------------------------------------------------- /tests/mock_data/simrun/annotations/lidar_semantic_segmentation_000.pcd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/simrun/annotations/lidar_semantic_segmentation_000.pcd -------------------------------------------------------------------------------- /tests/mock_data/simrun/annotations/sementic_segmantation_000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/simrun/annotations/sementic_segmantation_000.png -------------------------------------------------------------------------------- /tests/mock_data/simrun/annotations/sementic_segmantation_001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/simrun/annotations/sementic_segmantation_001.png -------------------------------------------------------------------------------- /tests/mock_data/simrun/captures/camera_000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/simrun/captures/camera_000.png -------------------------------------------------------------------------------- /tests/mock_data/simrun/captures/camera_001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/simrun/captures/camera_001.png -------------------------------------------------------------------------------- /tests/mock_data/simrun/captures/lidar_000.pcd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/simrun/captures/lidar_000.pcd -------------------------------------------------------------------------------- /tests/mock_data/simrun_keypoint_dataset/annotations/keypoint_000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/simrun_keypoint_dataset/annotations/keypoint_000.png -------------------------------------------------------------------------------- /tests/mock_data/simrun_keypoint_dataset/annotations/keypoint_001.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unity-Technologies/datasetinsights/0705bead5e592bdfe59e10a749ed7bf5264a2b68/tests/mock_data/simrun_keypoint_dataset/annotations/keypoint_001.png -------------------------------------------------------------------------------- /tests/mock_data/simrun_keypoint_dataset/egos.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "egos": [ 4 | { 5 | "id": "f20cb747-f561-4963-8171-f699a0aadb3c", 6 | "description": "" 7 | } 8 | ] 9 | } 10 | -------------------------------------------------------------------------------- /tests/mock_data/simrun_keypoint_dataset/metric_definitions.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "metric_definitions": [ 4 | { 5 | "id": "db1b258e-d1d0-41b6-8751-16f601a2e230", 6 | "name": "scenario_iteration", 7 | "description": "Iteration information for dataset sequences" 8 | }, 9 | { 10 | "id": "c0b5e272-9715-4ea2-930e-cfe8cecf1b6e", 11 | "name": "Light position", 12 | "description": "The world-space position of the light" 13 | }, 14 | { 15 | "id": "317735b9-b4a4-4f6b-a4c9-eb846463a583", 16 | "name": "Light rotation", 17 | "description": "The world-space rotation of the light" 18 | }, 19 | { 20 | "id": "1a709e09-81bd-43b5-b8f0-f3952a1af444", 21 | "name": "Light intensity", 22 | "description": "The intensity of the light" 23 | }, 24 | { 25 | "id": "a640e390-fa13-4bb0-b2cd-1e0cb3f43eb9", 26 | "name": "Light color", 27 | "description": "The color of the light" 28 | }, 29 | { 30 | "id": "1529faeb-863f-40c2-840f-5fe4221c1065", 31 | "name": "Camera position", 32 | "description": "The world-space position of the camera" 33 | }, 34 | { 35 | "id": "5199deef-2eb0-42fe-b00d-1d2418aedaff", 36 | "name": "Camera rotation", 37 | "description": "The world-space rotation of the camera" 38 | }, 39 | { 40 | "id": "42e7fa88-084b-423d-ba6e-830c711383e1", 41 | "name": "Camera field of view", 42 | "description": "The field of view of the camera" 43 | }, 44 | { 45 | "id": "11aa1dfc-3495-467c-a998-71d9bfe6980e", 46 | "name": "Camera focal length", 47 | "description": "The focal length of the camera" 48 | }, 49 | { 50 | "id": "14adb394-46c0-47e8-a3f0-99e754483b76", 51 | "name": "random-seed", 52 | "description": "The random seed used to initialize the random state of the simulation. Only triggered once per simulation." 
53 | }, 54 | { 55 | "id": "51da3c27-369d-4929-aea6-d01614635ce2", 56 | "name": "object count", 57 | "description": "Counts of objects for each label in the sensor's view", 58 | "spec": [ 59 | { 60 | "label_id": 1, 61 | "label_name": "person" 62 | } 63 | ] 64 | }, 65 | { 66 | "id": "5ba92024-b3b7-41a7-9d3f-c03a6a8ddd01", 67 | "name": "rendered object info", 68 | "description": "Information about each labeled object visible to the sensor", 69 | "spec": [ 70 | { 71 | "label_id": 1, 72 | "label_name": "person" 73 | } 74 | ] 75 | } 76 | ] 77 | } 78 | -------------------------------------------------------------------------------- /tests/mock_data/simrun_keypoint_dataset/sensors.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.0.1", 3 | "sensors": [ 4 | { 5 | "id": "f4644cfe-4219-4936-a686-0e0fbe5b6559", 6 | "ego_id": "f20cb747-f561-4963-8171-f699a0aadb3c", 7 | "modality": "camera", 8 | "description": "" 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /tests/mock_data/simrun_manifest.csv: -------------------------------------------------------------------------------- 1 | "run_execution_id","app_param_id","instance_id","attempt_id","file_name","download_uri" 2 | "simrun","18cWX0n","1","1","Annotations/lidar_semantic_segmentation_000.pcd","https://mock.url/Annotations/lidar_semantic_segmentation_000.pcd" 3 | "simrun","18cWX0n","1","1","Annotations/sementic_segmantation_000.png","https://mock.url/Annotations/sementic_segmantation_000.png" 4 | "simrun","18cWX0n","1","1","Annotations/sementic_segmantation_001.png","https://mock.url/Annotations/sementic_segmantation_001.png" 5 | "simrun","18cWX0n","1","1","Captures/camera_000.png","https://mock.url/Captures/camera_000.png" 6 | "simrun","18cWX0n","1","1","Captures/camera_001.png","https://mock.url/Captures/camera_001.png" 7 | "simrun","18cWX0n","1","1","Captures/lidar_000.pcd","https://mock.url/Captures/lidar_000.pcd" 8 | "simrun","18cWX0n","1","1","Dataset/captures_000.json","https://mock.url/Dataset/captures_000.json" 9 | "simrun","18cWX0n","1","1","Dataset/captures_001.json","https://mock.url/Dataset/captures_001.json" 10 | "simrun","18cWX0n","1","1","Dataset/metrics_000.json","https://mock.url/Dataset/metrics_000.json" 11 | "simrun","18cWX0n","1","1","Dataset/annotation_definitions.json","https://mock.url/Dataset/annotation_definitions.json" 12 | "simrun","18cWX0n","1","1","Dataset/metric_definitions.json","https://mock.url/Dataset/metric_definitions.json" 13 | "simrun","18cWX0n","1","1","Dataset/egos.json","https://mock.url/Dataset/egos.json" 14 | "simrun","18cWX0n","1","1","Dataset/sensors.json","https://mock.url/Dataset/sensors.json" 15 | -------------------------------------------------------------------------------- /tests/test_bbox.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | from datasetinsights.io.bbox import BBox2D, BBox3D, group_bbox2d_per_label 4 | from datasetinsights.stats.visualization.bbox3d_plot import ( 5 | _project_pt_to_pixel_location, 6 | _project_pt_to_pixel_location_orthographic, 7 | ) 8 | 9 | 10 | def test_group_bbox2d_per_label(): 11 | count1, count2 = 10, 11 12 | bbox1 = BBox2D(label="car", x=1, y=1, w=2, h=3) 13 | bbox2 = BBox2D(label="pedestrian", x=7, y=6, w=3, h=4) 14 | bboxes = [] 15 | bboxes.extend([bbox1] * count1) 16 | bboxes.extend([bbox2] * count2) 17 | bboxes_per_label = group_bbox2d_per_label(bboxes) 18 | assert 
len(bboxes_per_label["car"]) == count1 19 | assert len(bboxes_per_label["pedestrian"]) == count2 20 | 21 | 22 | def test_group_bbox3d(): 23 | bbox = BBox3D( 24 | label="na", sample_token=0, translation=[0, 0, 0], size=[5, 5, 5] 25 | ) 26 | flb = bbox.front_left_bottom_pt 27 | frb = bbox.front_right_bottom_pt 28 | flt = bbox.front_left_top_pt 29 | frt = bbox.front_right_top_pt 30 | 31 | blb = bbox.back_left_bottom_pt 32 | brb = bbox.back_right_bottom_pt 33 | blt = bbox.back_left_top_pt 34 | brt = bbox.back_right_top_pt 35 | 36 | assert flb[0] == flt[0] == blb[0] == blt[0] == -2.5 37 | assert frb[0] == frt[0] == brb[0] == brt[0] == 2.5 38 | 39 | assert flt[1] == frt[1] == blt[1] == brt[1] == 2.5 40 | assert flb[1] == frb[1] == blb[1] == brb[1] == -2.5 41 | 42 | assert flt[2] == flb[2] == frt[2] == frb[2] == 2.5 43 | assert blt[2] == blb[2] == brt[2] == brb[2] == -2.5 44 | 45 | 46 | def test_project_pt_to_pixel_location(): 47 | pt = [0, 0, 0] 48 | proj = numpy.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) 49 | img_height = 480 50 | img_width = 640 51 | 52 | pixel_loc = _project_pt_to_pixel_location(pt, proj, img_height, img_width) 53 | assert pixel_loc[0] == 320 54 | assert pixel_loc[1] == 240 55 | 56 | # more interesting case 57 | pt = [0, 0, 70] 58 | proj = numpy.array([[1.299038, 0, 0], [0, 1.7320, 0], [0, 0, -1.0006]]) 59 | 60 | pixel_loc = _project_pt_to_pixel_location(pt, proj, img_height, img_width) 61 | assert pixel_loc[0] == 320 62 | assert pixel_loc[1] == 240 63 | 64 | 65 | def test_project_pt_to_pixel_location_orthographic(): 66 | pt = [0, 0, 0] 67 | proj = numpy.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) 68 | img_height = 480 69 | img_width = 640 70 | 71 | pixel_loc = _project_pt_to_pixel_location_orthographic( 72 | pt, proj, img_height, img_width 73 | ) 74 | assert pixel_loc[0] == 320 75 | assert pixel_loc[1] == 240 76 | 77 | # more interesting case 78 | pt = [0.3, 0, 0] 79 | proj = numpy.array([[0.08951352, 0, 0], [0, 0.2, 0], [0, 0, -0.0020006]]) 80 | 81 | pixel_loc = _project_pt_to_pixel_location_orthographic( 82 | pt, proj, img_height, img_width 83 | ) 84 | assert pixel_loc[0] == int( 85 | (proj[0][0] * pt[0] + 1) * 0.5 * img_width 86 | ) # 328 87 | assert pixel_loc[1] == img_height // 2 88 | -------------------------------------------------------------------------------- /tests/test_create_downloader.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from datasetinsights.io.downloader.base import create_dataset_downloader 4 | from datasetinsights.io.downloader.http_downloader import HTTPDatasetDownloader 5 | 6 | 7 | @pytest.mark.parametrize( 8 | "source_uri", 9 | ["http://", "https://"], 10 | ) 11 | def test_create_dataset_downloader_http_downloader(source_uri): 12 | 13 | # act 14 | downloader = create_dataset_downloader(source_uri=source_uri) 15 | 16 | # assert 17 | assert isinstance(downloader, HTTPDatasetDownloader) 18 | 19 | 20 | def test_create_dataset_downloader_invalid_input(): 21 | # arrange 22 | source_uri = "invalid_protocol://" 23 | # assert 24 | with pytest.raises(ValueError): 25 | # act 26 | create_dataset_downloader(source_uri=source_uri) 27 | 28 | 29 | def test_create_dataset_downloader_none_input(): 30 | # arrange 31 | source_uri = None 32 | # assert 33 | with pytest.raises(TypeError): 34 | # act 35 | create_dataset_downloader(source_uri=source_uri) 36 | -------------------------------------------------------------------------------- /tests/test_dashboard.py: 
-------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from datasetinsights.stats.visualization.object_detection import ScaleFactor 4 | 5 | 6 | def test_generate_scale_data(): 7 | captures = [ 8 | { 9 | "id": "4521949a-2a71-4c03-beb0-4f6362676639", 10 | "sensor": {"scale": 1.0}, 11 | }, 12 | { 13 | "id": "4b35a47a-3f63-4af3-b0e8-e68cb384ad75", 14 | "sensor": {"scale": 2.0}, 15 | }, 16 | ] 17 | 18 | captures = pd.DataFrame(captures) 19 | actual_scale = ScaleFactor.generate_scale_data(captures) 20 | expected_scale = pd.DataFrame([1.0, 2.0], columns=["scale"]) 21 | pd.testing.assert_frame_equal(expected_scale, actual_scale) 22 | -------------------------------------------------------------------------------- /tests/test_download_command.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | from click.exceptions import BadParameter 5 | from click.testing import CliRunner 6 | 7 | from datasetinsights.commands.download import SourceURI, cli 8 | 9 | 10 | def test_source_uri_validation(): 11 | validate_source_uri = SourceURI() 12 | 13 | gcs_path = "gs://bucket/path/to/folder" 14 | usim_path = "usim://auth@project_id/abdde" 15 | http_path = "http://domain/file.zip" 16 | https_path = "https://domain/file.zip" 17 | 18 | assert validate_source_uri(gcs_path) == gcs_path 19 | assert validate_source_uri(usim_path) == usim_path 20 | assert validate_source_uri(http_path) == http_path 21 | assert validate_source_uri(https_path) == https_path 22 | 23 | # each invalid URI must raise on its own; inside a single pytest.raises 24 | # block only the first call would ever execute 25 | for bad_uri in ["s3://bucket/file", "/path/to/file", "dasdklsdk", ""]: 26 | with pytest.raises(BadParameter): 27 | validate_source_uri(bad_uri) 28 | 29 | 30 | @pytest.mark.parametrize( 31 | "args", 32 | [ 33 | ["download", "--source-uri=usim://", "--output=tests/"], 34 | ["download", "--source-uri=http://", "--output=tests/"], 35 | ["download", "--source-uri=https://", "--output=tests/"], 36 | ["download", "--source-uri=gs://", "--output=tests/"], 37 | ], 38 | ) 39 | @patch("datasetinsights.commands.download.create_dataset_downloader") 40 | def test_download_except_called_once(mock_create, args): 41 | # arrange 42 | runner = CliRunner() 43 | # act 44 | runner.invoke(cli, args) 45 | # assert 46 | mock_create.assert_called_once() 47 | mock_create.return_value.download.assert_called_once() 48 | 49 | 50 | @pytest.mark.parametrize( 51 | "args", 52 | [["download"], ["download", "--source-uri=s3://"]], 53 | ) 54 | @patch("datasetinsights.commands.download.create_dataset_downloader") 55 | def test_download_except_not_called(mock_create, args): 56 | # arrange 57 | runner = CliRunner() 58 | # act 59 | runner.invoke(cli, args) 60 | # assert 61 | mock_create.assert_not_called() 62 | mock_create.return_value.download.assert_not_called() 63 | -------------------------------------------------------------------------------- /tests/test_http_downloader.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | 5 | from datasetinsights.io.downloader.http_downloader import HTTPDatasetDownloader 6 | from datasetinsights.io.exceptions import ChecksumError 7 | 8 | 9 | @patch("datasetinsights.io.downloader.http_downloader.download_file") 10 | def test_download_without_checksum(mock_download_file): 11 | # arrange 12 | source_uri = "http://some/path" 13 | output = "/some/path/" 14 | downloader =
HTTPDatasetDownloader() 15 | 16 | # act 17 | downloader.download(source_uri=source_uri, output=output) 18 | 19 | # assert 20 | mock_download_file.assert_called_once() 21 | 22 | 23 | @patch("datasetinsights.io.downloader.http_downloader.download_file") 24 | @patch("datasetinsights.io.downloader.http_downloader.validate_checksum") 25 | @patch("datasetinsights.io.downloader.http_downloader.get_checksum_from_file") 26 | def test_download_with_checksum( 27 | mock_get_checksum_from_file, 28 | mock_validate_check_sum, 29 | mock_download_file, 30 | ): 31 | # arrange 32 | source_uri = "http://some/path" 33 | checksum_file = "/some/checksum_file.txt" 34 | output = "/some/path/" 35 | downloader = HTTPDatasetDownloader() 36 | 37 | # act 38 | downloader.download( 39 | source_uri=source_uri, output=output, checksum_file=checksum_file 40 | ) 41 | 42 | # assert 43 | mock_download_file.assert_called_once() 44 | mock_get_checksum_from_file.assert_called_once() 45 | mock_validate_check_sum.assert_called_once() 46 | 47 | 48 | @patch("os.remove") 49 | @patch("datasetinsights.io.downloader.http_downloader.download_file") 50 | @patch("datasetinsights.io.downloader.http_downloader.validate_checksum") 51 | @patch("datasetinsights.io.downloader.http_downloader.get_checksum_from_file") 52 | def test_download_with_wrong_checksum( 53 | mock_get_checksum_from_file, 54 | mock_validate_checksum, 55 | mock_download_file, 56 | mock_remove, 57 | ): 58 | # arrange 59 | mock_validate_checksum.side_effect = ChecksumError 60 | output = "/some/path" 61 | source_uri = "http://some/path" 62 | checksum_file = "/some/checksum_file.txt" 63 | downloader = HTTPDatasetDownloader() 64 | 65 | # act 66 | with pytest.raises(ChecksumError): 67 | downloader.download( 68 | source_uri=source_uri, output=output, checksum_file=checksum_file 69 | ) 70 | 71 | # assert 72 | mock_get_checksum_from_file.assert_called_once() 73 | mock_download_file.assert_called_once() 74 | mock_remove.assert_called_once() 75 | -------------------------------------------------------------------------------- /tests/test_image_analysis.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import pathlib 4 | 5 | import numpy as np 6 | 7 | from datasetinsights.stats.image_analysis import ( 8 | get_average_psd_1d, 9 | get_bbox_fg_bg_var_laplacian, 10 | get_final_mask, 11 | get_psd2d, 12 | get_seg_fg_bg_var_laplacian, 13 | get_wt_coeffs_var, 14 | laplacian_img, 15 | ) 16 | 17 | 18 | def test_get_bbox_fg_bg_var_laplacian(): 19 | cur_dir = pathlib.Path(__file__).parent.absolute() 20 | img_path = str( 21 | cur_dir 22 | / "mock_data" 23 | / "coco" 24 | / "images" 25 | / "camera_61855733451949387398181790757513827492.png" 26 | ) 27 | ann_path = str( 28 | cur_dir / "mock_data" / "coco" / "annotations" / "keypoints.json" 29 | ) 30 | laplacian = laplacian_img(img_path) 31 | f = open(ann_path) 32 | annotations = json.load(f)["annotations"] 33 | bbox_var_lap, img_var_lap = get_bbox_fg_bg_var_laplacian( 34 | laplacian, annotations 35 | ) 36 | assert len(bbox_var_lap) > 0 37 | assert img_var_lap is not None 38 | 39 | 40 | def test_get_seg_fg_bg_var_laplacian(): 41 | laplacian = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) 42 | final_mask = np.array([[1, 0, 1], [1, 0, 1], [1, 0, 1]]) 43 | expected_fg_var_lap = np.array([2, 5, 8]).var() 44 | expected_bg_var_lap = np.array([1, 3, 4, 6, 7, 9]).var() 45 | 46 | fg_var_lap, bg_var_lap = get_seg_fg_bg_var_laplacian(laplacian, final_mask) 47 | 48 | assert fg_var_lap == 
expected_fg_var_lap 49 | assert bg_var_lap == expected_bg_var_lap 50 | 51 | 52 | def test_get_final_mask(): 53 | mask_a = np.array([[1, 0, 0], [0, 0, 0], [0, 0, 0]]) 54 | mask_b = np.array([[1, 1, 1], [0, 0, 0], [0, 0, 0]]) 55 | mask_c = np.array([[0, 0, 0], [0, 0, 0], [0, 1, 0]]) 56 | expected_final_mask = np.array([[1, 1, 1], [0, 0, 0], [0, 1, 0]]) 57 | 58 | final_mask = get_final_mask(masks=[mask_a, mask_b, mask_c]) 59 | 60 | assert np.array_equal(expected_final_mask, final_mask) 61 | 62 | 63 | def test_get_psd2d(): 64 | test_img = np.array([[1, 0, 0], [0, 0, 0], [0, 0, 0]]) 65 | psd2d = get_psd2d(image=test_img) 66 | 67 | assert psd2d.shape == test_img.shape 68 | 69 | 70 | def test_get_avg_psd(): 71 | cur_dir = pathlib.Path(__file__).parent.absolute() 72 | img_dir_path = str(cur_dir / "mock_data" / "coco" / "images") 73 | avg_psd_1d, std_psd_1d = get_average_psd_1d(img_dir_path) 74 | 75 | assert avg_psd_1d is not None 76 | assert type(std_psd_1d) == np.ndarray 77 | 78 | 79 | def test_get_wt_coeff_var(): 80 | cur_dir = pathlib.Path(__file__).parent.absolute() 81 | img_dir_path = str(cur_dir / "mock_data" / "coco" / "images") 82 | num_img = len(glob.glob(img_dir_path + f"/*.png")) 83 | h, v, d = get_wt_coeffs_var(img_dir_path) 84 | 85 | assert h is not None and len(h) == num_img 86 | assert v is not None and len(v) == num_img 87 | assert d is not None and len(d) == num_img 88 | -------------------------------------------------------------------------------- /tests/test_keypoints_stats.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | import pytest 5 | 6 | from datasetinsights.stats import ( 7 | get_average_skeleton, 8 | get_scale_keypoints, 9 | get_visible_keypoints_dict, 10 | ) 11 | from datasetinsights.stats.constants import COCO_KEYPOINTS, COCO_SKELETON 12 | 13 | 14 | @pytest.fixture() 15 | def _setup_annotations(): 16 | parent_dir = Path.cwd() 17 | json_file = ( 18 | parent_dir 19 | / "tests" 20 | / "mock_data" 21 | / "coco" 22 | / "annotations" 23 | / "keypoints.json" 24 | ) 25 | f = open(json_file) 26 | data = json.load(f) 27 | annotations = data["annotations"] 28 | keypoints_list = [] 29 | for k in annotations: 30 | keypoints_list.append(k["keypoints"]) 31 | yield keypoints_list 32 | keypoints_list = None 33 | 34 | 35 | def test_get_scale_keypoints(_setup_annotations): 36 | annotations = _setup_annotations 37 | processed_kp_dict = get_scale_keypoints(annotations) 38 | 39 | assert set(COCO_KEYPOINTS).issubset(set(processed_kp_dict.keys())) 40 | for keypoint in COCO_KEYPOINTS: 41 | count = sum( 42 | map(lambda x: x > 2.5 or x < -2.5, processed_kp_dict[keypoint]["x"]) 43 | ) 44 | assert count == 0 45 | count = sum( 46 | map(lambda x: x > 2.5 or x < -2.5, processed_kp_dict[keypoint]["y"]) 47 | ) 48 | assert count == 0 49 | 50 | 51 | def test_get_visible_keypoints_dict(_setup_annotations): 52 | keypoint_list = _setup_annotations 53 | 54 | labeled_kpt_dict = get_visible_keypoints_dict(keypoint_list) 55 | for keypoint in COCO_KEYPOINTS: 56 | assert keypoint in labeled_kpt_dict.keys() 57 | for value in labeled_kpt_dict.values(): 58 | assert value < 1 and value >= 0 59 | 60 | 61 | def test_get_scale_keypoints_bad_case(): 62 | annotations = [[0] * 40, [1] * 60] 63 | with pytest.raises(ValueError): 64 | get_scale_keypoints(annotations) 65 | 66 | 67 | @pytest.fixture() 68 | def _setup_kp_dict(): 69 | kp_dict = {} 70 | for name in COCO_KEYPOINTS: 71 | kp_dict[name] = {"x": [2, 0], "y": [0, 2]} 72 | yield 
kp_dict 73 | kp_dict = None 74 | 75 | 76 | def test_get_average_skeleton(_setup_kp_dict): 77 | kp_dict = _setup_kp_dict 78 | kp_link_list = get_average_skeleton(kp_dict) 79 | 80 | assert kp_link_list[0] == [(1, 1), (1, 1)] 81 | assert len(kp_link_list) == len(COCO_SKELETON) 82 | -------------------------------------------------------------------------------- /tests/test_main_entrypoint.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | from click.testing import CliRunner 5 | 6 | from datasetinsights.__main__ import entrypoint 7 | 8 | 9 | @pytest.mark.parametrize("args", [[], ["-v"], ["-v", "invalid_command"]]) 10 | @patch("datasetinsights.__main__.logging") 11 | def test_entrypoint_except_not_called(logger_mock, args): 12 | # arrange 13 | runner = CliRunner() 14 | # act 15 | runner.invoke(entrypoint, args) 16 | # assert 17 | logger_mock.getLogger.assert_not_called() 18 | logger_mock.getLogger.return_value.setLevel.assert_not_called() 19 | 20 | 21 | @pytest.mark.parametrize("args", [["-v", "download"]]) 22 | @patch("datasetinsights.__main__.logging") 23 | def test_entrypoint_except_called_once(logger_mock, args): 24 | # arrange 25 | runner = CliRunner() 26 | # act 27 | runner.invoke(entrypoint, args) 28 | # assert 29 | logger_mock.getLogger.assert_called_once() 30 | logger_mock.getLogger.return_value.setLevel.assert_called_once() 31 | -------------------------------------------------------------------------------- /tests/test_object_detection_stats.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | from pathlib import Path 4 | 5 | import pandas as pd 6 | import pytest 7 | 8 | from datasetinsights.stats import ( 9 | convert_coco_annotations_to_df, 10 | get_bbox_heatmap, 11 | get_bbox_per_img_dict, 12 | get_bbox_relative_size_list, 13 | ) 14 | 15 | 16 | @pytest.fixture() 17 | def annotations_path(): 18 | parent_dir = Path.cwd() 19 | json_file = ( 20 | parent_dir 21 | / "tests" 22 | / "mock_data" 23 | / "coco" 24 | / "annotations" 25 | / "keypoints.json" 26 | ) 27 | yield json_file 28 | json_file = None 29 | 30 | 31 | @pytest.fixture() 32 | def _setup_annotation_df(annotations_path): 33 | coco_json = json.load(open(annotations_path, "r")) 34 | 35 | df_image = pd.DataFrame(coco_json["images"]) 36 | df_annotation = pd.DataFrame(coco_json["annotations"]) 37 | 38 | df_coco = df_annotation.merge(df_image, left_on="image_id", right_on="id") 39 | yield df_coco 40 | df_coco = None 41 | 42 | 43 | def test_convert_coco_annotations_to_df(annotations_path): 44 | processed_kp_dict = convert_coco_annotations_to_df(annotations_path) 45 | target_column_names = processed_kp_dict.columns.values.tolist() 46 | 47 | column_names = [ 48 | "image_id", 49 | "area", 50 | "bbox", 51 | "iscrowd", 52 | "num_keypoints", 53 | "keypoints", 54 | "width", 55 | "height", 56 | ] 57 | 58 | for column_name in column_names: 59 | assert column_name in target_column_names 60 | 61 | 62 | def test_get_bbox_heatmap(_setup_annotation_df): 63 | annotation_df = _setup_annotation_df 64 | bbox_heatmap = get_bbox_heatmap(annotation_df) 65 | height, width, _ = bbox_heatmap.shape 66 | 67 | max_width = max(annotation_df["width"]) 68 | max_height = max(annotation_df["height"]) 69 | 70 | assert max_width == width 71 | assert max_height == height 72 | assert (bbox_heatmap < 0).sum() == 0 73 | 74 | 75 | def test_get_bbox_relative_size_list(_setup_annotation_df): 76 | annotation_df = 
_setup_annotation_df 77 | bbox_relative_size = get_bbox_relative_size_list(annotation_df) 78 | assert annotation_df.shape[0] == bbox_relative_size.shape[0] 79 | 80 | test_row = annotation_df.iloc[0] 81 | assert bbox_relative_size[0] == math.sqrt( 82 | test_row["area"] / (test_row["width"] * test_row["height"]) 83 | ) 84 | 85 | 86 | def test_get_bbox_per_img_dict(_setup_annotation_df): 87 | annotation_df = _setup_annotation_df 88 | 89 | bbox_num_dict = get_bbox_per_img_dict(annotation_df) 90 | for value in bbox_num_dict.values(): 91 | assert value < 1 and value >= 0 92 | assert sum(bbox_num_dict.values()) == 1 93 | -------------------------------------------------------------------------------- /tests/unity_perception/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | 6 | @pytest.fixture 7 | def mock_data_base_dir(): 8 | parent_dir = Path(__file__).parent.parent.absolute() 9 | mock_data_dir = parent_dir / "mock_data" 10 | 11 | return mock_data_dir 12 | 13 | 14 | @pytest.fixture 15 | def mock_data_dir(mock_data_base_dir): 16 | mock_data_dir = mock_data_base_dir / "simrun" 17 | 18 | return mock_data_dir 19 | -------------------------------------------------------------------------------- /tests/unity_perception/test_captures.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import json 3 | 4 | import pytest 5 | 6 | from datasetinsights.datasets.unity_perception import Captures 7 | from datasetinsights.datasets.unity_perception.exceptions import ( 8 | DefinitionIDError, 9 | ) 10 | from datasetinsights.datasets.unity_perception.tables import ( 11 | SCHEMA_VERSION, 12 | glob, 13 | ) 14 | 15 | 16 | @pytest.mark.parametrize( 17 | "data_dir_name", 18 | ["simrun", "no_annotations_or_metrics"], 19 | ) 20 | def test_get_captures_and_annotations(mock_data_base_dir, data_dir_name): 21 | mock_data_dir = mock_data_base_dir / data_dir_name 22 | captures = Captures(str(mock_data_dir), version=SCHEMA_VERSION) 23 | 24 | captures_per_definition = collections.defaultdict(int) 25 | json_files = glob(mock_data_dir, captures.FILE_PATTERN) 26 | for json_file in json_files: 27 | records = json.load(open(json_file, "r", encoding="utf8"))[ 28 | Captures.TABLE_NAME 29 | ] 30 | for record in records: 31 | for annotation in record["annotations"]: 32 | def_id = annotation["annotation_definition"] 33 | captures_per_definition[def_id] += 1 34 | 35 | for def_id, count in captures_per_definition.items(): 36 | assert len(captures.filter(def_id)) == count 37 | 38 | with pytest.raises(DefinitionIDError): 39 | captures.filter("bad_definition_id") 40 | -------------------------------------------------------------------------------- /tests/unity_perception/test_metrics.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import json 3 | 4 | import pandas as pd 5 | import pytest 6 | 7 | from datasetinsights.datasets.unity_perception import Metrics 8 | from datasetinsights.datasets.unity_perception.exceptions import ( 9 | DefinitionIDError, 10 | ) 11 | from datasetinsights.datasets.unity_perception.tables import ( 12 | SCHEMA_VERSION, 13 | glob, 14 | ) 15 | 16 | 17 | @pytest.mark.parametrize( 18 | "data_dir_name", 19 | ["simrun", "no_annotations_or_metrics"], 20 | ) 21 | def test_filter_metrics(mock_data_base_dir, data_dir_name): 22 | mock_data_dir = mock_data_base_dir / data_dir_name 23 | metrics = 
Metrics(str(mock_data_dir), version=SCHEMA_VERSION) 24 | 25 | expected_rows = collections.defaultdict(int) 26 | expected_cols = collections.defaultdict(set) 27 | exclude_metrics = set(["metric_definition", "values"]) 28 | def_ids = set() 29 | actual_metrics = collections.defaultdict(pd.DataFrame) 30 | json_files = glob(mock_data_dir, metrics.FILE_PATTERN) 31 | for json_file in json_files: 32 | records = json.load(open(json_file, "r", encoding="utf8"))[ 33 | Metrics.TABLE_NAME 34 | ] 35 | for record in records: 36 | def_id = record["metric_definition"] 37 | def_ids.add(def_id) 38 | for key in record: 39 | if key not in exclude_metrics: 40 | expected_cols[def_id].add(key) 41 | values = pd.json_normalize(record["values"]) 42 | for key in values.columns: 43 | expected_cols[def_id].add(key) 44 | expected_rows[def_id] += len(values) 45 | 46 | for def_id in def_ids: 47 | actual_metrics[def_id] = metrics.filter_metrics(def_id) 48 | 49 | for def_id, expected_metric in actual_metrics.items(): 50 | expected_shape = (expected_rows[def_id], len(expected_cols[def_id])) 51 | assert expected_shape == actual_metrics[def_id].shape 52 | assert expected_cols[def_id] == set(actual_metrics[def_id].columns) 53 | 54 | with pytest.raises(DefinitionIDError): 55 | metrics.filter_metrics("bad_definition_id") 56 | 57 | 58 | def test_normalize_values(mock_data_dir): 59 | metrics = { 60 | "capture_id": "1234", 61 | "annotation_id": None, 62 | "sequence_id": "2345", 63 | "step": 50, 64 | "metric_definition": "193ce072-0e49-4ea4-a99f-7ca837e3a6ce", 65 | "values": [ 66 | { 67 | "label_id": 1, 68 | "label_name": "book_dorkdiaries_aladdin", 69 | "count": 1, 70 | }, 71 | { 72 | "label_id": 2, 73 | "label_name": "candy_minipralines_lindt", 74 | "count": 2, 75 | }, 76 | ], 77 | } 78 | expected = [ 79 | { 80 | "label_id": 1, 81 | "label_name": "book_dorkdiaries_aladdin", 82 | "count": 1, 83 | "capture_id": "1234", 84 | "annotation_id": None, 85 | "step": 50, 86 | "sequence_id": "2345", 87 | }, 88 | { 89 | "label_id": 2, 90 | "label_name": "candy_minipralines_lindt", 91 | "count": 2, 92 | "capture_id": "1234", 93 | "annotation_id": None, 94 | "step": 50, 95 | "sequence_id": "2345", 96 | }, 97 | ] 98 | flatten_metrics = Metrics._normalize_values(metrics) 99 | for i, metric in enumerate(expected): 100 | for k in metric: 101 | assert metric[k] == flatten_metrics[i][k] 102 | -------------------------------------------------------------------------------- /tests/unity_perception/test_references.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tempfile 3 | from pathlib import Path 4 | 5 | import pytest 6 | 7 | from datasetinsights.datasets.unity_perception import ( 8 | AnnotationDefinitions, 9 | MetricDefinitions, 10 | ) 11 | from datasetinsights.datasets.unity_perception.tables import ( 12 | SCHEMA_VERSION, 13 | glob, 14 | ) 15 | from datasetinsights.datasets.unity_perception.validation import ( 16 | DuplicateRecordError, 17 | NoRecordError, 18 | ) 19 | 20 | 21 | def test_annotation_definitions(mock_data_dir): 22 | definition = AnnotationDefinitions( 23 | str(mock_data_dir), version=SCHEMA_VERSION 24 | ) 25 | 26 | json_file = next(glob(mock_data_dir, AnnotationDefinitions.FILE_PATTERN)) 27 | records = json.load(open(json_file, "r", encoding="utf8"))[ 28 | AnnotationDefinitions.TABLE_NAME 29 | ] 30 | 31 | def_ids = [r["id"] for r in records] 32 | for (i, def_id) in enumerate(def_ids): 33 | record = records[i] 34 | 35 | assert definition.get_definition(def_id) == record 36 | 
37 | 38 | def test_annotation_definitions_find_by_name(): 39 | def1 = { 40 | "id": 1, 41 | "name": "good name", 42 | "description": "does not matter", 43 | "format": "JSON", 44 | "spec": [], 45 | } 46 | def2 = { 47 | "id": 2, 48 | "name": "another good name", 49 | "description": "does not matter", 50 | "format": "JSON", 51 | "spec": [], 52 | } 53 | ann_def = { 54 | "version": SCHEMA_VERSION, 55 | "annotation_definitions": [def1, def2], 56 | } 57 | 58 | with tempfile.TemporaryDirectory() as tmp_dir: 59 | with open(Path(tmp_dir) / "annotation_definitions.json", "w") as f: 60 | json.dump(ann_def, f) 61 | definition = AnnotationDefinitions(tmp_dir, version=SCHEMA_VERSION) 62 | 63 | pattern = r"^good\sname$" 64 | assert definition.find_by_name(pattern) == def1 65 | 66 | pattern = "good name" 67 | with pytest.raises(DuplicateRecordError): 68 | definition.find_by_name(pattern) 69 | 70 | pattern = "w;fhohfoewh" 71 | with pytest.raises(NoRecordError): 72 | definition.find_by_name(pattern) 73 | 74 | 75 | def test_metric_definitions(mock_data_dir): 76 | definition = MetricDefinitions(str(mock_data_dir), version=SCHEMA_VERSION) 77 | 78 | json_file = next(glob(mock_data_dir, MetricDefinitions.FILE_PATTERN)) 79 | records = json.load(open(json_file, "r"))[MetricDefinitions.TABLE_NAME] 80 | 81 | def_ids = [r["id"] for r in records] 82 | for (i, def_id) in enumerate(def_ids): 83 | record = records[i] 84 | 85 | assert definition.get_definition(def_id) == record 86 | --------------------------------------------------------------------------------
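The unity_perception tests above construct the Captures and Metrics tables directly against the simrun mock data. As a usage reference, here is a minimal sketch (not a file from this repository dump) showing the same tables loaded outside of pytest. It assumes it is run from the repository root so the relative mock-data path resolves; the definition ids it passes ("1" for the semantic segmentation annotation, 1 for the "object count" metric) are taken from the mock JSON shown above.

```python
# Minimal usage sketch (assumption: run from the repository root so the
# relative path to the mock data resolves).
from datasetinsights.datasets.unity_perception import Captures, Metrics
from datasetinsights.datasets.unity_perception.tables import SCHEMA_VERSION

data_root = "tests/mock_data/simrun"

# Captures.filter(def_id) returns the captures carrying annotations with the
# given annotation_definition; "1" is the semantic segmentation definition
# in this mock dataset.
captures = Captures(data_root, version=SCHEMA_VERSION)
semantic_segmentation = captures.filter("1")
print(f"{len(semantic_segmentation)} captures with semantic segmentation")

# Metrics.filter_metrics(def_id) flattens each metric event's per-label
# "values" into one row per label; definition 1 is "object count".
metrics = Metrics(data_root, version=SCHEMA_VERSION)
object_counts = metrics.filter_metrics(1)
print(object_counts[["label_name", "count"]])
```

This mirrors what tests/unity_perception/test_captures.py and test_metrics.py verify: the number of filtered captures matches the annotation counts in the captures_*.json files, and the filtered metrics frame contains one row per label entry in each metric event's values list.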