├── .github ├── manifest.xml └── workflows │ ├── publish-website.yml │ ├── python-package.yml │ └── python-publish.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── docs ├── README.md ├── autogenerate_documentation.py ├── mkdocs.yml ├── structure.py └── templates │ ├── abstract │ ├── loader.md │ ├── messages.md │ ├── processor.md │ └── sequence.md │ ├── backend │ ├── boxes.md │ ├── camera.md │ ├── keypoints.md │ └── quaternion.md │ ├── getting-started │ ├── bounding_boxes.md │ ├── controlmap.md │ ├── image_augmentation.md │ ├── introduction_to_processors.md │ └── object_detection_pipeline.md │ ├── index.md │ ├── installation.md │ ├── models │ ├── classification.md │ ├── detection.md │ ├── keypoint.md │ └── layers.md │ ├── pipelines │ ├── applications.md │ ├── detection.md │ ├── image.md │ ├── keypoints.md │ └── pose.md │ └── processors │ ├── detection.md │ ├── draw.md │ ├── geometric.md │ ├── image.md │ ├── keypoints.md │ ├── pose.md │ ├── renderer.md │ └── standard.md ├── examples ├── action_scores │ ├── callbacks │ │ ├── __init__.py │ │ ├── discretized_actions_scores.py │ │ ├── feature_extractor.py │ │ ├── per_pixel_action_score.py │ │ └── scalar_action_score.py │ ├── datasets │ │ ├── __init__.py │ │ ├── cifar10.py │ │ ├── fashion_mnist.py │ │ ├── fer.py │ │ ├── ferplus.py │ │ ├── imagenet.py │ │ ├── kuzushiji_mnist.py │ │ ├── mnist.py │ │ ├── poisoned_cifar10.py │ │ └── utils.py │ ├── models │ │ ├── __init__.py │ │ ├── autoencoder.py │ │ ├── cnn.py │ │ ├── mlp.py │ │ ├── resnet.py │ │ └── xception.py │ ├── pipelines.py │ └── train_classifier.py ├── discovery_of_latent_keypoints │ ├── README.md │ ├── discover_latent_keypoints.py │ ├── run_experiments.sh │ └── scene.py ├── efficientdet │ ├── README.md │ ├── debugger.py │ ├── demo.py │ ├── demo_video.py │ ├── draw.py │ ├── efficientdet_test.py │ ├── evaluate_mAP.py │ ├── processors.py │ └── train.py ├── efficientpose │ ├── LICENSE │ ├── README.md │ ├── anchors.py │ ├── debugger.py │ ├── demo.py │ ├── linemod.py │ ├── losses.py │ ├── pose.py │ ├── processors.py │ ├── test_efficientpose.py │ └── train.py ├── eigenfaces │ ├── README.md │ ├── database.py │ ├── demo.py │ ├── eigenfaces.py │ ├── pipelines.py │ └── processors.py ├── face_classification │ ├── demo.py │ ├── pipelines.py │ └── train.py ├── fine-tuning_object_detection │ ├── backend.py │ ├── data_manager.py │ ├── demo.py │ ├── pipelines.py │ ├── processors.py │ ├── sequencer.py │ └── train.py ├── haar_cascade_detectors │ └── haar_cascade_detectors.py ├── hand_detection │ ├── README.md │ ├── demo.py │ ├── download_openimagesV6.py │ ├── model.py │ ├── open_images.py │ ├── pose_demo.py │ └── train.py ├── hand_pose_estimation │ ├── README.md │ ├── demo.py │ ├── demo3D.py │ ├── demo_image.py │ ├── hand_tracking.py │ └── is_open_demo.py ├── head_pose_estimation_6D │ └── demo.py ├── human_pose_estimation_2D │ ├── README.md │ ├── demo.py │ └── demo_image.py ├── human_pose_estimation_3D │ ├── demo.py │ ├── demo3D.py │ ├── demo_image.py │ └── viz.py ├── images_synthesis │ ├── README.md │ ├── data_manager.py │ ├── dataset_synthesis.py │ └── utils.py ├── implicit_orientation_learning │ ├── __init__.py │ ├── demo.py │ ├── model.py │ ├── pipelines.py │ ├── processors.py │ ├── run_experiments.sh │ ├── scenes.py │ └── train.py ├── keypoint_estimation │ ├── README.md │ ├── __init__.py │ ├── dataset_downloader.sh │ ├── demo.py │ ├── facial_keypoints.py │ ├── pipelines.py │ └── train.py ├── maml │ ├── README.md │ ├── maml.py │ ├── sinusoid.py │ ├── train_classification.py │ └── 
train_regression.py ├── mask_rcnn │ ├── README.md │ ├── __init__.py │ ├── backend │ │ ├── boxes.py │ │ └── image.py │ ├── coco_demo.py │ ├── datasets │ │ └── shapes.py │ ├── inference.py │ ├── inference_shapes.py │ ├── losses │ │ ├── proposal_bounding_box_loss.py │ │ └── proposal_class_loss.py │ ├── model │ │ ├── RPN_model.py │ │ ├── layer_utils.py │ │ ├── layers │ │ │ ├── bounding_box_loss.py │ │ │ ├── class_loss.py │ │ │ ├── detection.py │ │ │ ├── detection_target.py │ │ │ ├── feature_pyramid_network.py │ │ │ ├── mask_loss.py │ │ │ ├── proposal.py │ │ │ └── pyramid_ROI_align.py │ │ ├── model.py │ │ └── rpn_model.py │ ├── pipelines │ │ ├── data_generator.py │ │ └── detection.py │ ├── shapes_demo.py │ ├── shapes_train.py │ ├── tests │ │ ├── __init__.py │ │ ├── backend_test.py │ │ ├── inference_test.py │ │ ├── layers_test.py │ │ ├── losses_test.py │ │ └── pipeline_test.py │ ├── train.py │ └── utils.py ├── object_detection │ ├── VOC0712_downloader.sh │ ├── datasets │ │ └── ycb_video.py │ ├── debugger.py │ ├── demo.py │ ├── evaluate.py │ └── train.py ├── pix2pose │ ├── README.md │ ├── __init__.py │ ├── debugger.py │ ├── demo.py │ ├── demo_image.py │ ├── legacy │ │ ├── canonical_scene.py │ │ ├── demo_image.py │ │ ├── icp.py │ │ ├── metrics.py │ │ ├── processors.py │ │ ├── rock.py │ │ ├── test_icp.py │ │ ├── test_rotated_image.py │ │ ├── train_canonical_transform.py │ │ ├── train_gan.py │ │ └── train_symmetric.py │ ├── models │ │ ├── discriminator.py │ │ ├── dope.py │ │ ├── fully_convolutional_net.py │ │ ├── generator.py │ │ └── pix2pose.py │ ├── pipelines.py │ ├── scenes │ │ ├── __init__.py │ │ ├── canonical_pose_pixel_mask_renderer.py │ │ ├── pixel_mask_renderer.py │ │ ├── render_keypoints.py │ │ └── utils.py │ ├── train.py │ └── train_symmetric.py ├── probabilistic_keypoint_estimation │ ├── README.md │ ├── __init__.py │ ├── dataset_downloader.sh │ ├── demo.py │ ├── demo_image.py │ ├── facial_keypoints.py │ ├── model.py │ ├── pipelines.py │ ├── processors.py │ └── train.py ├── prototypical_networks │ ├── README.md │ └── train.py ├── semantic_segmentation │ ├── backend.py │ ├── demo.py │ ├── pipelines.py │ ├── processors.py │ ├── shapes.py │ ├── test_dataset.py │ ├── train.py │ └── train_cityscapes.py ├── spatial_transfomer_networks │ ├── STN.py │ ├── __init__.py │ ├── cluttered_mnist.py │ ├── layers.py │ └── train.py ├── structure_from_motion │ ├── backend.py │ ├── bundle_adjustment.py │ ├── demo.py │ ├── pipeline_cv2.py │ ├── pipeline_np.py │ ├── processors.py │ └── test.py ├── tutorials │ ├── bounding_boxes.py │ ├── controlmap_processor.py │ ├── image_augmentation.py │ └── object_detection_pipeline.py └── visual_voice_activity_detection │ ├── generator.py │ ├── live_demo.py │ ├── recorded_demo.py │ └── vvad_lrs3_dataset.py ├── paz ├── __init__.py ├── abstract │ ├── __init__.py │ ├── loader.py │ ├── messages.py │ ├── processor.py │ └── sequence.py ├── applications.py ├── backend │ ├── __init__.py │ ├── anchors.py │ ├── angles.py │ ├── boxes.py │ ├── camera.py │ ├── groups │ │ ├── SE3.py │ │ ├── SO3.py │ │ ├── __init__.py │ │ └── quaternion.py │ ├── heatmaps.py │ ├── image │ │ ├── __init__.py │ │ ├── draw.py │ │ ├── image.py │ │ ├── opencv_image.py │ │ └── tensorflow_image.py │ ├── keypoints.py │ ├── mask.py │ ├── munkres.py │ ├── poses.py │ ├── render.py │ └── standard.py ├── datasets │ ├── CMU_poanoptic.py │ ├── __init__.py │ ├── cityscapes.py │ ├── coco.py │ ├── fat.py │ ├── fer.py │ ├── ferplus.py │ ├── human36m.py │ ├── omniglot.py │ ├── open_images.py │ ├── shapes.py │ ├── utils.py │ 
└── voc.py ├── evaluation │ ├── __init__.py │ ├── detection.py │ └── pose.py ├── models │ ├── __init__.py │ ├── classification │ │ ├── __init__.py │ │ ├── cnn2Plus1.py │ │ ├── protonet.py │ │ ├── vvad_lrs3.py │ │ └── xception.py │ ├── detection │ │ ├── __init__.py │ │ ├── efficientdet │ │ │ ├── __init__.py │ │ │ ├── efficientdet.py │ │ │ ├── efficientdet_blocks.py │ │ │ ├── efficientnet.py │ │ │ └── layers.py │ │ ├── haar_cascade.py │ │ ├── ssd300.py │ │ ├── ssd512.py │ │ └── utils.py │ ├── keypoint │ │ ├── __init__.py │ │ ├── detnet.py │ │ ├── hrnet.py │ │ ├── iknet.py │ │ ├── keypointnet.py │ │ ├── projector.py │ │ └── simplebaselines.py │ ├── layers.py │ ├── pose_estimation │ │ ├── __init__.py │ │ ├── efficientpose │ │ │ ├── __init__.py │ │ │ ├── efficientpose.py │ │ │ └── efficientpose_blocks.py │ │ └── higher_hrnet.py │ └── segmentation │ │ ├── __init__.py │ │ └── unet.py ├── optimization │ ├── __init__.py │ ├── callbacks.py │ └── losses │ │ ├── __init__.py │ │ ├── keypointnet_loss.py │ │ ├── multi_box_loss.py │ │ └── segmentation │ │ ├── __init__.py │ │ ├── dice_loss.py │ │ ├── focal_loss.py │ │ ├── jaccard_loss.py │ │ └── weighted_reconstruction.py ├── pipelines │ ├── __init__.py │ ├── angles.py │ ├── classification.py │ ├── detection.py │ ├── heatmaps.py │ ├── image.py │ ├── keypoints.py │ ├── masks.py │ ├── pose.py │ └── renderer.py ├── processors │ ├── __init__.py │ ├── angles.py │ ├── detection.py │ ├── draw.py │ ├── geometric.py │ ├── groups.py │ ├── heatmaps.py │ ├── image.py │ ├── keypoints.py │ ├── munkres.py │ ├── pose.py │ ├── renderer.py │ └── standard.py └── utils │ ├── __init__.py │ ├── documentation.py │ └── logger.py ├── setup.py └── tests ├── examples └── pipelines.py └── paz ├── abstract ├── __init__.py ├── messages_test.py ├── processor_test.py └── sequence_test.py ├── backend ├── __init__.py ├── boxes_test.py ├── groups │ ├── test_groups.py │ └── test_quaternion.py ├── heatmaps_test.py ├── image │ ├── draw.py │ └── opencv_image_test.py ├── image_test.py ├── keypoints_backend_test.py ├── keypoints_test.py ├── munkres_test.py ├── render_test.py └── standard_test.py ├── models ├── __init__.py ├── classification │ └── protonet_test.py ├── detection │ ├── efficientdet │ │ └── efficientdet_test.py │ └── ssd300_test.py ├── pose_estimation │ └── efficientpose │ │ └── efficientpose_test.py └── segmentation │ └── unet_test.py ├── optimization ├── __init__.py └── losses │ ├── loss_test.py │ ├── segmentation_losses_test.py │ └── weighted_reconstruction_test.py ├── pipelines ├── __init__.py ├── classification_test.py ├── detection_test.py ├── human_pose_2d_test.py ├── human_pose_3d_tests.py ├── keypoints_test.py ├── minimal_hand_test.py └── pose_test.py ├── processors ├── __init__.py ├── draw_test.py ├── geometric_test.py ├── image_test.py ├── pose_test.py ├── processors_test.py └── standard_test.py └── utils └── documentation_test.py /.github/manifest.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | PAZ is a hierarchical perception library in Python containing 7 | examples for object detection, pose estimation, instance segmentation, 8 | keypoint estimation, emotion recognition, keypoint discovery 9 | and attention. 
10 | 11 | Octavio Arriaga/octavio.arriaga@dfki.de 12 | Octavio Arriaga/octavio.arriaga@dfki.de 13 | MIT 14 | https://github.com/oarriaga/paz 15 | 16 | perception 17 | learning 18 | object detection 19 | pose estimation 20 | keypoint estimation 21 | instance segmentation 22 | emotion recognition 23 | python 24 | tensorflow 25 | openCV 26 | 27 | active 28 | 2 29 | 30 | -------------------------------------------------------------------------------- /.github/workflows/publish-website.yml: -------------------------------------------------------------------------------- 1 | name: Publish Website 2 | on: 3 | push: 4 | branches: 5 | - master 6 | jobs: 7 | deploy: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v2 11 | - uses: actions/setup-python@v2 12 | with: 13 | python-version: 3.8 14 | - run: python -m pip install --upgrade pip 15 | - run: pip install mkdocs-material 16 | - run: pip install . --user 17 | - run: python docs/autogenerate_documentation.py 18 | - run: | 19 | cd docs/ 20 | mkdocs gh-deploy --force 21 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Unit testing 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | python-version: ["3.7", "3.8", "3.9", "3.10"] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v2 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install flake8 pytest 30 | pip install . --user 31 | - name: Lint with flake8 32 | run: | 33 | # stop the build if there are Python syntax errors or undefined names 34 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 35 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 36 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 37 | - name: Test with pytest 38 | run: | 39 | pytest tests/ 40 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 
8 | 9 | name: PyPI package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | - name: Set up Python 26 | uses: actions/setup-python@v3 27 | with: 28 | python-version: '3.x' 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install build 33 | - name: Build package 34 | run: python -m build 35 | - name: Publish package 36 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 37 | with: 38 | user: ${{ secrets.PYPI_USERNAME }} 39 | password: ${{ secrets.PYPI_PASSWORD }} 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.hdf5 3 | *.h5 4 | *.log 5 | *.pkl 6 | *.json 7 | *.pdf 8 | *.txt 9 | *.png 10 | *.jpg 11 | *.xml 12 | *.avi 13 | *.mp4 14 | *.npz 15 | *.csv 16 | *.ipynb_checkpoints 17 | *.ipynb 18 | *tf.data* 19 | *tf.index* 20 | checkpoint 21 | *.coverage 22 | *.npy 23 | *.p 24 | *.zip 25 | *.iml 26 | *.jpeg 27 | *.tgz 28 | *.yml 29 | *.ply 30 | 31 | !.github/manifest.xml 32 | 33 | dist/* 34 | build/* 35 | pypaz.egg-info/ 36 | .pytest_cache/ 37 | 38 | docs/site/* 39 | docs/sources/* 40 | docs/theme/fonts/* 41 | docs/theme/img/* 42 | 43 | install.sh 44 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Pull requests 2 | 3 | 1. Always use full English names for variable names 4 | - Only the following exceptions are allowed: 5 | - "number" should be "num" 6 | - "argument" should be "arg" 7 | - "width" can be "W" if the context is clear. 8 | - "height" can be "H" if the context is clear. 9 | 10 | 2. Functions should be small, approximately six lines (see the short sketch after these guidelines): 11 | - Functions should only do one thing. 12 | - Certain aspects of the code base don't reflect this yet, but we are working on changing it. 13 | 14 | 3. Use PEP 8 syntax conventions: 15 | - This is easily achieved by installing a linter, e.g. [flake8](https://flake8.pycqa.org/en/latest/) 16 | 17 | 4. If new functionality is added, please include unit tests for it. 18 | 19 | 5. Please make sure that all unit tests are passing before you make your PR. 20 | 21 | 6. Commits should try to have the following structure: 22 | - Commit messages are titles: 23 | - Start with a capital letter 24 | - Don't end the commit with a period 25 | - Commits should be written to complete the sentence: 26 | "If applied, this commit will ..." 27 | A good commit would then look like: 28 | "Remove deprecated backend function" 29 | - Find more information about how to write good commits [here](https://chris.beams.io/posts/git-commit/). 30 | 31 | 7. Provide documentation of new features: 32 | - Use the documentation syntax of the repository 33 | - If new functionality is added, please add your function to paz/docs/structure.py 34 | 35 | 36 | 8. After addressing the points discussed here, please submit your PR so that we can start a discussion about it and check that all tests are passing.
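As a quick reference for points 1 and 2 above, here is a minimal illustrative sketch (not taken from the code base) of a function that follows these conventions: full English names, the allowed `num`/`arg`/`W`/`H` abbreviations, one responsibility, and roughly six lines:

```python
import numpy as np


def normalize_keypoints(keypoints, H, W):
    """Scale an array of pixel keypoints with shape (num_keypoints, 2) to [0, 1]."""
    normalized_keypoints = keypoints.astype(float).copy()
    normalized_keypoints[:, 0] = normalized_keypoints[:, 0] / W  # x over image width
    normalized_keypoints[:, 1] = normalized_keypoints[:, 1] / H  # y over image height
    return normalized_keypoints
```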
37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Octavio Arriaga (employed at the University of Bremen) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # PAZ documentation 2 | 3 | Documentation uses extended Markdown, as implemented by [MkDocs](http://mkdocs.org). 4 | 5 | ## Building the documentation 6 | 7 | - Install MkDocs: `pip install mkdocs mkdocs-material ` 8 | - `pip install -e .` to make sure that Python will import your modified version of PAZ. 9 | - From the root directory, `cd` into the `docs/` folder and run: 10 | - `python autogenerate_documentation.py` 11 | - `mkdocs serve` # Starts a local webserver: [localhost:8000](http://localhost:8000) 12 | - `mkdocs build` # Builds a static site in `site/` directory 13 | -------------------------------------------------------------------------------- /docs/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: 'PAZ - Documentation' 2 | 3 | theme: 4 | name: 'material' 5 | palette: 6 | primary: orange 7 | accent: indigo 8 | 9 | markdown_extensions: 10 | - codehilite 11 | 12 | 13 | docs_dir: sources 14 | site_description: 'Modular Robot Perception library in Python.' 
15 | repo_name: 'oarriaga/paz' 16 | repo_url: 'https://github.com/oarriaga/paz' 17 | 18 | 19 | nav: 20 | - Getting-started: index.md 21 | - Installation: installation.md 22 | - Contributing: contributing.md 23 | - Tutorials: 24 | - Intro to processors: getting-started/introduction_to_processors.md 25 | - Image augmentation: getting-started/image_augmentation.md 26 | - Bounding boxes: getting-started/bounding_boxes.md 27 | - Control-map: getting-started/controlmap.md 28 | - Object detection pipeline: getting-started/object_detection_pipeline.md 29 | - Models: 30 | - Detection: models/detection.md 31 | - Keypoints: models/keypoint.md 32 | - Classification: models/classification.md 33 | - Layers: models/layers.md 34 | - Pipelines (high-level): 35 | - Applications: pipelines/applications.md 36 | - Image: pipelines/image.md 37 | - Detection: pipelines/detection.md 38 | - Keypoints: pipelines/keypoints.md 39 | - Pose: pipelines/pose.md 40 | - Renderer: pipelines/renderer.md 41 | - Processors (mid-level): 42 | - Image: processors/image.md 43 | - Draw: processors/draw.md 44 | - Detection: processors/detection.md 45 | - Geometric: processors/geometric.md 46 | - Keypoints: processors/keypoints.md 47 | - Standard: processors/standard.md 48 | - Pose: processors/pose.md 49 | - Renderer: processors/renderer.md 50 | - Backends (low-level): 51 | - Image: backend/image.md 52 | - Draw: backend/draw.md 53 | - Boxes: backend/boxes.md 54 | - Keypoints: backend/keypoints.md 55 | - Quaternions: backend/quaternion.md 56 | - Camera: backend/camera.md 57 | - Render: backend/render.md 58 | - Abstract (core): 59 | - Messages: abstract/messages.md 60 | - Processor: abstract/processor.md 61 | - Sequence: abstract/sequence.md 62 | - Loader: abstract/loader.md 63 | - Additional functionality: 64 | - Datasets: datasets.md 65 | - Losses: optimization/losses.md 66 | - Callbacks: optimization/callbacks.md 67 | -------------------------------------------------------------------------------- /docs/templates/abstract/loader.md: -------------------------------------------------------------------------------- 1 | {{autogenerated}} 2 | -------------------------------------------------------------------------------- /docs/templates/abstract/messages.md: -------------------------------------------------------------------------------- 1 | {{autogenerated}} 2 | -------------------------------------------------------------------------------- /docs/templates/abstract/processor.md: -------------------------------------------------------------------------------- 1 | {{autogenerated}} 2 | -------------------------------------------------------------------------------- /docs/templates/abstract/sequence.md: -------------------------------------------------------------------------------- 1 | {{autogenerated}} 2 | -------------------------------------------------------------------------------- /docs/templates/backend/boxes.md: -------------------------------------------------------------------------------- 1 | Backend functionality for 2D bounding boxes 2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /docs/templates/backend/camera.md: -------------------------------------------------------------------------------- 1 | Backend functionality for cameras 2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /docs/templates/backend/keypoints.md: -------------------------------------------------------------------------------- 1 | 
Backend functionality for 2D keypoints 2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /docs/templates/backend/quaternion.md: -------------------------------------------------------------------------------- 1 | Backend functionality for quaternions 2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /docs/templates/installation.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | PAZ has only **three** dependencies: [TensorFlow 2.0](https://www.tensorflow.org/), [OpenCV](https://opencv.org/) and [NumPy](https://numpy.org/). 3 | 4 | To install PAZ from PyPI run: 5 | 6 | `pip install pypaz --user` 7 | -------------------------------------------------------------------------------- /docs/templates/models/classification.md: -------------------------------------------------------------------------------- 1 | Models for object classification 2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /docs/templates/models/detection.md: -------------------------------------------------------------------------------- 1 | Models for 2D object detection 2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /docs/templates/models/keypoint.md: -------------------------------------------------------------------------------- 1 | Models for 2D keypoint estimation 2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /docs/templates/models/layers.md: -------------------------------------------------------------------------------- 1 | Custom layers used in our models 2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /docs/templates/pipelines/applications.md: -------------------------------------------------------------------------------- 1 | Out-of-the-box high-level pipelines for inference. 2 | All of these pipelines can also be imported from: ``paz.pipelines`` 3 | 4 | {{autogenerated}} 5 | -------------------------------------------------------------------------------- /docs/templates/pipelines/detection.md: -------------------------------------------------------------------------------- 1 | Built-in pipelines for preprocessing, augmenting and predicting. 2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /docs/templates/pipelines/image.md: -------------------------------------------------------------------------------- 1 | Built-in pipelines for preprocessing, augmenting and predicting. 2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /docs/templates/pipelines/keypoints.md: -------------------------------------------------------------------------------- 1 | Built-in pipelines for preprocessing, augmenting and predicting. 2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /docs/templates/pipelines/pose.md: -------------------------------------------------------------------------------- 1 | Built-in pipelines for preprocessing, augmenting and predicting.
2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /docs/templates/processors/detection.md: -------------------------------------------------------------------------------- 1 | Processors for object detection 2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /docs/templates/processors/draw.md: -------------------------------------------------------------------------------- 1 | Processors for drawing 2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /docs/templates/processors/geometric.md: -------------------------------------------------------------------------------- 1 | Processors for geometric image transformations 2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /docs/templates/processors/image.md: -------------------------------------------------------------------------------- 1 | Processors for image transformations 2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /docs/templates/processors/keypoints.md: -------------------------------------------------------------------------------- 1 | Processors for keypoints 2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /docs/templates/processors/pose.md: -------------------------------------------------------------------------------- 1 | Processors for pose estimation 2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /docs/templates/processors/renderer.md: -------------------------------------------------------------------------------- 1 | Processors used for rendering 2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /docs/templates/processors/standard.md: -------------------------------------------------------------------------------- 1 | Standard processors 2 | 3 | {{autogenerated}} 4 | -------------------------------------------------------------------------------- /examples/action_scores/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | from .discretized_actions_scores import DiscretizedActionScores 2 | from .per_pixel_action_score import PerPixelActionScore 3 | from .scalar_action_score import ScalarActionScore 4 | from .scalar_action_score import DataScalarActionScore 5 | from .feature_extractor import FeatureExtractor 6 | -------------------------------------------------------------------------------- /examples/action_scores/callbacks/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | import numpy as np 4 | 5 | from tensorflow.keras.callbacks import Callback 6 | from tensorflow.keras.backend import function 7 | from tensorflow.keras.utils import Progbar 8 | 9 | 10 | class FeatureExtractor(Callback): 11 | def __init__(self, layer_name, sequencer, filepath): 12 | self.layer_name = layer_name 13 | self.sequencer = sequencer 14 | self.filepath = filepath 15 | 16 | @property 17 | def batch_size(self): 18 | return self.sequencer.batch_size 19 | 20 | @property 21 | def num_samples(self): 22 | return len(self.sequencer) * self.sequencer.batch_size 23 | 24 | def on_train_end(self, logs): 25 | print('Extracting features 
from layer:', self.layer_name) 26 | output_tensor = self.model.get_layer(self.layer_name).output 27 | feature_extractor = function(self.model.input, output_tensor) 28 | num_features = output_tensor.shape.as_list()[-1] 29 | 30 | directory_name = os.path.dirname(self.filepath) 31 | if not os.path.exists(directory_name): 32 | os.makedirs(directory_name) 33 | 34 | self.write_file = h5py.File(self.filepath, 'w') 35 | self.features = self.write_file.create_dataset( 36 | 'features', (self.num_samples, num_features)) 37 | 38 | progress_bar = Progbar(len(self.sequencer)) 39 | for batch_index in range(len(self.sequencer)): 40 | inputs = self.sequencer.__getitem__(batch_index)[0] 41 | batch_arg_A = self.batch_size * (batch_index) 42 | batch_arg_B = self.batch_size * (batch_index + 1) 43 | features = feature_extractor(inputs) 44 | features = np.squeeze(features) 45 | self.features[batch_arg_A:batch_arg_B, :] = features 46 | progress_bar.update(batch_index + 1) 47 | self.write_file.close() 48 | -------------------------------------------------------------------------------- /examples/action_scores/callbacks/per_pixel_action_score.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | from tensorflow.keras.callbacks import Callback 4 | from tensorflow.keras.utils import Progbar 5 | 6 | 7 | class PerPixelActionScore(Callback): 8 | def __init__(self, sequencer, topic, evaluator, shape, epochs, filepath): 9 | super(Callback, self).__init__() 10 | self.sequencer = sequencer 11 | self.topic = topic 12 | self.evaluator = evaluator 13 | self.epochs = epochs 14 | H, W = self.shape = shape 15 | self.filepath = filepath 16 | self._build_directory(filepath) 17 | 18 | self.write_file = h5py.File(filepath, 'w') 19 | self.action_scores = self.write_file.create_dataset( 20 | 'action_scores', (self.epochs, self.num_samples, H, W)) 21 | 22 | def _build_directory(self, filepath): 23 | directory_name = os.path.dirname(filepath) 24 | if not os.path.exists(directory_name): 25 | os.makedirs(directory_name) 26 | 27 | @property 28 | def num_samples(self): 29 | return len(self.sequencer) * self.sequencer.batch_size 30 | 31 | @property 32 | def batch_size(self): 33 | return self.sequencer.batch_size 34 | 35 | def on_epoch_end(self, epoch, logs=None): 36 | print('\n Computing per-pixel evaluations for epoch', epoch) 37 | progress_bar = Progbar(len(self.sequencer)) 38 | for batch_index in range(len(self.sequencer)): 39 | inputs, labels = self.sequencer.__getitem__(batch_index) 40 | batch_arg_A = self.batch_size * (batch_index) 41 | batch_arg_B = self.batch_size * (batch_index + 1) 42 | y_true = labels[self.topic] 43 | y_pred = self.model(inputs) 44 | score = self.evaluator(y_true, y_pred) 45 | self.action_scores[epoch, batch_arg_A:batch_arg_B, :, :] = score 46 | progress_bar.update(batch_index + 1) 47 | self.action_scores.flush() 48 | 49 | def on_train_end(self, logs=None): 50 | print('\n Closing writing file in ', self.filepath) 51 | self.write_file.close() 52 | -------------------------------------------------------------------------------- /examples/action_scores/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .mnist import MNIST 2 | from .cifar10 import CIFAR10 3 | from .kuzushiji_mnist import KuzushijiMNIST 4 | from .fashion_mnist import FashionMNIST 5 | from .imagenet import ImageNet64 6 | from .fer import FER 7 | from .ferplus import FERPlus 8 | 
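The loaders exported above all follow the same `paz.abstract.Loader` pattern used throughout this example: `load_data()` returns a list of sample dictionaries with `image` and `label` keys. A minimal usage sketch (illustrative only; it assumes the snippet is run from the `action_scores` example directory so that the `datasets` package above is importable):

```python
# Illustrative sketch, assumed to run from examples/action_scores/:
# load MNIST through the example's Loader wrapper and inspect one sample.
from datasets import MNIST

loader = MNIST(split='train')
samples = loader.load_data()        # list of {'image': array, 'label': one-hot vector}
first_sample = samples[0]
print(first_sample['image'].shape)  # (28, 28)
print(first_sample['label'].shape)  # (10,)
```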
-------------------------------------------------------------------------------- /examples/action_scores/datasets/cifar10.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.datasets import cifar10 2 | from tensorflow import keras 3 | from paz.abstract import Loader 4 | 5 | from .utils import get_class_names 6 | 7 | 8 | class CIFAR10(Loader): 9 | def __init__(self, split='train', class_names='all', image_size=(32, 32)): 10 | if class_names == 'all': 11 | class_names = get_class_names('CIFAR10') 12 | super(CIFAR10, self).__init__(None, split, class_names, 'CIFAR10') 13 | self.image_size = image_size 14 | self.split_to_arg = {'train': 0, 'test': 1} 15 | 16 | def load_data(self): 17 | images, labels = cifar10.load_data()[self.split_to_arg[self.split]] 18 | images = images.reshape( 19 | len(images), self.image_size[0], self.image_size[1], 3) 20 | labels = keras.utils.to_categorical(labels, self.num_classes) 21 | data = [] 22 | for image, label in zip(images, labels): 23 | sample = {'image': image, 'label': label} 24 | data.append(sample) 25 | return data 26 | -------------------------------------------------------------------------------- /examples/action_scores/datasets/fashion_mnist.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.datasets import fashion_mnist 2 | from tensorflow import keras 3 | from paz.abstract import Loader 4 | 5 | from .utils import get_class_names 6 | 7 | 8 | class FashionMNIST(Loader): 9 | def __init__(self, split='train', class_names='all', image_size=(28, 28)): 10 | if class_names == 'all': 11 | class_names = get_class_names('FashionMNIST') 12 | super(FashionMNIST, self).__init__( 13 | None, split, class_names, 'FashionMNIST') 14 | self.image_size = image_size 15 | self.split_to_arg = {'train': 0, 'test': 1} 16 | 17 | def load_data(self): 18 | split_arg = self.split_to_arg[self.split] 19 | images, labels = fashion_mnist.load_data()[split_arg] 20 | images = images.reshape( 21 | len(images), self.image_size[0], self.image_size[1]) 22 | labels = keras.utils.to_categorical(labels, self.num_classes) 23 | data = [] 24 | for image, label in zip(images, labels): 25 | sample = {'image': image, 'label': label} 26 | data.append(sample) 27 | return data 28 | -------------------------------------------------------------------------------- /examples/action_scores/datasets/fer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from tensorflow.keras.utils import to_categorical 4 | from paz.abstract import Loader 5 | from paz.backend.image import resize_image 6 | 7 | from .utils import get_class_names 8 | 9 | 10 | class FER(Loader): 11 | """Class for loading FER2013 emotion classification dataset. 12 | # Arguments 13 | path: String. Path to directory that has inside the files: 14 | `fer2013.csv` 15 | split: String. Valid option contain 'train', 'val' or 'test'. 16 | class_names: String or list: If 'all' then it loads all default 17 | class names. 18 | image_size: List of length two. Indicates the shape in which 19 | the image will be resized. 
20 | 21 | # References 22 | -[FER2013 Dataset and Challenge](kaggle.com/c/challenges-in-\ 23 | representation-learning-facial-expression-recognition-challenge) 24 | """ 25 | 26 | def __init__( 27 | self, path, split='train', class_names='all', image_size=(48, 48)): 28 | 29 | if class_names == 'all': 30 | class_names = get_class_names('FER') 31 | 32 | path = os.path.join(path, 'fer2013.csv') 33 | super(FER, self).__init__(path, split, class_names, 'FER') 34 | self.image_size = image_size 35 | self._split_to_filter = {'train': 'Training', 'val': 'PublicTest', 36 | 'test': 'PrivateTest'} 37 | 38 | def load_data(self): 39 | data = np.genfromtxt(self.path, str, delimiter=',', skip_header=1) 40 | data = data[data[:, -1] == self._split_to_filter[self.split]] 41 | faces = np.zeros((len(data), *self.image_size, 1)) 42 | for sample_arg, sample in enumerate(data): 43 | face = np.array(sample[1].split(' '), dtype=int).reshape(48, 48) 44 | face = resize_image(face, self.image_size) 45 | faces[sample_arg, :, :, 0] = face 46 | emotions = to_categorical(data[:, 0].astype(int), self.num_classes) 47 | 48 | data = [] 49 | for face, emotion in zip(faces, emotions): 50 | sample = {'image': face, 'label': emotion} 51 | data.append(sample) 52 | return data 53 | -------------------------------------------------------------------------------- /examples/action_scores/datasets/kuzushiji_mnist.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.utils import get_file 2 | from tensorflow import keras 3 | from paz.abstract import Loader 4 | import numpy as np 5 | import os 6 | 7 | from .utils import get_class_names 8 | 9 | 10 | class KuzushijiMNIST(Loader): 11 | def __init__(self, split='train', class_names='all', image_size=(28, 28)): 12 | if class_names == 'all': 13 | class_names = get_class_names('MNIST') 14 | super(KuzushijiMNIST, self).__init__( 15 | None, split, class_names, 'KuzushijiMNIST') 16 | self.image_size = image_size 17 | self.root_origin = 'http://codh.rois.ac.jp/kmnist/dataset/kmnist/' 18 | 19 | def load_data(self): 20 | data_parts = [] 21 | for data_part in ['imgs', 'labels']: 22 | name = '-'.join(['kmnist', self.split, data_part + '.npz']) 23 | origin = os.path.join(self.root_origin, name) 24 | path = get_file(name, origin, cache_subdir='paz/datasets') 25 | with np.load(path, allow_pickle=True) as array: 26 | data_parts.append(array['arr_0']) 27 | images, labels = data_parts 28 | images = images.reshape(len(images), *self.image_size) 29 | labels = keras.utils.to_categorical(labels, self.num_classes) 30 | data = [] 31 | for image, label in zip(images, labels): 32 | sample = {'image': image, 'label': label} 33 | data.append(sample) 34 | return data 35 | -------------------------------------------------------------------------------- /examples/action_scores/datasets/mnist.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.datasets import mnist 2 | from tensorflow import keras 3 | from paz.abstract import Loader 4 | 5 | from .utils import get_class_names 6 | 7 | 8 | class MNIST(Loader): 9 | def __init__(self, split='train', class_names='all', image_size=(28, 28)): 10 | if class_names == 'all': 11 | class_names = get_class_names('MNIST') 12 | super(MNIST, self).__init__(None, split, class_names, 'MNIST') 13 | self.image_size = image_size 14 | self.split_to_arg = {'train': 0, 'test': 1} 15 | 16 | def load_data(self): 17 | images, labels = mnist.load_data()[self.split_to_arg[self.split]] 18 
| images = images.reshape( 19 | len(images), self.image_size[0], self.image_size[1]) 20 | labels = keras.utils.to_categorical(labels, self.num_classes) 21 | data = [] 22 | for image, label in zip(images, labels): 23 | sample = {'image': image, 'label': label} 24 | data.append(sample) 25 | return data 26 | -------------------------------------------------------------------------------- /examples/action_scores/datasets/poisoned_cifar10.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.datasets import cifar10 2 | from tensorflow import keras 3 | from paz.abstract import Loader 4 | 5 | from .utils import get_class_names 6 | 7 | 8 | class PoisonedCIFAR10(Loader): 9 | def __init__(self, split='train', class_names='all', image_size=(32, 32), 10 | poison_percentage=0.2, seed=777): 11 | super(PoisonedCIFAR10, self).__init__( 12 | None, split, class_names, 'PoisonedCIFAR10') 13 | 14 | if class_names == 'all': 15 | class_names = get_class_names('CIFAR10') 16 | self.image_size = image_size 17 | self.split_to_arg = {'train': 0, 'test': 1} 18 | 19 | def load_data(self): 20 | images, labels = cifar10.load_data()[self.split_to_arg[self.split]] 21 | images = images.reshape( 22 | len(images), self.image_size[0], self.image_size[1], 3) 23 | labels = keras.utils.to_categorical(labels, self.num_classes) 24 | data = [] 25 | for image, label in zip(images, labels): 26 | sample = {'image': image, 'label': label} 27 | data.append(sample) 28 | return data 29 | -------------------------------------------------------------------------------- /examples/action_scores/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def get_class_names(dataset_name='MNIST'): 5 | if dataset_name == 'MNIST': 6 | return ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] 7 | if dataset_name == 'FashionMNIST': 8 | return ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 9 | 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot'] 10 | if dataset_name == 'CIFAR10': 11 | return ['airplane', 'automobile', 'bird', 'cat', 'deer', 12 | 'dog', 'frog', 'horse', 'ship', 'truck'] 13 | if dataset_name == 'KuzushijiMNIST': 14 | return ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] 15 | if dataset_name == 'FER': 16 | return ['angry', 'disgust', 'fear', 'happy', 17 | 'sad', 'surprise', 'neutral'] 18 | else: 19 | raise ValueError('Invalid dataset name') 20 | 21 | 22 | def poison_labels(labels, num_classes, percent=0.1): 23 | num_samples = len(labels) 24 | num_poisons = int(num_samples * percent) 25 | selected_args = np.random.choice(num_samples, num_poisons, replace=False) 26 | poisoned_labels = labels.copy() 27 | for arg in selected_args: 28 | poisoned_labels[arg] = poison_label(poisoned_labels[arg], num_classes) 29 | return poisoned_labels 30 | 31 | 32 | def poison_label(label, num_classes): 33 | valid_class_args = list(range(num_classes)) 34 | valid_class_args.remove(label) 35 | poison = np.random.choice(valid_class_args, 1)[0] 36 | return poison 37 | 38 | 39 | def test_compute_poison(num_classes=10, num_samples=100): 40 | labels = np.random.randint(0, num_classes, num_samples) 41 | for label_arg, label in enumerate(labels): 42 | poison = poison_label(label, num_classes) 43 | assert poison != label 44 | 45 | 46 | def test_num_poisoned_labels(num_classes=10, num_samples=100, percent=0.1): 47 | labels = np.random.randint(0, num_classes, num_samples) 48 | poisoned_labels = poison_labels(labels, num_classes, 
percent) 49 | num_poisons = np.sum(labels != poisoned_labels) 50 | assert num_poisons == int(percent * num_samples) 51 | 52 | 53 | test_compute_poison() 54 | test_num_poisoned_labels() 55 | -------------------------------------------------------------------------------- /examples/action_scores/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .cnn import CNN_KERAS_A 2 | from .cnn import CNN_KERAS_B 3 | from .xception import XCEPTION_MINI 4 | from .resnet import RESNET_V2 5 | from .autoencoder import CNN_AUTOENCODER 6 | from .mlp import MLP_A 7 | -------------------------------------------------------------------------------- /examples/action_scores/models/cnn.py: -------------------------------------------------------------------------------- 1 | # code extracted and modified from keras/examples/ 2 | 3 | from tensorflow.keras.models import Model 4 | from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D 5 | from tensorflow.keras.layers import Dropout, Flatten, Input, Activation 6 | 7 | 8 | def CNN_KERAS_A(input_shape, num_classes): 9 | inputs = Input(input_shape, name='image') 10 | x = Conv2D(32, (3, 3), activation='relu')(inputs) 11 | x = Conv2D(64, (3, 3), activation='relu')(x) 12 | x = MaxPooling2D(pool_size=(2, 2))(x) 13 | x = Dropout(0.25)(x) 14 | x = Flatten()(x) 15 | x = Dense(128, activation='relu')(x) 16 | x = Dropout(0.5)(x) 17 | outputs = Dense(num_classes, activation='softmax', name='label')(x) 18 | model = Model(inputs, outputs, name='CNN-KERAS-A') 19 | return model 20 | 21 | 22 | def CNN_KERAS_B(input_shape, num_classes): 23 | inputs = Input(input_shape, name='image') 24 | x = Conv2D(32, (3, 3), padding='same')(inputs) 25 | x = Activation('relu')(x) 26 | x = Conv2D(32, (3, 3))(x) 27 | x = Activation('relu')(x) 28 | x = MaxPooling2D(pool_size=(2, 2))(x) 29 | x = Dropout(0.25)(x) 30 | 31 | x = Conv2D(64, (3, 3), padding='same')(inputs) 32 | x = Activation('relu')(x) 33 | x = Conv2D(64, (3, 3))(x) 34 | x = Activation('relu')(x) 35 | x = MaxPooling2D(pool_size=(2, 2))(x) 36 | x = Dropout(0.25)(x) 37 | 38 | x = Flatten()(x) 39 | x = Dense(512)(x) 40 | x = Activation('relu')(x) 41 | x = Dropout(0.5)(x) 42 | x = Dense(num_classes)(x) 43 | outputs = Activation('softmax', name='label')(x) 44 | model = Model(inputs, outputs, name='CNN-KERAS-B') 45 | return model 46 | -------------------------------------------------------------------------------- /examples/action_scores/models/mlp.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.models import Sequential 2 | from tensorflow.keras.layers import Dense 3 | 4 | 5 | def MLP_A(num_classes=1, hidden_neurons=128): 6 | model = Sequential( 7 | [Dense(hidden_neurons, activation='relu', input_shape=(2,)), 8 | Dense(1, activation='sigmoid')], name='MLP' + str(hidden_neurons)) 9 | return model 10 | -------------------------------------------------------------------------------- /examples/action_scores/models/xception.py: -------------------------------------------------------------------------------- 1 | from paz.models.classification.xception import build_xception 2 | 3 | 4 | def XCEPTION_MINI(input_shape, num_classes): 5 | stem_kernels, block_data = [32, 64], [128, 128, 256, 256, 512, 512, 1024] 6 | model_inputs = (input_shape, num_classes, stem_kernels, block_data) 7 | model = build_xception(*model_inputs) 8 | model._name = 'XCEPTION-MINI' 9 | return model 10 | 
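A brief usage sketch for the model builders above (illustrative only; the input shape is an assumption matching the 28x28 grayscale loaders, and the import assumes the snippet is run from the `action_scores` example directory):

```python
# Illustrative sketch: instantiate two of the example classifiers defined above.
from models import CNN_KERAS_A, MLP_A

classifier = CNN_KERAS_A(input_shape=(28, 28, 1), num_classes=10)
classifier.summary()                    # functional Keras model named 'CNN-KERAS-A'

binary_mlp = MLP_A(hidden_neurons=128)  # small sigmoid MLP over 2-D inputs
binary_mlp.summary()
```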
-------------------------------------------------------------------------------- /examples/action_scores/pipelines.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from paz import processors as pr 3 | 4 | 5 | class OneHotVector(pr.Processor): 6 | def __init__(self, num_classes): 7 | self.num_classes = num_classes 8 | super(OneHotVector, self).__init__() 9 | 10 | def call(self, label): 11 | one_hot_vector = np.zeros(self.num_classes) 12 | one_hot_vector[label] = 1.0 13 | return one_hot_vector 14 | 15 | 16 | class ProcessImage(pr.SequentialProcessor): 17 | def __init__(self, size, num_classes, grayscale=True, hot_vector=False): 18 | super(ProcessImage, self).__init__() 19 | preprocess = pr.SequentialProcessor() 20 | preprocess.add(pr.ResizeImage((size, size))) 21 | preprocess.add(pr.CastImage(float)) 22 | if grayscale: 23 | preprocess.add(pr.ExpandDims(axis=-1)) 24 | preprocess.add(pr.NormalizeImage()) 25 | 26 | self.add(pr.UnpackDictionary(['image', 'label'])) 27 | self.add(pr.ControlMap(preprocess)) 28 | if hot_vector: 29 | self.add(pr.ControlMap(OneHotVector(num_classes), [1], [1])) 30 | num_channels = 1 if grayscale else 3 31 | self.add(pr.SequenceWrapper( 32 | {0: {'image': [size, size, num_channels]}}, 33 | {1: {'label': [num_classes]}})) 34 | -------------------------------------------------------------------------------- /examples/discovery_of_latent_keypoints/README.md: -------------------------------------------------------------------------------- 1 | This example requires installing two additional libraries for rendering: 2 | ``` 3 | pip install pyrender --user 4 | pip install trimesh --user 5 | ``` 6 | If you are running the script on a remote machine via SSH, you might run 7 | into the following error: 8 | 9 | ``` 10 | ValueError: Failed to initialize Pyglet window with an OpenGL >= 3+ context. If you're logged in via SSH, ensure that you're running your script with vglrun (i.e. VirtualGL). The internal error message was "" 11 | ``` 12 | To fix this, you need to run PyOpenGL in a headless configuration, which is not enabled by default. Just uncomment the following line (4) in discover_latent_keypoints.py: 13 | ``` 14 | # os.environ["PYOPENGL_PLATFORM"] = 'egl' 15 | ``` 16 | This will use GPU-accelerated rendering on your remote machine. To use CPU-accelerated rendering, you need to use 17 | OSMesa instead of EGL. However, this is not tested yet. 18 | ``` 19 | # os.environ["PYOPENGL_PLATFORM"] = 'osmesa' 20 | ``` 21 | 22 | If you run into the following error: 23 | ``` 24 | tensorflow.python.framework.errors_impl.UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
[Op:Conv2D] 25 | ``` 26 | Either you are short on memory (reduce batch size) or uncomment the following line(5) in discover_latent_keypoints.py 27 | 28 | ``` 29 | # os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' 30 | ``` -------------------------------------------------------------------------------- /examples/discovery_of_latent_keypoints/run_experiments.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # class_name="obj_000001" 4 | # obj_path="$HOME/.keras/paz/datasets/tless/models_cad/$class_name.ply" 5 | class_name="035_power_drill_test" 6 | obj_path="/home/robot/datasets/3d_models/klt.obj" 7 | python3 discover_latent_keypoints.py --filepath $obj_path --class_name $class_name --depth 0.30 --num_keypoints 8 --batch_size 10 --smooth False --rotation_noise 0.0 --loss_weights 1.0 1.0 10.0 0.2 0.5 8 | # python3 discover_latent_keypoints.py --obj_path $obj_path --class_name $class_name -d 80 -kp 10 9 | # python3 discover_latent_keypoints.py --obj_path $obj_path --class_name $class_name -d 80 -kp 15 10 | # python3 discover_latent_keypoints.py --obj_path $obj_path --class_name $class_name -d 80 -kp 20 11 | # python3 discover_latent_keypoints.py --obj_path $obj_path --class_name $class_name -d 80 -kp 30 12 | -------------------------------------------------------------------------------- /examples/efficientdet/README.md: -------------------------------------------------------------------------------- 1 | ### This example shows the following: 2 | ####   1. How to use EfficientDet models out of the box for object detection. (explained in [section-1](#1-using-coco-pre-trained-efficientdet-models)) 3 | ####   2. How to train or fine-tune an EfficientDet model on a custom dataset. (explained in [section-2](#2-training-or-fine-tuning-efficientdet-models-on-custom-dataset)) 4 | --- 5 | 6 | ### 1. Using COCO pre-trained EfficientDet models 7 | * `paz` contains Efficientdet models EfficientDetD0, EfficientDetD1, EfficientDetD2, ... until EfficientDetD7 that are pre-trained on COCO dataset and are ready to be used. 8 | * An example usage of COCO pre-trained EfficientDetD0 model is shown in `demo.py` python script. 9 | * To run the inference simply run the following command: 10 | ``` 11 | python demo.py 12 | ``` 13 | * To test the live object detection from camera, run: 14 | ``` 15 | python demo_video.py 16 | ``` 17 | 18 | * To perform inference using larger EfficientDet models, replace `EFFICIENTDETD0COCO` with `EFFICIENTDETDXCOCO` in the `demo.py` or `demo_video.py` script, where X in `EFFICIENTDETDXCOCO` can take values from (0,) 1 to 7. 19 | * In this way any of the readily available COCO pre-trained EfficientDet model can be used for inference. 20 | 21 | --- 22 | 23 | ### 2. Training (or fine tuning) EfficientDet models on custom dataset 24 | * To train or fine tune an EfficientDet model on a custom dataset, you may wish to use COCO pretrained weights rather than training the model from scratch. 25 | * To do so, in the `train.py` script set the `base_weights` to `'COCO'` and `head_weights` to `None`. 26 | * Replace `num_classes` by a value that indicates the number of classes in the custom dataset that one wants to train the model on. 27 | 28 | The following gives an example on how to train an EfficientDetD0 model on VOC dataset. 29 | 1. In this same directory download the VOC dataset into a folder named `VOCdevkit`. 30 | 2. In the `train.py` script replace the `num_classes` by the number of classes in the VOC dataset i.e 21. 31 | 3. 
Further in the line where the EfficientDet model is instantiated set `base_weights` to `'COCO`' and `head_weights` to `None`. 32 | 4. Any optimizer and a suitable loss function can be chosen. By default `SGD` optimizer and `MultiBoxLoss` from `paz.optimization` is used. 33 | 5. You may also choose training parameters such as `batch_size`, `learning_rate`, `momentum` etc, according to your application. Default values are used when they not explicitly specified. 34 | 6. To start the training run the following command. 35 | ``` 36 | python train.py 37 | ``` 38 | --- 39 | -------------------------------------------------------------------------------- /examples/efficientdet/debugger.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from paz import processors as pr 3 | from paz.abstract import Processor, SequentialProcessor 4 | from paz.datasets import VOC 5 | from paz.pipelines.detection import AugmentDetection 6 | 7 | from efficientdet import EFFICIENTDETD0 8 | 9 | 10 | class ShowBoxes(Processor): 11 | def __init__(self, class_names, prior_boxes, 12 | variances=[0.1, 0.1, 0.2, 0.2]): 13 | super(ShowBoxes, self).__init__() 14 | self.deprocess_boxes = SequentialProcessor([ 15 | pr.DecodeBoxes(prior_boxes, variances), 16 | pr.ToBoxes2D(class_names, True), 17 | pr.FilterClassBoxes2D(class_names[1:])]) 18 | self.denormalize_boxes2D = pr.DenormalizeBoxes2D() 19 | self.draw_boxes2D = pr.DrawBoxes2D(class_names) 20 | self.show_image = pr.ShowImage() 21 | self.resize_image = pr.ResizeImage((600, 600)) 22 | 23 | def call(self, image, boxes): 24 | image = self.resize_image(image) 25 | boxes2D = self.deprocess_boxes(boxes) 26 | boxes2D = self.denormalize_boxes2D(image, boxes2D) 27 | image = self.draw_boxes2D(image, boxes2D) 28 | image = (image + pr.BGR_IMAGENET_MEAN).astype(np.uint8) 29 | image = image[..., ::-1] 30 | self.show_image(image) 31 | return image, boxes2D 32 | 33 | 34 | size = 512 35 | split = 'train' 36 | epochs = 120 37 | batch_size = 30 38 | 39 | data_manager = VOC('VOCdevkit/') 40 | data = data_manager.load_data() 41 | 42 | class_names = data_manager.class_names 43 | model = EFFICIENTDETD0() 44 | prior_boxes = model.prior_boxes 45 | 46 | testor_encoder = AugmentDetection(prior_boxes, size=size) 47 | testor_decoder = ShowBoxes(class_names, prior_boxes) 48 | sample_arg = 0 49 | for sample_arg in range(2): 50 | sample = data[sample_arg] 51 | wrapped_outputs = testor_encoder(sample) 52 | print(wrapped_outputs['labels']) 53 | image = wrapped_outputs['inputs']['image'] 54 | boxes = wrapped_outputs['labels']['boxes'] 55 | image, boxes = testor_decoder(image, boxes) 56 | -------------------------------------------------------------------------------- /examples/efficientdet/demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tensorflow.keras.utils import get_file 3 | from paz.backend.image import load_image, show_image 4 | from paz.pipelines import EFFICIENTDETD0COCO 5 | 6 | URL = ('https://github.com/oarriaga/altamira-data/releases/download/v0.16/' 7 | 'image_with_multiple_objects.png') 8 | 9 | filename = os.path.basename(URL) 10 | fullpath = get_file(filename, URL, cache_subdir='paz/tests') 11 | image = load_image(fullpath) 12 | 13 | detect = EFFICIENTDETD0COCO(score_thresh=0.60, nms_thresh=0.25) 14 | detections = detect(image) 15 | show_image(detections['image']) 16 | -------------------------------------------------------------------------------- 
/examples/efficientdet/demo_video.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from paz.backend.camera import Camera 3 | from paz.backend.camera import VideoPlayer 4 | from paz.pipelines import EFFICIENTDETD0COCO 5 | 6 | description = 'Demo script for object detection using EfficientDet model' 7 | parser = argparse.ArgumentParser(description=description) 8 | parser.add_argument('-c', '--camera_id', type=int, default=0, 9 | help='Camera device ID') 10 | args = parser.parse_args() 11 | 12 | camera = Camera(args.camera_id) 13 | pipeline = EFFICIENTDETD0COCO() 14 | player = VideoPlayer((640, 480), pipeline, camera) 15 | player.run() 16 | -------------------------------------------------------------------------------- /examples/efficientdet/draw.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | FONT = cv2.FONT_HERSHEY_SIMPLEX 4 | LINE = cv2.LINE_AA 5 | 6 | 7 | def put_text(image, text, point, scale, color, thickness): 8 | """Puts text on given image. 9 | 10 | # Arguments 11 | image: Array of shape `(H, W, 3)`, input image. 12 | text: String, text to show. 13 | point: Tuple, coordinate of top corner of text. 14 | scale: Float, scale of text. 15 | color: Tuple, RGB color coordinates. 16 | thickness: Int, text thickness. 17 | 18 | # Returns 19 | Array: Image of shape `(H, W, 3)` with text. 20 | """ 21 | image = cv2.putText( 22 | image, text, point, FONT, scale, color, thickness, LINE) 23 | return image 24 | 25 | 26 | def compute_text_bounds(text, scale, thickness, FONT=FONT): 27 | """Computes given text size. 28 | 29 | # Arguments 30 | text: Str, given text. 31 | scale: Float, text scale. 32 | thickness: Int, text line thickness. 33 | FONT: Int, text font. 34 | 35 | # Returns 36 | Tuple: holding width and height of given text. 37 | """ 38 | text_size = cv2.getTextSize(text, FONT, scale, thickness) 39 | return text_size 40 | 41 | 42 | def draw_opaque_box(image, corner_A, corner_B, color, thickness=-1): 43 | """Draws filled rectangle. 44 | 45 | # Arguments 46 | image: Array of shape `(H, W, 3)`, input image. 47 | corner_A: List, top left rectangle coordinate. 48 | corner_B: List, bottom right rectangle coordinate. 49 | color: List, rectangle's RGB color. 50 | thickness: Int, rectangle's line thickness. 51 | 52 | # Returns 53 | Array: image of shape `(H, W, 3)` with filled rectangle. 54 | """ 55 | image = cv2.rectangle(image, corner_A, corner_B, color, thickness) 56 | return image 57 | 58 | 59 | def make_box_transparent(raw_image, image, alpha=0.25): 60 | """Blends two images for transparency. 61 | 62 | # Arguments 63 | raw_image: Array of shape `(H, W, 3)`, first input image. 64 | image: Array of shape `(H, W, 3)`, second input image. 65 | alpha: Float, sum weight. 66 | 67 | # Returns 68 | Array: Blended image of shape `(H, W, 3)`. 
69 | """ 70 | image = cv2.addWeighted(raw_image, 1 - alpha, image, alpha, 0.0) 71 | return image 72 | -------------------------------------------------------------------------------- /examples/efficientdet/evaluate_mAP.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | from paz.datasets import VOC 4 | from paz.optimization.callbacks import EvaluateMAP 5 | from paz.pipelines import DetectSingleShot 6 | from efficientdet import EFFICIENTDETD0 7 | 8 | if __name__ == "__main__": 9 | weights_path = "./trained_models/efficientdet-d0/" 10 | evaluation_frequency = 1 11 | 12 | if '.hdf5' in weights_path: 13 | evaluation_frequency = 1 14 | weights = [weights_path.split('/')[-1]] 15 | weights_path = "/".join(weights_path.split('/')[0:-1]) + "/" 16 | else: 17 | list_of_files = sorted(filter(lambda x: os.path.isfile( 18 | os.path.join(weights_path, x)), 19 | os.listdir(weights_path))) 20 | weights = [weight_file 21 | for weight_file in list_of_files 22 | if '.hdf5' in weight_file] 23 | 24 | gpus = tf.config.experimental.list_physical_devices('GPU') 25 | data_names = [['VOC2007', 'VOC2012'], 'VOC2007'] 26 | data_splits = [['trainval', 'trainval'], 'test'] 27 | data_managers, datasets, evaluation_data_managers = [], [], [] 28 | for data_name, data_split in zip(data_names, data_splits): 29 | data_manager = VOC('VOCdevkit/', data_split, name=data_name) 30 | data_managers.append(data_manager) 31 | datasets.append(data_manager.load_data()) 32 | if data_split == 'test': 33 | eval_data_manager = VOC( 34 | 'VOCdevkit/', data_split, name=data_name, evaluate=True) 35 | evaluation_data_managers.append(eval_data_manager) 36 | 37 | model = EFFICIENTDETD0(num_classes=21, base_weights='COCO', 38 | head_weights=None) 39 | for weight in weights[::evaluation_frequency]: 40 | model.load_weights(weights_path + weight) 41 | try: 42 | evaluate = EvaluateMAP( 43 | evaluation_data_managers[0], DetectSingleShot( 44 | model, data_managers[0].class_names, 0.01, 0.45), 45 | evaluation_frequency, './trained_models/', 0.5) 46 | epoch = int(weight.split('.')[1].split('-')[0]) - 2 47 | evaluate.on_epoch_end(epoch, None) 48 | except: 49 | pass 50 | -------------------------------------------------------------------------------- /examples/efficientpose/README.md: -------------------------------------------------------------------------------- 1 | Two modules of paz EfficientPose namely translation anchors and 2 | pose loss come under a different license given by author. 
-------------------------------------------------------------------------------- /examples/efficientpose/demo.py: -------------------------------------------------------------------------------- 1 | from paz.backend.image import load_image, show_image 2 | from pose import EfficientPosePhi0LinemodDriller 3 | 4 | IMAGE_PATH = ('/home/manummk95/Desktop/paz/paz/examples/efficientpose/' 5 | 'Linemod_preprocessed/data/08/rgb/0002.png') 6 | 7 | 8 | detect = EfficientPosePhi0LinemodDriller(score_thresh=0.60, nms_thresh=0.45, 9 | show_boxes2D=True, show_poses6D=True) 10 | detect.model.load_weights('weights.6760-1.39.hdf5') 11 | image = load_image(IMAGE_PATH) 12 | inferences = detect(image) 13 | show_image(inferences['image']) 14 | -------------------------------------------------------------------------------- /examples/efficientpose/pose.py: -------------------------------------------------------------------------------- 1 | from paz.pipelines.pose import EstimateEfficientPose 2 | from linemod import (LINEMOD_CAMERA_MATRIX, LINEMOD_OBJECT_SIZES, 3 | RGB_LINEMOD_MEAN) 4 | from paz.models.pose_estimation.efficientpose import EfficientPosePhi0 5 | from processors import ComputeTxTyTz, RegressTranslation 6 | from anchors import build_translation_anchors 7 | 8 | 9 | class EfficientPosePhi0LinemodDriller(EstimateEfficientPose): 10 | """Inference pipeline with EfficientPose phi=0 trained on Linemod. 11 | 12 | # Arguments 13 | score_thresh: Float between [0, 1]. 14 | nms_thresh: Float between [0, 1]. 15 | show_boxes2D: Boolean. If ``True`` prediction 16 | are drawn in the returned image. 17 | show_poses6D: Boolean. If ``True`` estimated poses 18 | are drawn in the returned image. 19 | 20 | # References 21 | [EfficientPose: An efficient, accurate and scalable end-to-end 22 | 6D multi object pose estimation approach]( 23 | https://arxiv.org/pdf/2011.04307.pdf) 24 | """ 25 | def __init__(self, score_thresh=0.60, nms_thresh=0.45, 26 | show_boxes2D=False, show_poses6D=True): 27 | names = ['background', 'driller'] 28 | model = EfficientPosePhi0(build_translation_anchors, 29 | num_classes=len(names), base_weights='COCO', 30 | head_weights=None) 31 | regress_translation = RegressTranslation(model.translation_priors) 32 | compute_tx_ty_tz = ComputeTxTyTz() 33 | super(EfficientPosePhi0LinemodDriller, self).__init__( 34 | model, names, score_thresh, nms_thresh, 35 | LINEMOD_OBJECT_SIZES, RGB_LINEMOD_MEAN, LINEMOD_CAMERA_MATRIX, 36 | regress_translation, compute_tx_ty_tz, show_boxes2D=show_boxes2D, 37 | show_poses6D=show_poses6D) 38 | -------------------------------------------------------------------------------- /examples/eigenfaces/README.md: -------------------------------------------------------------------------------- 1 | This example requires you to manually download the [FERPlus](https://github.com/microsoft/FERPlus) and [FER2013](https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data) csv dataset files. 2 | 3 | More explicitly, you must extract the files "fer2013new.csv" and "fer2013.csv" from FERPlus and FER respectively, and put them inside the directory the directory ~/.keras/paz/datasets/FERPlus (if this directory is not there you must create it). 4 | 5 | After that you must be able to run: 6 | 7 | ``python eigenfaces.py`` 8 | 9 | Alternatively you can extract the "fer2013.csv" and "fer2013new.csv" file and located them in a directory ``my_data`` and explicitly pass the ``args.data_path`` to ``eigenfaces.py`` i.e. 
10 | 11 | ``python eigenfaces.py --data_path my_data`` 12 | 13 | ### **To create database follow the following directory structure:** 14 | 15 | ``` 16 | 17 | ├── database 18 | │ ├── images 19 | │ │ ├── 20 | │ │ ├── ├── image1.png 21 | │ │ ├── ├── . 22 | │ │ ├── ├── . 23 | │ │ ├── 24 | │ │ ├── ├── image1.png 25 | │ │ ├── ├── . 26 | │ │ ├── ├── . 27 | │ │ ├── . 28 | │ │ ├── . 29 | │ ├── database.npy 30 | ├── experiments 31 | │ ├── eigenfaces.npy 32 | │ ├── eigenvalues.npy 33 | │ ├── mean_face.npy 34 | ├── database.py 35 | ├── demo.py 36 | ├── eigenfaces.py 37 | ├── pipelines.py 38 | ├── processors.py 39 | 40 | ``` -------------------------------------------------------------------------------- /examples/face_classification/demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from paz.backend.camera import VideoPlayer 3 | from paz.backend.camera import Camera 4 | from paz.pipelines import DetectMiniXceptionFER 5 | 6 | 7 | if __name__ == "__main__": 8 | parser = argparse.ArgumentParser(description='Real-time face classifier') 9 | parser.add_argument('-c', '--camera_id', type=int, default=0, 10 | help='Camera device ID') 11 | parser.add_argument('-o', '--offset', type=float, default=0.1, 12 | help='Scaled offset to be added to bounding boxes') 13 | args = parser.parse_args() 14 | 15 | pipeline = DetectMiniXceptionFER([args.offset, args.offset]) 16 | camera = Camera(args.camera_id) 17 | player = VideoPlayer((640, 480), pipeline, camera) 18 | player.run() 19 | -------------------------------------------------------------------------------- /examples/face_classification/pipelines.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.preprocessing.image import ImageDataGenerator 2 | from paz.abstract import SequentialProcessor, ProcessingSequence 3 | from paz.pipelines import PreprocessImage 4 | import paz.processors as pr 5 | 6 | 7 | class ProcessGrayImage(SequentialProcessor): 8 | def __init__(self, size, num_classes, generator=None): 9 | super(ProcessGrayImage, self).__init__() 10 | self.size = size 11 | self.process = SequentialProcessor([pr.ExpandDims(-1)]) 12 | if generator is not None: 13 | self.process.add(pr.ImageDataProcessor(generator)) 14 | self.process.add(PreprocessImage((size, size), mean=None)) 15 | self.process.add(pr.ExpandDims(-1)) 16 | self.add(pr.UnpackDictionary(['image', 'label'])) 17 | self.add(pr.ExpandDomain(self.process)) 18 | self.add(pr.SequenceWrapper({0: {'image': [size, size, 1]}}, 19 | {1: {'label': [num_classes]}})) 20 | 21 | 22 | if __name__ == "__main__": 23 | import os 24 | from paz.datasets import FER, FERPlus 25 | 26 | # data generator and augmentations 27 | generator = ImageDataGenerator( 28 | rotation_range=30, 29 | width_shift_range=0.1, 30 | height_shift_range=0.1, 31 | zoom_range=.1, 32 | horizontal_flip=True) 33 | 34 | pipeline = ProcessGrayImage(48, 8, generator) 35 | dataset = 'FERPlus' 36 | 37 | data_path = os.path.join(os.path.expanduser('~'), '.keras/paz/datasets/') 38 | name_to_manager = {'FER': FER, 'FERPlus': FERPlus} 39 | data_managers, datasets = {}, {} 40 | data_path = os.path.join(data_path, dataset) 41 | kwargs = {'path': data_path} if dataset in ['FERPlus'] else {} 42 | data_manager = name_to_manager[dataset](split='train', **kwargs) 43 | data = data_manager.load_data() 44 | 45 | sequence = ProcessingSequence(pipeline, 32, data) 46 | batch = sequence.__getitem__(0) 47 | show = pr.ShowImage() 48 | for arg in range(32): 49 | 
image = batch[0]['image'][arg][..., 0] 50 | image = 255 * image 51 | image = image.astype('uint8') 52 | show(image) 53 | -------------------------------------------------------------------------------- /examples/fine-tuning_object_detection/backend.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from paz.backend.boxes import compute_ious, to_corner_form 3 | 4 | 5 | def match(boxes, prior_boxes, iou_threshold=0.5): 6 | """Matches each prior box with a ground truth box (box from `boxes`). 7 | It then selects which matched box will be considered positive e.g. iou > .5 8 | and returns for each prior box a ground truth box that is either positive 9 | (with a class argument different than 0) or negative. 10 | 11 | # Arguments 12 | boxes: Numpy array of shape `(num_ground_truh_boxes, 4 + 1)`, 13 | where the first the first four coordinates correspond to 14 | box coordinates and the last coordinates is the class 15 | argument. This boxes should be the ground truth boxes. 16 | prior_boxes: Numpy array of shape `(num_prior_boxes, 4)`. 17 | where the four coordinates are in center form coordinates. 18 | iou_threshold: Float between [0, 1]. Intersection over union 19 | used to determine which box is considered a positive box. 20 | 21 | # Returns 22 | numpy array of shape `(num_prior_boxes, 4 + 1)`. 23 | where the first the first four coordinates correspond to point 24 | form box coordinates and the last coordinates is the class 25 | argument. 26 | """ 27 | ious = compute_ious(boxes, to_corner_form(np.float32(prior_boxes))) 28 | per_prior_which_box_iou = np.max(ious, axis=0) 29 | per_prior_which_box_arg = np.argmax(ious, 0) 30 | 31 | # overwriting per_prior_which_box_arg if they are the best prior box 32 | per_box_which_prior_arg = np.argmax(ious, 1) 33 | per_prior_which_box_iou[per_box_which_prior_arg] = 2 34 | for box_arg in range(len(per_box_which_prior_arg)): 35 | best_prior_box_arg = per_box_which_prior_arg[box_arg] 36 | per_prior_which_box_arg[best_prior_box_arg] = box_arg 37 | 38 | matches = boxes[per_prior_which_box_arg] 39 | matches[per_prior_which_box_iou < iou_threshold, 4] = 0 40 | return matches 41 | -------------------------------------------------------------------------------- /examples/fine-tuning_object_detection/data_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | import numpy as np 4 | from pathlib import Path 5 | 6 | from paz.abstract import Loader 7 | 8 | 9 | class CSVLoader(Loader): 10 | def __init__( 11 | self, path, class_names, image_size=(1280, 720), split='train'): 12 | super(CSVLoader, self).__init__(path, split, class_names, 'CSVLoader') 13 | self.class_to_arg = self.build_class_to_arg(self.class_names) 14 | self.image_size = image_size 15 | 16 | def build_class_to_arg(self, class_names): 17 | args = list(range(len(class_names))) 18 | return dict(zip(args, class_names)) 19 | 20 | def load_data(self): 21 | file = open(self.path, 'r') 22 | csv_reader = csv.reader(file, delimiter=',') 23 | dataset = [] 24 | H, W = self.image_size 25 | for row in csv_reader: 26 | image_name, class_arg, x_min, y_min, x_max, y_max = row 27 | path = os.path.dirname(self.path) 28 | image_path = os.path.join(path, image_name + '.png') 29 | image_path = os.path.abspath(image_path) 30 | if not Path(image_path).is_file(): 31 | raise ValueError('File %s not found.\n' % image_path) 32 | box_data = [[int(x_min) / H, int(y_min) / W, 33 | int(x_max) / H, int(y_max) / 
W, int(class_arg)]] 34 | box_data = np.array(box_data) 35 | sample = {'image': image_path, 'boxes': box_data} 36 | dataset.append(sample) 37 | return dataset 38 | 39 | 40 | if __name__ == "__main__": 41 | path = 'datasets/solar_panel/BoundingBox.txt' 42 | class_names = ['background', 'solar_panel'] 43 | data_manager = CSVLoader(path, class_names) 44 | dataset = data_manager.load_data() 45 | -------------------------------------------------------------------------------- /examples/fine-tuning_object_detection/demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | from paz.pipelines import DetectSingleShot 4 | from paz.models.detection import SSD300 5 | from paz.backend.image import load_image, show_image, write_image 6 | 7 | 8 | class SSD300SolarPanel(DetectSingleShot): 9 | def __init__(self, weights_path, score_thresh=0.50, 10 | nms_thresh=0.45, draw=True): 11 | class_names = ['background', 'solar_panel'] 12 | model = SSD300(len(class_names), None, None) 13 | model.load_weights(weights_path) 14 | super(SSD300SolarPanel, self).__init__( 15 | model, class_names, score_thresh, nms_thresh, draw=draw) 16 | 17 | 18 | # weights_path = 'trained_models/SSD300/weights.172-3.15.hdf5' 19 | weights_path = 'trained_models/SSD300/weights.141-2.66.hdf5' 20 | detect = SSD300SolarPanel(weights_path) 21 | image_paths = glob.glob('datasets/test_solar_panel/*.jpg') 22 | for image_arg, image_path in enumerate(image_paths): 23 | image = load_image(image_path) 24 | results = detect(image) 25 | # show_image(results['image']) 26 | write_image('results/image_%s.png' % image_arg, results['image']) 27 | -------------------------------------------------------------------------------- /examples/fine-tuning_object_detection/processors.py: -------------------------------------------------------------------------------- 1 | from paz.abstract import Processor 2 | from backend import match 3 | 4 | 5 | class MatchBoxes(Processor): 6 | """Match prior boxes with ground truth boxes. 7 | 8 | # Arguments 9 | prior_boxes: Numpy array of shape (num_boxes, 4). 10 | iou: Float in [0, 1]. Intersection over union in which prior boxes 11 | will be considered positive. A positive box is box with a class 12 | different than `background`. 13 | variance: List of two floats. 14 | """ 15 | def __init__(self, prior_boxes, iou=.5): 16 | self.prior_boxes = prior_boxes 17 | self.iou = iou 18 | super(MatchBoxes, self).__init__() 19 | 20 | def call(self, boxes): 21 | boxes = match(boxes, self.prior_boxes, self.iou) 22 | return boxes 23 | -------------------------------------------------------------------------------- /examples/fine-tuning_object_detection/sequencer.py: -------------------------------------------------------------------------------- 1 | from paz.abstract.sequence import SequenceExtra 2 | import numpy as np 3 | 4 | 5 | class ProcessingSequence(SequenceExtra): 6 | """Sequence generator used for generating samples. 7 | 8 | # Arguments 9 | processor: Function used for generating and processing ``samples``. 10 | batch_size: Int. 11 | num_steps: Int. Number of steps for each epoch. 12 | as_list: Bool, if True ``inputs`` and ``labels`` are dispatched as 13 | lists. If false ``inputs`` and ``labels`` are dispatched as 14 | dictionaries. 
15 | """ 16 | def __init__(self, processor, batch_size, data, num_steps, as_list=False): 17 | self.num_steps = num_steps 18 | super(ProcessingSequence, self).__init__( 19 | processor, batch_size, as_list) 20 | self.data = data 21 | 22 | def _num_batches(self): 23 | return int(np.ceil(len(self.data) / float(self.batch_size))) 24 | 25 | def __len__(self): 26 | return self.num_steps 27 | 28 | def process_batch(self, inputs, labels, batch_index): 29 | unprocessed_batch = self._get_unprocessed_batch(self.data, batch_index) 30 | 31 | for sample_arg, unprocessed_sample in enumerate(unprocessed_batch): 32 | sample = self.pipeline(unprocessed_sample.copy()) 33 | self._place_sample(sample['inputs'], sample_arg, inputs) 34 | self._place_sample(sample['labels'], sample_arg, labels) 35 | return inputs, labels 36 | 37 | def _get_unprocessed_batch(self, data, batch_index): 38 | # batch_index = np.random.randint(0, self._num_batches()) 39 | batch_index = 0 40 | batch_arg_A = self.batch_size * (batch_index) 41 | batch_arg_B = self.batch_size * (batch_index + 1) 42 | unprocessed_batch = data[batch_arg_A:batch_arg_B] 43 | return unprocessed_batch 44 | -------------------------------------------------------------------------------- /examples/haar_cascade_detectors/haar_cascade_detectors.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from paz.abstract import Processor 4 | from paz.backend.camera import VideoPlayer, Camera 5 | from paz.models import HaarCascadeDetector 6 | from paz.backend.image import load_image, show_image 7 | import paz.processors as pr 8 | 9 | 10 | class HaarCascadeDetectors(Processor): 11 | def __init__(self, model_names): 12 | super(HaarCascadeDetectors, self).__init__() 13 | self.model_names = model_names 14 | self.detectors = [] 15 | for class_arg, model_name in enumerate(self.model_names): 16 | detector = pr.Predict( 17 | HaarCascadeDetector(model_name, class_arg), 18 | pr.ConvertColorSpace(pr.RGB2GRAY), 19 | pr.ToBoxes2D(args.models)) 20 | self.detectors.append(detector) 21 | self.draw_boxes2D = pr.DrawBoxes2D(args.models) 22 | self.wrap = pr.WrapOutput(['image', 'boxes2D']) 23 | 24 | def call(self, image): 25 | boxes2D = [] 26 | for detector in self.detectors: 27 | boxes2D.extend(detector(image)) 28 | image = self.draw_boxes2D(image, boxes2D) 29 | return self.wrap(image, boxes2D) 30 | 31 | 32 | if __name__ == "__main__": 33 | parser = argparse.ArgumentParser(description='MultiHaarCascadeDetectors') 34 | parser.add_argument('-m', '--models', nargs='+', type=str, 35 | default=['frontalface_default', 'eye'], 36 | help='Model name postfix of openCV xml file') 37 | parser.add_argument('-c', '--camera_id', type=int, default=0, 38 | help='Camera device ID') 39 | parser.add_argument('-p', '--image_path', type=str, default=None, 40 | help='full image path used for the pipelines') 41 | args = parser.parse_args() 42 | 43 | pipeline = HaarCascadeDetectors(args.models) 44 | if args.image_path is None: 45 | camera = Camera(args.camera_id) 46 | player = VideoPlayer((640, 480), pipeline, camera) 47 | player.run() 48 | else: 49 | image = load_image(args.image_path) 50 | predictions = pipeline(image) 51 | show_image(predictions['image']) 52 | -------------------------------------------------------------------------------- /examples/hand_detection/README.md: -------------------------------------------------------------------------------- 1 | # Hand detection example 2 | 3 | To test the live hand detection from camera, run: 4 | ```py 5 | 
python demo.py 6 | ``` 7 | 8 | To test the hand detection with pose estimation and hand closure classification, run: 9 | ```py 10 | python demo_image.py 11 | ``` 12 | -------------------------------------------------------------------------------- /examples/hand_detection/demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from paz.pipelines import SSD512HandDetection 3 | from paz.backend.camera import VideoPlayer, Camera 4 | 5 | 6 | parser = argparse.ArgumentParser(description='Minimal hand detection') 7 | parser.add_argument('-c', '--camera_id', type=int, default=0, 8 | help='Camera device ID') 9 | args = parser.parse_args() 10 | 11 | pipeline = SSD512HandDetection() 12 | camera = Camera(args.camera_id) 13 | player = VideoPlayer((640, 480), pipeline, camera) 14 | player.run() 15 | -------------------------------------------------------------------------------- /examples/hand_detection/download_openimagesV6.py: -------------------------------------------------------------------------------- 1 | import fiftyone 2 | 3 | fiftyone.zoo.load_zoo_dataset( 4 | 'open-images-v6', 5 | split='train', 6 | label_types=['detections'], 7 | classes=['Human hand']) 8 | 9 | 10 | fiftyone.zoo.load_zoo_dataset( 11 | 'open-images-v6', 12 | split='validation', 13 | label_types=['detections'], 14 | classes=['Human hand']) 15 | 16 | 17 | fiftyone.zoo.load_zoo_dataset( 18 | 'open-images-v6', 19 | split='test', 20 | label_types=['detections'], 21 | classes=['Human hand']) 22 | -------------------------------------------------------------------------------- /examples/hand_detection/model.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras import Model 2 | from paz.models import SSD512 3 | from paz.models.detection.utils import create_multibox_head 4 | 5 | 6 | def SSD512Custom(num_classes, num_priors=[4, 6, 6, 6, 6, 4, 4], l2_loss=5e-4, 7 | trainable_base=False): 8 | base_model = SSD512(base_weights='COCO', head_weights='COCO', 9 | trainable_base=trainable_base) 10 | branch_names = ['branch_1', 'branch_2', 'branch_3', 'branch_4', 11 | 'branch_5', 'branch_6', 'branch_7'] 12 | branch_tensors = [] 13 | for branch_name in branch_names: 14 | branch_layer = base_model.get_layer(branch_name) 15 | branch_tensors.append(branch_layer.output) 16 | 17 | output_tensor = create_multibox_head( 18 | branch_tensors, num_classes, num_priors, l2_loss) 19 | model = Model(base_model.input, output_tensor, name='SSD512Custom') 20 | model.prior_boxes = base_model.prior_boxes 21 | return model 22 | -------------------------------------------------------------------------------- /examples/hand_detection/pose_demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from paz.applications import SSD512MinimalHandPose 3 | from paz.backend.camera import VideoPlayer, Camera 4 | 5 | 6 | parser = argparse.ArgumentParser(description='''Minimal hand detection and 7 | keypoints estimation''') 8 | parser.add_argument('-c', '--camera_id', type=int, default=0, 9 | help='Camera device ID') 10 | args = parser.parse_args() 11 | 12 | pipeline = SSD512MinimalHandPose(right_hand=False, offsets=[0.5, 0.5]) 13 | camera = Camera(args.camera_id) 14 | player = VideoPlayer((640, 480), pipeline, camera) 15 | player.run() 16 | -------------------------------------------------------------------------------- /examples/hand_pose_estimation/README.md: 
-------------------------------------------------------------------------------- 1 | ### This example detects hand pose from an image. 2 | 3 | To test the live hand pose estimation from camera, run: 4 | ```py 5 | python demo.py 6 | ``` 7 | 8 | To test the hand pose estimation on image, run: 9 | ```py 10 | python demo_image.py 11 | ``` 12 | 13 | To test the live hand closure status with the pose estimation from camera, run: 14 | ```py 15 | python is_open_demo.py 16 | ``` 17 | 18 | To test the live hand pose estimation from camera and visualize keypoints in 3D, run (This module has an extra dependency of matplotlib): 19 | ```py 20 | python demo3D.py 21 | ``` 22 | 23 | ### Additional notes 24 | To test a more robust hand pose estimation and open / close classification try out the "paz/examples/hand_detection/pose_demo.py" 25 | 26 | -------------------------------------------------------------------------------- /examples/hand_pose_estimation/demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from paz.applications import MinimalHandPoseEstimation 3 | from paz.backend.camera import VideoPlayer 4 | from paz.backend.camera import Camera 5 | 6 | 7 | parser = argparse.ArgumentParser(description='Minimal hand keypoint detection') 8 | parser.add_argument('-c', '--camera_id', type=int, default=0, 9 | help='Camera device ID') 10 | args = parser.parse_args() 11 | 12 | pipeline = MinimalHandPoseEstimation(right_hand=False) 13 | camera = Camera(args.camera_id) 14 | player = VideoPlayer((640, 480), pipeline, camera) 15 | player.run() 16 | -------------------------------------------------------------------------------- /examples/hand_pose_estimation/demo_image.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tensorflow.keras.utils import get_file 3 | from paz.backend.image import load_image, show_image 4 | from paz.applications import MinimalHandPoseEstimation 5 | 6 | 7 | URL = ('https://github.com/oarriaga/altamira-data/releases/download' 8 | '/v0.14/image_with_hand.png') 9 | filename = os.path.basename(URL) 10 | fullpath = get_file(filename, URL, cache_subdir='paz/tests') 11 | image = load_image(fullpath) 12 | 13 | detect = MinimalHandPoseEstimation(right_hand=False) 14 | inferences = detect(image) 15 | 16 | image = inferences['image'] 17 | show_image(image) 18 | -------------------------------------------------------------------------------- /examples/hand_pose_estimation/hand_tracking.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from paz.abstract import SequentialProcessor 3 | from paz.backend.camera import VideoPlayer, Camera 4 | from paz.applications import SSD512MinimalHandPose 5 | from paz import processors as pr 6 | 7 | 8 | parser = argparse.ArgumentParser(description='Minimal hand keypoint detection') 9 | parser.add_argument('-c', '--camera_id', type=int, default=0, 10 | help='Camera device ID') 11 | parser.add_argument('-HFOV', '--horizontal_field_of_view', type=float, 12 | default=75, help='Horizontal field of view in degrees') 13 | args = parser.parse_args() 14 | 15 | camera = Camera(args.camera_id) 16 | camera.intrinsics_from_HFOV(args.horizontal_field_of_view) 17 | 18 | 19 | class HandStateEstimation(SequentialProcessor): 20 | def __init__(self, camera): 21 | super(HandStateEstimation, self).__init__() 22 | intro_topics = ['image', 'boxes2D', 'keypoints2D', 'keypoints3D'] 23 | self.add(SSD512MinimalHandPose()) 
24 | self.add(pr.UnpackDictionary(intro_topics)) 25 | self.add(pr.ControlMap( 26 | pr.Translation3DFromBoxWidth(camera), [1], [4], {1: 1})) 27 | outro_topics = intro_topics + ['translation3D'] 28 | self.add(pr.WrapOutput(outro_topics)) 29 | 30 | 31 | pipeline = HandStateEstimation(camera) 32 | player = VideoPlayer((640, 480), pipeline, camera) 33 | player.run() 34 | -------------------------------------------------------------------------------- /examples/hand_pose_estimation/is_open_demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from paz.backend.camera import VideoPlayer 3 | from paz.backend.camera import Camera 4 | from paz.applications import ClassifyHandClosure 5 | 6 | parser = argparse.ArgumentParser(description='Minimal hand keypoint detection') 7 | parser.add_argument('-c', '--camera_id', type=int, default=0, 8 | help='Camera device ID') 9 | args = parser.parse_args() 10 | 11 | pipeline = ClassifyHandClosure(draw=True, right_hand=False) 12 | camera = Camera(args.camera_id) 13 | player = VideoPlayer((640, 480), pipeline, camera) 14 | player.run() 15 | -------------------------------------------------------------------------------- /examples/head_pose_estimation_6D/demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from paz.backend.camera import Camera 4 | from paz.backend.camera import VideoPlayer 5 | from paz.pipelines import HeadPoseKeypointNet2D32 6 | 7 | description = 'Demo script for estimating 6D pose-heads from face-keypoints' 8 | parser = argparse.ArgumentParser(description=description) 9 | parser.add_argument('-c', '--camera_id', type=int, default=0, 10 | help='Camera device ID') 11 | parser.add_argument('-HFOV', '--horizontal_field_of_view', type=float, 12 | default=70, help='Horizontal field of view in degrees') 13 | args = parser.parse_args() 14 | 15 | camera = Camera(args.camera_id) 16 | camera.intrinsics_from_HFOV(args.horizontal_field_of_view) 17 | pipeline = HeadPoseKeypointNet2D32(camera) 18 | player = VideoPlayer((640, 480), pipeline, camera) 19 | player.run() 20 | -------------------------------------------------------------------------------- /examples/human_pose_estimation_2D/README.md: -------------------------------------------------------------------------------- 1 | ### This example detects 2d human pose from an image. 2 | 3 | To test the live human pose detection from camera, run: 4 | ```py 5 | python demo.py 6 | ``` 7 | 8 | To test the human pose detection on image, run: 9 | ```py 10 | python demo_image.py 11 | ``` 12 | 13 | The output image with human pose skeleton will be saved to *output/result.jpg*. 
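For reference, saving the drawn skeleton to *output/result.jpg* as mentioned above only needs a few lines around the `HigherHRNetHumanPose2D` pipeline. This is a minimal sketch: the input filename is a placeholder, and `write_image` is the same helper used by other examples in this repository.

```python
# Minimal sketch: estimate 2D human pose on an image and save the drawn result.
import os
from paz.applications import HigherHRNetHumanPose2D
from paz.backend.image import load_image, write_image

detect = HigherHRNetHumanPose2D()
inferences = detect(load_image('test_image.jpg'))  # placeholder input image
os.makedirs('output', exist_ok=True)
write_image('output/result.jpg', inferences['image'])
```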
14 | -------------------------------------------------------------------------------- /examples/human_pose_estimation_2D/demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from paz.applications import HigherHRNetHumanPose2D 3 | from paz.backend.camera import VideoPlayer 4 | from paz.backend.camera import Camera 5 | 6 | 7 | parser = argparse.ArgumentParser(description='Test keypoints network') 8 | parser.add_argument('-c', '--camera_id', type=int, default=0, 9 | help='Camera device ID') 10 | args = parser.parse_args() 11 | 12 | pipeline = HigherHRNetHumanPose2D() 13 | camera = Camera(args.camera_id) 14 | player = VideoPlayer((640, 480), pipeline, camera) 15 | player.run() 16 | -------------------------------------------------------------------------------- /examples/human_pose_estimation_2D/demo_image.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tensorflow.keras.utils import get_file 3 | from paz.applications import HigherHRNetHumanPose2D 4 | from paz.backend.image import load_image, show_image 5 | 6 | 7 | URL = ('https://github.com/oarriaga/altamira-data/releases/download' 8 | '/v0.10/multi_person_test_pose.png') 9 | filename = os.path.basename(URL) 10 | fullpath = get_file(filename, URL, cache_subdir='paz/tests') 11 | image = load_image(fullpath) 12 | 13 | detect = HigherHRNetHumanPose2D() 14 | inferences = detect(image) 15 | 16 | image = inferences['image'] 17 | show_image(image) 18 | -------------------------------------------------------------------------------- /examples/human_pose_estimation_3D/demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from scipy.optimize import least_squares 3 | from paz.applications import EstimateHumanPose 4 | from paz.backend.camera import Camera, VideoPlayer 5 | 6 | 7 | parser = argparse.ArgumentParser(description='Estimate human pose') 8 | parser.add_argument('-c', '--camera_id', type=int, default=0, 9 | help='Camera device ID') 10 | args = parser.parse_args() 11 | 12 | camera = Camera(args.camera_id) 13 | camera.intrinsics_from_HFOV(HFOV=70, image_shape=(640, 480)) 14 | pipeline = EstimateHumanPose(least_squares, camera.intrinsics) 15 | camera = Camera() 16 | player = VideoPlayer((640, 480), pipeline, camera) 17 | player.run() 18 | -------------------------------------------------------------------------------- /examples/human_pose_estimation_3D/demo3D.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from viz import show3Dpose 3 | import matplotlib.pyplot as plt 4 | from scipy.optimize import least_squares 5 | from paz.applications import EstimateHumanPose 6 | from matplotlib.animation import FuncAnimation 7 | from paz.backend.camera import Camera, VideoPlayer 8 | from paz.backend.image import resize_image, show_image 9 | 10 | 11 | parser = argparse.ArgumentParser(description='Human3D visualization') 12 | parser.add_argument('-c', '--camera_id', type=int, default=0, 13 | help='Camera device ID') 14 | args = parser.parse_args() 15 | 16 | camera = Camera() 17 | camera.intrinsics_from_HFOV(HFOV=70, image_shape=(640, 480)) 18 | pipeline = EstimateHumanPose(least_squares, camera.intrinsics) 19 | camera = Camera(args.camera_id) 20 | player = VideoPlayer((640, 480), pipeline, camera) 21 | 22 | 23 | def animate(player): 24 | """Opens camera and starts continuous inference using ``pipeline``, 25 | until the user 
presses ``q`` inside the opened window. Plot the 26 | 3D keypoints on pyplot. 27 | """ 28 | player.camera.start() 29 | ax = plt.axes(projection='3d') 30 | ax.view_init(-160, -80) 31 | ax.figure.canvas.manager.set_window_title('Human pose visualization') 32 | 33 | def wrapped_animate(i): 34 | output = player.step() 35 | image = resize_image(output[player.topic], tuple(player.image_size)) 36 | show_image(image, 'inference', wait=False) 37 | 38 | keypoints3D = output['keypoints3D'] 39 | if len(keypoints3D) == 0: 40 | return 41 | 42 | plt.cla() 43 | ax.set_xlabel('X') 44 | ax.set_ylabel('Y') 45 | ax.set_zlabel('Z') 46 | show3Dpose(keypoints3D, ax) 47 | return wrapped_animate 48 | 49 | 50 | animation = FuncAnimation(plt.gcf(), animate(player), interval=1) 51 | plt.tight_layout() 52 | plt.show() 53 | -------------------------------------------------------------------------------- /examples/human_pose_estimation_3D/demo_image.py: -------------------------------------------------------------------------------- 1 | import os 2 | from paz.backend.camera import Camera 3 | from scipy.optimize import least_squares 4 | from tensorflow.keras.utils import get_file 5 | from paz.applications import EstimateHumanPose 6 | from paz.backend.image import load_image, show_image 7 | 8 | 9 | URL = ('https://github.com/oarriaga/altamira-data/releases/download' 10 | '/v0.17/multiple_persons_posing.png') 11 | 12 | filename = os.path.basename(URL) 13 | fullpath = get_file(filename, URL, cache_subdir='paz/tests') 14 | image = load_image(fullpath) 15 | H, W = image.shape[:2] 16 | camera = Camera() 17 | camera.intrinsics_from_HFOV(HFOV=70, image_shape=[H, W]) 18 | pipeline = EstimateHumanPose(least_squares, camera.intrinsics) 19 | inference = pipeline(image) 20 | show_image(inference['image']) 21 | -------------------------------------------------------------------------------- /examples/images_synthesis/README.md: -------------------------------------------------------------------------------- 1 | dataset_synthesis.py gives an example of training image synthesis with the OBJ file of 3D model using pyrender. The images information including image size, bounding box coordinates and class name is recorded in annotation.txt, which is in the same folder as the synthesized images. 2 | 3 | data_manager.py provides the methods reading the annotations in the format that dataset_synthesis.py creates. 4 | -------------------------------------------------------------------------------- /examples/images_synthesis/data_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from paz.abstract import Loader 4 | 5 | 6 | class TXTLoader(Loader): 7 | """Preprocess the .txt annotations data. 
8 | 9 | # Arguments 10 | path: path of the annotation.txt 11 | 12 | # Return 13 | data: dictionary with keys corresponding to the image paths 14 | and values numpy arrays of shape "[num_objects, 4+1]" 15 | """ 16 | 17 | def __init__(self, path, class_names, split='train'): 18 | super(TXTLoader, self).__init__(path, split, class_names, 'TXTLoader') 19 | self.class_to_arg = self.build_class_to_arg(self.class_names) 20 | 21 | def build_class_to_arg(self, class_names): 22 | args = list(range(len(class_names))) 23 | return dict(zip(class_names, args)) 24 | 25 | def load_data(self): 26 | images_path = os.path.dirname(self.path) 27 | data = [] 28 | 29 | file = open(self.path, 'r') 30 | lines = file.readlines() 31 | file.close() 32 | for line in lines: 33 | """ 34 | get the image name (write in the data), 35 | image size (to normalize the bounding box, not normalize first), 36 | class name, bounding box (write in data) 37 | """ 38 | image_name, H, W, class_name, x_min, y_min, x_max, y_max = line.split(',') 39 | image_path = os.path.join(images_path, image_name) 40 | box_data = [] 41 | box_data.append([float(x_min) / int(W), float(y_min) / int(H), 42 | float(x_max) / int(W), float(y_max) / int(H), 43 | self.class_to_arg.get(class_name)]) 44 | box_data = np.asarray(box_data) 45 | data.append({'image': image_path, 'boxes': box_data}) 46 | return data 47 | 48 | 49 | if __name__ == "__main__": 50 | path = 'training_images/images/annotation.txt' 51 | class_names = ['background', 'coyote'] 52 | data_manager = TXTLoader(path, class_names) 53 | dataset = data_manager.load_data() 54 | print(dataset) 55 | -------------------------------------------------------------------------------- /examples/images_synthesis/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import trimesh 4 | from pyrender import RenderFlags, Mesh, Scene, Viewer 5 | from paz.backend.boxes import extract_bounding_box_corners 6 | from paz.backend.image import normalize_min_max 7 | 8 | 9 | def as_mesh(scene_or_mesh, scale=None): 10 | if scale is None: 11 | scale = [1.0, 1.0, 1.0] 12 | scale = np.asarray(scale) 13 | if hasattr(scene_or_mesh, "bounds") and scene_or_mesh.bounds is None: 14 | return None 15 | if isinstance(scene_or_mesh, trimesh.Scene): 16 | dump = scene_or_mesh.dump() 17 | mesh = dump.sum() 18 | else: 19 | mesh = scene_or_mesh 20 | assert isinstance(mesh, trimesh.Trimesh), f"Can't convert {type(scene_or_mesh)} to trimesh.Trimesh!" 
21 | return mesh 22 | 23 | 24 | def load_obj(path): 25 | mesh = as_mesh(trimesh.load(path)) 26 | return mesh 27 | 28 | 29 | def color_object(path): 30 | mesh = load_obj(path) 31 | colors = compute_vertices_colors(mesh.vertices) 32 | mesh.visual = mesh.visual.to_color() 33 | mesh.visual.vertex_colors = colors 34 | mesh = Mesh.from_trimesh(mesh, smooth=False) 35 | mesh.primitives[0].material.metallicFactor = 0.0 36 | mesh.primitives[0].material.roughnessFactor = 1.0 37 | mesh.primitives[0].material.alphaMode = 'OPAQUE' 38 | return mesh 39 | 40 | 41 | def quick_color_visualize(): 42 | scene = Scene(bg_color=[0, 0, 0]) 43 | root = os.path.expanduser('~') 44 | mesh_path = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' 45 | path = os.path.join(root, mesh_path) 46 | mesh = color_object(path) 47 | scene.add(mesh) 48 | Viewer(scene, use_raymond_lighting=True, flags=RenderFlags.FLAT) 49 | # mesh_extents = np.array([0.184, 0.187, 0.052]) 50 | 51 | 52 | def compute_vertices_colors(vertices): 53 | corner3D_min, corner3D_max = extract_bounding_box_corners(vertices) 54 | normalized_colors = normalize_min_max(vertices, corner3D_min, corner3D_max) 55 | colors = (255 * normalized_colors).astype('uint8') 56 | return colors 57 | -------------------------------------------------------------------------------- /examples/implicit_orientation_learning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oarriaga/paz/b87754a3769d7cc955ce6cfde746d6cddeff540f/examples/implicit_orientation_learning/__init__.py -------------------------------------------------------------------------------- /examples/implicit_orientation_learning/processors.py: -------------------------------------------------------------------------------- 1 | from paz.abstract import Processor 2 | import numpy as np 3 | 4 | 5 | class MakeDictionary(Processor): 6 | def __init__(self, encoder, renderer): 7 | super(MakeDictionary, self).__init__() 8 | self.latent_dimension = encoder.encoder.output_shape[1] 9 | self.encoder = encoder 10 | self.renderer = renderer 11 | 12 | def call(self): 13 | data = self.renderer.render() 14 | dictionary = {} 15 | latent_vectors = np.zeros((len(data), self.latent_dimension)) 16 | for sample_arg, sample in enumerate(data): 17 | image = sample['image'] 18 | latent_vectors[sample_arg] = self.encoder(image) 19 | dictionary[sample_arg] = image 20 | dictionary['latent_vectors'] = latent_vectors 21 | return dictionary 22 | 23 | 24 | class MeasureSimilarity(Processor): 25 | def __init__(self, dictionary, measure): 26 | super(MeasureSimilarity, self).__init__() 27 | self.dictionary = dictionary 28 | self.measure = measure 29 | 30 | def call(self, latent_vector): 31 | latent_vectors = self.dictionary['latent_vectors'] 32 | measurements = self.measure(latent_vectors, latent_vector) 33 | closest_image = self.dictionary[np.argmax(measurements)] 34 | return latent_vector, closest_image 35 | -------------------------------------------------------------------------------- /examples/implicit_orientation_learning/run_experiments.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | class_name="035_power_drill" 3 | images_directory="$HOME/.keras/paz/datasets/voc-backgrounds/" 4 | object_path="$HOME/.keras/paz/datasets/ycb/models/$class_name/textured.obj" 5 | 6 | python3 train.py --images_directory $images_directory --obj_path $object_path --class_name $class_name -st 4 7 | 
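As a rough illustration of how the `MakeDictionary` and `MeasureSimilarity` processors defined in `processors.py` above can be wired together: the dictionary of rendered views is built once, and a similarity measure retrieves the closest rendered view for a query encoding. The cosine-similarity function below and the `encoder`, `renderer` and `query_image` objects are assumptions for this sketch; the actual wiring lives in this example's `pipelines.py` and `train.py`.

```python
# Hypothetical usage sketch; encoder, renderer and the similarity measure
# are assumed here, see pipelines.py of this example for the real setup.
import numpy as np
from processors import MakeDictionary, MeasureSimilarity


def cosine_similarity(latent_vectors, latent_vector):
    # similarity of every dictionary vector to the query latent vector
    dot_products = np.dot(latent_vectors, latent_vector.flatten())
    norms = np.linalg.norm(latent_vectors, axis=1) * np.linalg.norm(latent_vector)
    return dot_products / norms


# dictionary = MakeDictionary(encoder, renderer)()
# measure = MeasureSimilarity(dictionary, cosine_similarity)
# latent_vector, closest_image = measure(encoder(query_image))
```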
-------------------------------------------------------------------------------- /examples/keypoint_estimation/README.md: -------------------------------------------------------------------------------- 1 | Please install kaggle python module to automatically download datasets 2 | 3 | Make sure to have an account and have set up the API token 4 | 5 | And to have accepted the rules of the competition (you would recieve a 403 error in case you have not) 6 | 7 | ```bash 8 | kaggle competitions download -c facial-keypoints-detection 9 | ``` 10 | 11 | -------------------------------------------------------------------------------- /examples/keypoint_estimation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oarriaga/paz/b87754a3769d7cc955ce6cfde746d6cddeff540f/examples/keypoint_estimation/__init__.py -------------------------------------------------------------------------------- /examples/keypoint_estimation/dataset_downloader.sh: -------------------------------------------------------------------------------- 1 | echo "Creating default directory for dataset ..." 2 | mkdir dataset 3 | 4 | echo "Entering new directory ..." 5 | cd dataset/ 6 | 7 | echo "Using kaggle API to download dataset..." 8 | echo "Make sure to have installed kaggle-API, set-up kaggle API Token and have accepted the rules of the Facial Keypoints Detection challenge" 9 | kaggle competitions download -c facial-keypoints-detection 10 | 11 | echo "Unzipping downloaded dataset" 12 | unzip facial-keypoints-detection.zip 13 | 14 | echo "Unzipping train split" 15 | unzip training.zip 16 | 17 | echo "Unzipping test split" 18 | unzip test.zip 19 | 20 | echo "Removing zip files" 21 | rm facial-keypoints-detection.zip 22 | rm training.zip 23 | rm test.zip 24 | -------------------------------------------------------------------------------- /examples/keypoint_estimation/demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from paz.backend.camera import Camera 4 | from paz.backend.camera import VideoPlayer 5 | from paz.pipelines import DetectFaceKeypointNet2D32 6 | 7 | description = 'Demo script for running 2D keypoints face detector' 8 | parser = argparse.ArgumentParser(description=description) 9 | parser.add_argument('-c', '--camera_id', type=int, default=0, 10 | help='Camera device ID') 11 | args = parser.parse_args() 12 | 13 | # instantiating model 14 | pipeline = DetectFaceKeypointNet2D32(radius=5) 15 | camera = Camera(args.camera_id) 16 | player = VideoPlayer((640, 480), pipeline, camera) 17 | player.run() 18 | -------------------------------------------------------------------------------- /examples/keypoint_estimation/facial_keypoints.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import numpy as np 4 | from paz.abstract import Loader 5 | 6 | 7 | class FacialKeypoints(Loader): 8 | def __init__(self, path, split='train'): 9 | split_to_filename = {'train': 'training.csv', 'test': 'test.csv'} 10 | filename = split_to_filename[split] 11 | path = os.path.join(path, filename) 12 | super(FacialKeypoints, self).__init__( 13 | path, split, None, 'FacialKeypoints') 14 | 15 | def _load_faces(self, data_frame): 16 | faces = np.zeros((len(data_frame), 96, 96)) 17 | for arg, face in enumerate(data_frame.Image): 18 | faces[arg] = np.array(face.split(' '), dtype=int).reshape(96, 96) 19 | return faces 20 | 21 | def 
_load_keypoints(self, data_frame): 22 | keypoints = np.zeros((len(data_frame), 15, 2)) 23 | for arg, keypoint_set in data_frame.iloc[:, :-1].iterrows(): 24 | keypoints[arg] = keypoint_set.to_numpy().reshape(15, 2) 25 | return keypoints 26 | 27 | def _to_list_of_dictionaries(self, faces, keypoints=None): 28 | dataset = [] 29 | for arg in range(len(faces)): 30 | face, sample = faces[arg], {} 31 | sample['image'] = face 32 | if keypoints is not None: 33 | sample['keypoints'] = keypoints[arg] 34 | dataset.append(sample) 35 | return dataset 36 | 37 | def load_data(self): 38 | data_frame = pd.read_csv(self.path) 39 | data_frame.fillna(method='ffill', inplace=True) 40 | faces = self._load_faces(data_frame) 41 | if self.split == 'train': 42 | keypoints = self._load_keypoints(data_frame) 43 | dataset = self._to_list_of_dictionaries(faces, keypoints) 44 | else: 45 | dataset = self._to_list_of_dictionaries(faces, None) 46 | return dataset 47 | 48 | 49 | if __name__ == '__main__': 50 | path = 'dataset/' 51 | split = 'train' 52 | data_manager = FacialKeypoints(path, split) 53 | dataset = data_manager.load_data() 54 | -------------------------------------------------------------------------------- /examples/keypoint_estimation/pipelines.py: -------------------------------------------------------------------------------- 1 | from paz.backend.keypoints import denormalize_keypoints 2 | from paz.abstract import SequentialProcessor 3 | from paz import processors as pr 4 | from paz.backend.image import draw_keypoint 5 | from paz.backend.image.draw import GREEN 6 | 7 | 8 | def draw_circles(image, points, color=GREEN, radius=3): 9 | for point in points: 10 | draw_keypoint(image, point, color, radius) 11 | return image 12 | 13 | 14 | class AugmentKeypoints(SequentialProcessor): 15 | def __init__(self, phase, rotation_range=30, 16 | delta_scales=[0.2, 0.2], num_keypoints=15): 17 | super(AugmentKeypoints, self).__init__() 18 | 19 | self.add(pr.UnpackDictionary(['image', 'keypoints'])) 20 | if phase == 'train': 21 | self.add(pr.ControlMap(pr.RandomBrightness())) 22 | self.add(pr.ControlMap(pr.RandomContrast())) 23 | self.add(pr.RandomKeypointRotation(rotation_range)) 24 | self.add(pr.RandomKeypointTranslation(delta_scales)) 25 | self.add(pr.ControlMap(pr.NormalizeImage(), [0], [0])) 26 | self.add(pr.ControlMap(pr.ExpandDims(-1), [0], [0])) 27 | self.add(pr.ControlMap(pr.NormalizeKeypoints((96, 96)), [1], [1])) 28 | self.add(pr.SequenceWrapper({0: {'image': [96, 96, 1]}}, 29 | {1: {'keypoints': [num_keypoints, 2]}})) 30 | 31 | 32 | if __name__ == '__main__': 33 | from paz.abstract import ProcessingSequence 34 | from paz.backend.image import show_image 35 | 36 | from facial_keypoints import FacialKeypoints 37 | 38 | data_manager = FacialKeypoints('dataset/', 'train') 39 | dataset = data_manager.load_data() 40 | augment_keypoints = AugmentKeypoints('train') 41 | for arg in range(1, 100): 42 | sample = dataset[arg] 43 | predictions = augment_keypoints(sample) 44 | original_image = predictions['inputs']['image'][:, :, 0] 45 | original_image = original_image * 255.0 46 | kp = predictions['labels']['keypoints'] 47 | kp = denormalize_keypoints(kp, 96, 96) 48 | original_image = draw_circles( 49 | original_image, kp.astype('int')) 50 | show_image(original_image.astype('uint8')) 51 | sequence = ProcessingSequence(augment_keypoints, 32, dataset, True) 52 | batch = sequence.__getitem__(0) 53 | -------------------------------------------------------------------------------- /examples/maml/README.md: 
-------------------------------------------------------------------------------- 1 | # Model agnostic meta learning (MAML) 2 | 3 | ## Regression 4 | To run the regression example using sinusoids run: 5 | 6 | ``` 7 | python3 train_regression.py 8 | ``` 9 | 10 | ## Classification 11 | To run the regression example using the Omniglot dataset run: 12 | 13 | ``` 14 | python3 train_classification.py 15 | ``` 16 | -------------------------------------------------------------------------------- /examples/maml/sinusoid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def build_equally_spaced_points(num_points, min_x, max_x): 5 | return np.linspace(min_x, max_x, num_points) 6 | 7 | 8 | def sample_random_points(RNG, num_points, min_x, max_x): 9 | return RNG.uniform(min_x, max_x, num_points) 10 | 11 | 12 | def sample_amplitude(RNG, min_amplitude=0.1, max_amplitude=5.0): 13 | return RNG.uniform(min_amplitude, max_amplitude) 14 | 15 | 16 | def sample_phase(RNG): 17 | return RNG.uniform(0, np.pi) 18 | 19 | 20 | def compute_sinusoid(x, amplitude, phase): 21 | return amplitude * np.sin(x - phase) 22 | 23 | 24 | def Sinusoid(RNG, num_points, min_amplitude=0.1, max_amplitude=5.0, 25 | min_x=-5.0, max_x=5.0): 26 | amplitude = sample_amplitude(RNG, min_amplitude, max_amplitude) 27 | phase = sample_phase(RNG) 28 | 29 | def sample(batch_size=None, equally_spaced=False): 30 | batch_size = num_points if batch_size is None else batch_size 31 | if equally_spaced: 32 | x_support = build_equally_spaced_points(batch_size, min_x, max_x) 33 | x_queries = build_equally_spaced_points(batch_size, min_x, max_x) 34 | else: 35 | x_support = sample_random_points(RNG, batch_size, min_x, max_x) 36 | x_queries = sample_random_points(RNG, batch_size, min_x, max_x) 37 | y_support = compute_sinusoid(x_support, amplitude, phase) 38 | y_queries = compute_sinusoid(x_queries, amplitude, phase) 39 | return ((x_support, y_support), (x_queries, y_queries)) 40 | return sample 41 | -------------------------------------------------------------------------------- /examples/mask_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /examples/mask_rcnn/coco_demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import tensorflow as tf 4 | from tensorflow.keras.utils import get_file 5 | import cv2 6 | 7 | from paz.abstract import SequentialProcessor 8 | from paz.backend.image.opencv_image import load_image 9 | 10 | from mask_rcnn.model.model import MaskRCNN 11 | from mask_rcnn.datasets.shapes import Shapes 12 | 13 | from mask_rcnn.pipelines.detection import ResizeImages, NormalizeImages 14 | from mask_rcnn.pipelines.detection import Detect, PostprocessInputs 15 | 16 | from mask_rcnn.inference import test 17 | from mask_rcnn.utils import display_instances 18 | 19 | image_min_dim = 800 20 | image_max_dim = 1024 21 | image_scale = 0 22 | anchor_ratios = (32, 64, 128, 256, 512) 23 | images_per_gpu = 1 24 | num_classes = 81 25 | 26 | url = 'https://github.com/oarriaga/altamira-data/releases/tag/v0.18/' 27 | 28 | weights_local_path = os.path.join(os.getcwd() + '/mask_rcnn_coco.h5') 29 | image_local_path = os.path.join(os.getcwd() + '/television.jpeg') 30 | 31 | weights_path = get_file(weights_local_path, url + '/mask_rcnn_coco.h5') 32 | image_path = get_file(image_local_path, 
url + '/television.jpeg') 33 | 34 | image = load_image(image_path) 35 | 36 | class_names = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 37 | 'bus', 'train', 'truck', 'boat', 'traffic light', 38 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 39 | 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 40 | 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 41 | 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 42 | 'kite', 'baseball bat', 'baseball glove', 'skateboard', 43 | 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 44 | 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 45 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 46 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 47 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 48 | 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 49 | 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 50 | 'teddy bear', 'hair drier', 'toothbrush'] 51 | 52 | results = test(image, weights_path, 128, num_classes, 1, images_per_gpu, 53 | anchor_ratios, [1024, 1024], 1) 54 | r = results[0] 55 | print(r) 56 | display_instances(image, r['rois'], r['masks'], r['class_ids'], class_names, 57 | r['scores']) 58 | -------------------------------------------------------------------------------- /examples/mask_rcnn/inference.py: -------------------------------------------------------------------------------- 1 | from paz.abstract import SequentialProcessor 2 | 3 | from mask_rcnn.model.model import MaskRCNN, norm_all_boxes 4 | from mask_rcnn.pipelines.detection import ResizeImages, NormalizeImages 5 | from mask_rcnn.pipelines.detection import Detect, PostprocessInputs 6 | 7 | 8 | def test(images, weights_path, ROIs_per_image, num_classes, batch_size, 9 | images_per_gpu, anchor_ratios, image_shape, min_image_scale): 10 | resize = SequentialProcessor([ResizeImages(image_shape[0], min_image_scale, 11 | image_shape[1])]) 12 | molded_images, windows = resize([images]) 13 | image_shape = molded_images[0].shape 14 | window = norm_all_boxes(windows[0], image_shape[:2]) 15 | 16 | base_model = MaskRCNN(model_dir='../../mask_rcnn', 17 | image_shape=image_shape, 18 | backbone="resnet101", 19 | batch_size=batch_size, images_per_gpu=images_per_gpu, 20 | RPN_anchor_scales=anchor_ratios, 21 | train_ROIs_per_image=ROIs_per_image, 22 | num_classes=num_classes, 23 | window=window) 24 | 25 | base_model.build_model(train=False) 26 | base_model.keras_model.load_weights(weights_path, by_name=True) 27 | preprocess = SequentialProcessor([ResizeImages(), NormalizeImages()]) 28 | postprocess = SequentialProcessor([PostprocessInputs()]) 29 | 30 | detect = Detect(base_model, anchor_ratios, images_per_gpu, preprocess, 31 | postprocess) 32 | results = detect([images]) 33 | return results 34 | -------------------------------------------------------------------------------- /examples/mask_rcnn/inference_shapes.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | from paz.abstract import SequentialProcessor 6 | 7 | from mask_rcnn.model.model import MaskRCNN 8 | from mask_rcnn.datasets.shapes import Shapes 9 | 10 | from mask_rcnn.pipelines.detection import ResizeImages, NormalizeImages 11 | from mask_rcnn.pipelines.detection import Detect, PostprocessInputs 12 | 13 | from mask_rcnn.model.model import norm_all_boxes 14 | from mask_rcnn.utils import display_instances 15 | 
16 | 17 | def test(image, weights_path): 18 | resize = SequentialProcessor([ResizeImages()]) 19 | molded_images, windows = resize([image]) 20 | image_shape = molded_images[0].shape 21 | window = norm_all_boxes(windows[0], image_shape[:2]) 22 | 23 | base_model = MaskRCNN(model_dir='../../mask_rcnn', 24 | image_shape=image_shape, 25 | backbone="resnet101", 26 | batch_size=1, images_per_gpu=1, 27 | RPN_anchor_scales=(8, 16, 32, 64, 128), 28 | train_ROIs_per_image=32, num_classes=4, 29 | window=window) 30 | 31 | base_model.build_model(train=False) 32 | base_model.keras_model.load_weights(weights_path, by_name=True) 33 | preprocess = SequentialProcessor([ResizeImages(), 34 | NormalizeImages()]) 35 | postprocess = SequentialProcessor([PostprocessInputs()]) 36 | detect = Detect(base_model, (8, 16, 32, 64, 128), 1, 37 | preprocess, postprocess) 38 | result = detect([image]) 39 | return result 40 | 41 | 42 | path = '/Users/poornimakaushik/Desktop/mask_rcnn/weights.20-0.43.hdf5' 43 | 44 | dataset_train = Shapes(1, (128, 128)) 45 | data = dataset_train.load_data() 46 | images = data[0]['image'] 47 | 48 | class_names = ['BG', 'Square', 'Circle', 'Triangle'] 49 | results = test(images, path) 50 | r = results[0] 51 | print(r) 52 | display_instances(images, r['rois'], r['masks'], r['class_ids'], class_names, 53 | r['scores']) 54 | -------------------------------------------------------------------------------- /examples/mask_rcnn/losses/proposal_class_loss.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.keras.backend as K 3 | from keras.layers import Layer 4 | 5 | 6 | class ProposalClassLoss(tf.keras.losses.Loss): 7 | """Computes the loss for the Mask RCNN architecture's Region Proposal 8 | Network (RPN) class loss. This loss function calculates the RPN anchor 9 | classifier loss. 10 | 11 | Args: 12 | loss_weight: A float specifying the loss weight (default: 1.0) 13 | y_true: The ground truth tensor containing the anchor match type. 14 | Shape: [batch, anchors, 1]. Anchor match type. 1=positive, 15 | -1=negative, 0=neutral anchor. 16 | y_pred: The predicted tensor containing the RPN classifier logits for 17 | BG/FG. Shape: [batch, anchors, 2]. 18 | 19 | Returns: 20 | The computed loss value 21 | """ 22 | 23 | def __init__(self, loss_weight=1.0, name='rpn_class_loss', **kwargs): 24 | super().__init__(name=name, **kwargs) 25 | self.loss_weight = loss_weight 26 | 27 | def call(self, y_true, y_pred): 28 | y_true = tf.squeeze(y_true, -1) 29 | anchor_class = K.cast(K.equal(y_true, 1), tf.int32) 30 | indices = tf.compat.v1.where(K.not_equal(y_true, 0)) 31 | y_pred = tf.gather_nd(y_pred, indices) 32 | anchor_class = tf.gather_nd(anchor_class, indices) 33 | loss = K.sparse_categorical_crossentropy(target=anchor_class, 34 | output=y_pred, 35 | from_logits=True) 36 | loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0)) 37 | return loss 38 | -------------------------------------------------------------------------------- /examples/mask_rcnn/model/layers/class_loss.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.keras.backend as K 3 | from tensorflow.keras.layers import Layer 4 | 5 | 6 | class ClassLoss(Layer): 7 | """Computes loss for Mask RCNN architecture, for MRCNN class loss 8 | Loss for the classifier head of Mask RCNN. 9 | 10 | # Arguments: 11 | target_class_ids: [batch, num_rois]. Integer class IDs. 
Uses zero 12 | padding to fill in the array. 13 | pred_class_logits: [batch, num_rois, num_classes] 14 | active_class_ids: [batch, num_classes]. Has a value of 1 for 15 | classes that are in the dataset of the image, and 0 16 | for classes that are not in the dataset. 17 | 18 | # Returns: 19 | loss: class loss value 20 | """ 21 | 22 | def __init__(self, num_classes, loss_weight=1.0, name='mrcnn_class_loss', 23 | **kwargs): 24 | super().__init__(name=name, **kwargs) 25 | self.active_class_ids = tf.ones([num_classes], dtype=tf.int32) 26 | self.loss_weight = loss_weight 27 | 28 | def call(self, y_true, y_pred): 29 | pred_class_ids = tf.argmax(input=y_pred, axis=2) 30 | 31 | pred_active = tf.gather(self.active_class_ids, pred_class_ids) 32 | 33 | y_true = tf.cast(y_true, 'int64') 34 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true, 35 | logits=y_pred) 36 | loss = loss * tf.cast(pred_active, 'float32') 37 | 38 | loss = tf.math.reduce_sum(loss) / (tf.math.reduce_sum( 39 | input_tensor=tf.cast(pred_active, 'float32'))) 40 | self.add_loss(loss * self.loss_weight) 41 | 42 | metric = (loss * self.loss_weight) 43 | self.add_metric(metric, name='mrcnn_class_loss', aggregation='mean') 44 | return loss 45 | -------------------------------------------------------------------------------- /examples/mask_rcnn/shapes_demo.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | from paz.abstract import SequentialProcessor 6 | 7 | from mask_rcnn.model.model import MaskRCNN 8 | from mask_rcnn.datasets.shapes import Shapes 9 | 10 | from mask_rcnn.pipelines.detection import ResizeImages, NormalizeImages 11 | from mask_rcnn.pipelines.detection import Detect, PostprocessInputs 12 | 13 | from mask_rcnn.inference import test 14 | from mask_rcnn.utils import display_instances 15 | 16 | 17 | image_min_dim = 128 18 | image_max_dim = 128 19 | image_scale = 0 20 | anchor_ratios = (8, 16, 32, 64, 128) 21 | images_per_gpu = 1 22 | num_classes = 4 23 | 24 | path = '' # Weights path 25 | 26 | dataset_train = Shapes(1, (128, 128)) 27 | data = dataset_train.load_data() 28 | images = data[0]['input_image'] 29 | 30 | class_names = ['BG', 'Square', 'Circle', 'Triangle'] 31 | results = test(images, path, 32, num_classes, 1, images_per_gpu, anchor_ratios, 32 | [image_max_dim, image_min_dim], image_scale) 33 | r = results[0] 34 | 35 | print(r) 36 | display_instances(images, r['rois'], r['masks'], r['class_ids'], class_names, 37 | r['scores']) 38 | 39 | -------------------------------------------------------------------------------- /examples/mask_rcnn/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oarriaga/paz/b87754a3769d7cc955ce6cfde746d6cddeff540f/examples/mask_rcnn/tests/__init__.py -------------------------------------------------------------------------------- /examples/object_detection/VOC0712_downloader.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | echo "Downloading VOC2007 trainval ..." 7 | # Download the data. 8 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 9 | echo "Downloading VOC2007 test data ..." 10 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 11 | echo "Done downloading." 
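# Note: these tarballs are large; if a transfer is interrupted, re-running
# curl with resume support (curl -C - -LO <same URL>) continues from the
# partial file instead of starting over.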
12 | 13 | # Extract data 14 | echo "Extracting trainval ..." 15 | tar -xvf VOCtrainval_06-Nov-2007.tar 16 | echo "Extracting test ..." 17 | tar -xvf VOCtest_06-Nov-2007.tar 18 | echo "removing tars ..." 19 | rm VOCtrainval_06-Nov-2007.tar 20 | rm VOCtest_06-Nov-2007.tar 21 | 22 | end=`date +%s` 23 | runtime=$((end-start)) 24 | 25 | echo "Completed in" $runtime "seconds" 26 | 27 | 28 | start=`date +%s` 29 | 30 | # handle optional download dir 31 | 32 | echo "Downloading VOC2012 trainval ..." 33 | # Download the data. 34 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 35 | echo "Done downloading." 36 | 37 | 38 | # Extract data 39 | echo "Extracting trainval ..." 40 | tar -xvf VOCtrainval_11-May-2012.tar 41 | echo "removing tar ..." 42 | rm VOCtrainval_11-May-2012.tar 43 | 44 | end=`date +%s` 45 | runtime=$((end-start)) 46 | 47 | echo "Completed in" $runtime "seconds" 48 | -------------------------------------------------------------------------------- /examples/object_detection/debugger.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | gpus = tf.config.experimental.list_physical_devices('GPU') 3 | tf.config.experimental.set_memory_growth(gpus[0], True) 4 | 5 | 6 | import numpy as np 7 | from paz.models import SSD300 8 | from paz.datasets import VOC 9 | from paz.abstract import Processor, SequentialProcessor 10 | from paz import processors as pr 11 | from paz.pipelines import AugmentDetection 12 | 13 | 14 | class ShowBoxes(Processor): 15 | def __init__(self, class_names, prior_boxes, 16 | variances=[0.1, 0.1, 0.2, 0.2]): 17 | super(ShowBoxes, self).__init__() 18 | self.deprocess_boxes = SequentialProcessor([ 19 | pr.DecodeBoxes(prior_boxes, variances), 20 | pr.ToBoxes2D(class_names, True), 21 | pr.FilterClassBoxes2D(class_names[1:])]) 22 | self.denormalize_boxes2D = pr.DenormalizeBoxes2D() 23 | self.draw_boxes2D = pr.DrawBoxes2D(class_names) 24 | self.show_image = pr.ShowImage() 25 | self.resize_image = pr.ResizeImage((600, 600)) 26 | 27 | def call(self, image, boxes): 28 | image = self.resize_image(image) 29 | boxes2D = self.deprocess_boxes(boxes) 30 | boxes2D = self.denormalize_boxes2D(image, boxes2D) 31 | image = self.draw_boxes2D(image, boxes2D) 32 | image = (image + pr.BGR_IMAGENET_MEAN).astype(np.uint8) 33 | image = image[..., ::-1] 34 | self.show_image(image) 35 | return image, boxes2D 36 | 37 | 38 | size = 300 39 | split = 'train' 40 | epochs = 120 41 | batch_size = 30 42 | 43 | data_manager = VOC('VOCdevkit/') 44 | data = data_manager.load_data() 45 | 46 | class_names = data_manager.class_names 47 | # model = SSD300(base_weights='VGG', head_weights=None) 48 | model = SSD300() 49 | prior_boxes = model.prior_boxes 50 | 51 | testor_encoder = AugmentDetection(prior_boxes) 52 | testor_decoder = ShowBoxes(class_names, prior_boxes) 53 | sample_arg = 0 54 | for sample_arg in range(1000): 55 | sample = data[sample_arg] 56 | wrapped_outputs = testor_encoder(sample) 57 | print(wrapped_outputs['labels']) 58 | image = wrapped_outputs['inputs']['image'] 59 | boxes = wrapped_outputs['labels']['boxes'] 60 | image, boxes = testor_decoder(image, boxes) 61 | -------------------------------------------------------------------------------- /examples/object_detection/demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from paz.pipelines import SSD300FAT, SSD300VOC, SSD512COCO, SSD512YCBVideo 4 | from paz.backend.camera import 
VideoPlayer, Camera 5 | import tensorflow as tf 6 | gpus = tf.config.experimental.list_physical_devices('GPU') 7 | tf.config.experimental.set_memory_growth(gpus[0], True) 8 | 9 | 10 | 11 | 12 | parser = argparse.ArgumentParser(description='SSD object detection demo') 13 | parser.add_argument('-c', '--camera_id', type=int, default=0, 14 | help='Camera device ID') 15 | parser.add_argument('-s', '--score_thresh', type=float, default=0.6, 16 | help='Box/class score threshold') 17 | parser.add_argument('-n', '--nms_thresh', type=float, default=0.45, 18 | help='non-maximum suppression threshold') 19 | parser.add_argument('-d', '--dataset', type=str, default='VOC', 20 | choices=['VOC', 'COCO', 'YCBVideo', 'FAT'], 21 | help='Dataset name') 22 | args = parser.parse_args() 23 | name_to_model = {'VOC': SSD300VOC, 24 | 'FAT': SSD300FAT, 25 | 'COCO': SSD512COCO, 26 | 'YCBVideo': SSD512YCBVideo} 27 | 28 | pipeline = name_to_model[args.dataset] 29 | detect = pipeline(args.score_thresh, args.nms_thresh) 30 | camera = Camera(args.camera_id) 31 | player = VideoPlayer((1280, 960), detect, camera) 32 | player.run() 33 | -------------------------------------------------------------------------------- /examples/object_detection/evaluate.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import argparse 4 | 5 | import numpy as np 6 | from paz.datasets import VOC 7 | from paz.datasets import get_class_names 8 | from paz.evaluation import evaluateMAP 9 | from paz.models import SSD300 10 | from paz.pipelines import DetectSingleShot 11 | 12 | class_names = get_class_names() 13 | class_dict = { 14 | class_name: class_arg for class_arg, class_name in enumerate(class_names) 15 | } 16 | 17 | voc_root = './examples/object_detection/data/VOCdevkit' 18 | 19 | 20 | def test(weights_path): 21 | """ 22 | Arguments: 23 | weights_path: model path to be evaluated 24 | Returns: 25 | result: Dictionary of evaluation results 26 | """ 27 | score_thresh, nms_thresh, labels = 0.01, .45, get_class_names('VOC') 28 | 29 | model = SSD300() 30 | model.load_weights(weights_path) 31 | detector = DetectSingleShot(model, labels, score_thresh, nms_thresh) 32 | 33 | data_name = 'VOC2007' 34 | data_split = 'test' 35 | data_manager = VOC(voc_root, data_split, name=data_name, evaluate=True) 36 | dataset = data_manager.load_data() 37 | 38 | result = evaluateMAP( 39 | detector, 40 | dataset, 41 | class_dict, 42 | iou_thresh=0.5, 43 | use_07_metric=True) 44 | 45 | result_str = "mAP: {:.4f}\n".format(result["map"]) 46 | metrics = {'mAP': result["map"]} 47 | for arg, ap in enumerate(result["ap"]): 48 | if arg == 0 or np.isnan(ap): # skip background 49 | continue 50 | metrics[class_names[arg]] = ap 51 | result_str += "{:<16}: {:.4f}\n".format(class_names[arg], ap) 52 | print(result_str) 53 | 54 | 55 | description = 'Test script for single-shot object detection models' 56 | parser = argparse.ArgumentParser(description=description) 57 | parser.add_argument('-wp', '--weights_path', default=None, 58 | type=str, help='Path for model to be evaluated') 59 | args = parser.parse_args() 60 | 61 | test(args.weights_path) 62 | -------------------------------------------------------------------------------- /examples/pix2pose/README.md: -------------------------------------------------------------------------------- 1 | One needs to provide a path to a directory containing multiple png images. 
2 | These images will be used as backgrounds to the domain randomization pipeline 3 | -------------------------------------------------------------------------------- /examples/pix2pose/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oarriaga/paz/b87754a3769d7cc955ce6cfde746d6cddeff540f/examples/pix2pose/__init__.py -------------------------------------------------------------------------------- /examples/pix2pose/demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from paz.applications import PIX2YCBTools6D 3 | from paz.backend.camera import VideoPlayer 4 | from paz.backend.camera import Camera 5 | 6 | 7 | parser = argparse.ArgumentParser(description='Object pose estimation') 8 | parser.add_argument('-c', '--camera_id', type=int, default=0, 9 | help='Camera device ID') 10 | args = parser.parse_args() 11 | 12 | camera = Camera(args.camera_id) 13 | pipeline = PIX2YCBTools6D(camera, offsets=[0.25, 0.25], epsilon=0.015) 14 | player = VideoPlayer((640, 480), pipeline, camera) 15 | player.run() 16 | -------------------------------------------------------------------------------- /examples/pix2pose/demo_image.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tensorflow.keras.utils import get_file 3 | from paz.backend.image import load_image, show_image 4 | from paz.applications import PIX2YCBTools6D 5 | from paz.backend.camera import Camera 6 | 7 | 8 | URL = ('https://github.com/oarriaga/altamira-data/releases/download' 9 | '/v0.9.1/image_with_YCB_objects.jpg') 10 | filename = os.path.basename(URL) 11 | fullpath = get_file(filename, URL, cache_subdir='paz/tests') 12 | image = load_image(fullpath) 13 | camera = Camera() 14 | camera.intrinsics_from_HFOV(55, image.shape) 15 | 16 | detect = PIX2YCBTools6D(camera, offsets=[0.25, 0.25], epsilon=0.015) 17 | inferences = detect(image) 18 | 19 | image = inferences['image'] 20 | show_image(image) 21 | -------------------------------------------------------------------------------- /examples/pix2pose/legacy/demo_image.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from paz.models import UNET_VGG16 3 | from paz.backend.image import show_image, load_image 4 | from paz.backend.camera import Camera 5 | from paz.pipelines import DetectSingleShot 6 | from paz.models import SSD300 7 | 8 | from pipelines import MultiPix2Pose 9 | 10 | 11 | image_path = 'images/lab_condition.png' 12 | epsilon = 0.001 13 | score_thresh = 0.50 14 | offsets = [0.2, 0.2] 15 | nms_thresh = 0.45 16 | 17 | image_shape = (128, 128, 3) 18 | num_classes = 3 19 | camera = Camera(device_id=0) 20 | image = load_image(image_path) 21 | image_size = image.shape[0:2] 22 | focal_length = image_size[1] 23 | image_center = (image_size[1] / 2.0, image_size[0] / 2.0) 24 | camera.distortion = np.zeros((4)) 25 | camera.intrinsics = np.array([[focal_length, 0, image_center[0]], 26 | [0, focal_length, image_center[1]], 27 | [0, 0, 1]]) 28 | 29 | class_names = ['background', 'Large_clamp', 'flat_screwdriver', 30 | 'hammer', 'Solar_panel', 'power_drill'] 31 | detection = SSD300(len(class_names), head_weights=None) 32 | detection.load_weights('weights/SSD300_weights_.53-1.40.hdf5') 33 | detect = DetectSingleShot(detection, class_names, score_thresh, 34 | nms_thresh, draw=False) 35 | 36 | name_to_sizes = { 37 | 'power_drill': np.array([1840, 1870, 
520]), 38 | 'Solar_panel': np.array([15000, 15000, 2000]), 39 | 'Large_clamp': np.array([12000, 17100, 3900]), 40 | 'hammer': np.array([18210, 33272, 3280])} 41 | 42 | 43 | name_to_weights = { 44 | 'power_drill': 'weights/UNET_weights_epochs-10_beta-3.hdf5', 45 | 'Solar_panel': 'weights/UNET-VGG_solar_panel_canonical_13.hdf5', 46 | 'Large_clamp': 'weights/UNET-VGG_large_clamp_canonical_10.hdf5', 47 | 'hammer': 'weights/UNET-VGG16_weights_hammer_10.hdf5'} 48 | 49 | 50 | segment = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) 51 | valid_class_names = ['power_drill', 'Solar_panel', 'Large_clamp', 'hammer'] 52 | 53 | pipeline = MultiPix2Pose(detect, segment, camera, name_to_weights, 54 | name_to_sizes, valid_class_names, offsets, 55 | epsilon, draw=True) 56 | 57 | results = pipeline(image) 58 | predicted_image = results['image'] 59 | show_image(predicted_image) 60 | -------------------------------------------------------------------------------- /examples/pix2pose/legacy/metrics.py: -------------------------------------------------------------------------------- 1 | from loss import compute_weighted_reconstruction_loss_with_error 2 | from loss import compute_error_prediction_loss 3 | from loss import compute_weighted_reconstruction_loss 4 | import tensorflow as tf 5 | 6 | 7 | def error_prediction(RGBA_true, RGBE_pred, beta=3.0): 8 | return compute_error_prediction_loss(RGBA_true, RGBE_pred) 9 | 10 | 11 | def mean_squared_error(y_true, y_pred): 12 | squared_difference = tf.square(y_true[:, :, :, 0:3] - y_pred[:, :, :, 0:3]) 13 | return tf.reduce_mean(squared_difference, axis=-1) 14 | 15 | 16 | def weighted_reconstruction_wrapper(beta=3.0, with_error=False): 17 | if with_error: 18 | def weighted_reconstruction(y_true, y_pred): 19 | return compute_weighted_reconstruction_loss_with_error( 20 | y_true, y_pred, beta) 21 | else: 22 | def weighted_reconstruction(y_true, y_pred): 23 | return compute_weighted_reconstruction_loss(y_true, y_pred, beta) 24 | return weighted_reconstruction 25 | -------------------------------------------------------------------------------- /examples/pix2pose/legacy/processors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from paz.abstract import Processor 3 | from paz.backend.keypoints import project_points3D 4 | from paz.backend.keypoints import build_cube_points3D 5 | from paz.backend.image import draw_cube 6 | from paz.processors import DrawBoxes3D 7 | 8 | 9 | class DrawBoxes3D(Processor): 10 | def __init__(self, camera, class_to_dimensions, thickness=1): 11 | """Draw boxes 3D of multiple objects 12 | 13 | # Arguments 14 | camera: Instance of ``paz.backend.camera.Camera''. 15 | class_to_dimensions: Dictionary that has as keys the 16 | class names and as value a list [model_height, model_width] 17 | thickness: Int. 
Thickness of 3D box 18 | """ 19 | super(DrawBoxes3D, self).__init__() 20 | self.camera = camera 21 | self.class_to_dimensions = class_to_dimensions 22 | self.class_to_points = self._build_points(self.class_to_dimensions) 23 | self.thickness = thickness 24 | 25 | def _build_points(self, class_to_dimensions): 26 | class_to_cube3D = {} 27 | print(class_to_dimensions) 28 | for class_name, dimensions in class_to_dimensions.items(): 29 | width, height, depth = dimensions 30 | cube_points3D = build_cube_points3D(width, height, depth) 31 | class_to_cube3D[class_name] = cube_points3D 32 | return class_to_cube3D 33 | 34 | def call(self, image, pose6D): 35 | points3D = self.class_to_points[pose6D.class_name] 36 | points2D = project_points3D(points3D, pose6D, self.camera) 37 | points2D = points2D.astype(np.int32) 38 | draw_cube(image, points2D, thickness=self.thickness) 39 | return image 40 | -------------------------------------------------------------------------------- /examples/pix2pose/models/discriminator.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.models import Model 2 | from tensorflow.keras.layers import (Conv2D, BatchNormalization, LeakyReLU, 3 | Input, Flatten, Dense) 4 | 5 | 6 | def convolution_block(x, filters): 7 | x = Conv2D(filters, (3, 3), strides=(2, 2), padding='same')(x) 8 | x = BatchNormalization()(x) 9 | x = LeakyReLU(0.2)(x) 10 | return x 11 | 12 | 13 | def Discriminator(input_shape=(128, 128, 3), name='PIX2POSE_DISCRIMINATOR'): 14 | input_image = Input(input_shape, name='input_image') 15 | x = convolution_block(input_image, 64) 16 | for filters in [128, 256, 512, 512, 512, 512]: 17 | x = convolution_block(x, filters) 18 | flatten = Flatten()(x) 19 | x = Dense(1, activation='sigmoid', name='discriminator_output')(flatten) 20 | model = Model(input_image, x, name=name) 21 | return model 22 | 23 | 24 | model = Discriminator() 25 | assert model.count_params() == 8640897 26 | assert model.output_shape == (None, 1) 27 | assert model.input_shape == (None, 128, 128, 3) 28 | -------------------------------------------------------------------------------- /examples/pix2pose/models/fully_convolutional_net.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.models import Model 2 | from tensorflow.keras.layers import ( 3 | Input, Conv2D, Activation, LeakyReLU, BatchNormalization) 4 | 5 | 6 | def block(x, filters, dilation_rate, alpha): 7 | x = Conv2D(filters, (3, 3), dilation_rate=dilation_rate, padding='same')(x) 8 | x = BatchNormalization()(x) 9 | x = LeakyReLU(alpha)(x) 10 | return x 11 | 12 | 13 | def FullyConvolutionalNet(num_classes, input_shape, filters=64, alpha=0.1): 14 | """Fully convolutional network for segmentation. 15 | 16 | # Arguments 17 | num_classes: Int. Number of output channels. 18 | input_shape: List of integers indicating ``[H, W, num_channels]``. 19 | filters: Int. Number of filters used in convolutional layers. 20 | alpha: Float. Alpha parameter of leaky relu. 
21 | 
22 |     # Returns
23 |         Keras/tensorflow model
24 | 
25 |     # References
26 |         - [Discovery of Latent 3D Keypoints via End-to-end
27 |           Geometric Reasoning](https://arxiv.org/abs/1807.03146)
28 |     """
29 |     base = inputs = Input(input_shape, name='image')
30 |     for base_arg, rate in enumerate([1, 1, 2, 4, 8, 16, 1, 2, 4, 8, 16, 1]):
31 |         base = block(base, filters, (rate, rate), alpha)
32 |     x = Conv2D(num_classes, (3, 3), padding='same')(base)
33 |     outputs = Activation('softmax', name='masks')(x)
34 |     model = Model(inputs, outputs, name='FULLY_CONVOLUTIONAL_NET')
35 |     return model
36 | 
--------------------------------------------------------------------------------
/examples/pix2pose/pipelines.py:
--------------------------------------------------------------------------------
 1 | from paz import processors as pr
 2 | from paz.pipelines import RandomizeRenderedImage as RandomizeRender
 3 | 
 4 | 
 5 | class DomainRandomization(pr.SequentialProcessor):
 6 |     """Performs domain randomization on a rendered image.
 7 |     """
 8 |     def __init__(self, renderer, image_shape, image_paths, inputs_to_shape,
 9 |                  labels_to_shape, num_occlusions=1):
10 |         super(DomainRandomization, self).__init__()
11 |         H, W = image_shape[:2]
12 |         self.add(pr.Render(renderer))
13 |         self.add(pr.ControlMap(RandomizeRender(image_paths), [0, 1], [0]))
14 |         self.add(pr.ControlMap(pr.NormalizeImage(), [0], [0]))
15 |         self.add(pr.ControlMap(pr.NormalizeImage(), [1], [1]))
16 |         self.add(pr.SequenceWrapper({0: inputs_to_shape},
17 |                                     {1: labels_to_shape}))
18 | 
--------------------------------------------------------------------------------
/examples/pix2pose/scenes/__init__.py:
--------------------------------------------------------------------------------
 1 | from .canonical_pose_pixel_mask_renderer import CanonicalPosePixelMaskRenderer
 2 | from .pixel_mask_renderer import PixelMaskRenderer
 3 | 
--------------------------------------------------------------------------------
/examples/probabilistic_keypoint_estimation/README.md:
--------------------------------------------------------------------------------
 1 | In this example we show how to train a model using tensorflow-probability.
 2 | 
 3 | Please install the kaggle Python module to automatically download the dataset.
 4 | 
 5 | Make sure you have a Kaggle account and have set up the API token,
 6 | 
 7 | and that you have accepted the rules of the competition (you will receive a 403 error if you have not).
 8 | 
 9 | ```bash
10 | kaggle competitions download -c facial-keypoints-detection
11 | ```
12 | 
13 | 
--------------------------------------------------------------------------------
/examples/probabilistic_keypoint_estimation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oarriaga/paz/b87754a3769d7cc955ce6cfde746d6cddeff540f/examples/probabilistic_keypoint_estimation/__init__.py
--------------------------------------------------------------------------------
/examples/probabilistic_keypoint_estimation/dataset_downloader.sh:
--------------------------------------------------------------------------------
 1 | echo "Creating default directory for dataset ..."
 2 | mkdir dataset
 3 | 
 4 | echo "Entering new directory ..."
 5 | cd dataset/
 6 | 
 7 | echo "Using kaggle API to download dataset..."
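# The kaggle CLI reads its credentials from an API token, usually stored at
# ~/.kaggle/kaggle.json (created from your Kaggle account settings); without
# it, or without having accepted the competition rules, the download below
# fails (a 403 error in the latter case).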
8 | echo "Make sure to have installed kaggle-API, set-up kaggle API Token and have accepted the rules of the Facial Keypoints Detection challenge" 9 | kaggle competitions download -c facial-keypoints-detection 10 | 11 | echo "Unzipping downloaded dataset" 12 | unzip facial-keypoints-detection.zip 13 | 14 | echo "Unzipping train split" 15 | unzip training.zip 16 | 17 | echo "Unzipping test split" 18 | unzip test.zip 19 | 20 | echo "Removing zip files" 21 | rm facial-keypoints-detection.zip 22 | rm training.zip 23 | rm test.zip 24 | -------------------------------------------------------------------------------- /examples/probabilistic_keypoint_estimation/demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import numpy as np 4 | 5 | from paz.backend.camera import Camera 6 | from paz.backend.camera import VideoPlayer 7 | from paz.models import HaarCascadeDetector 8 | 9 | from model import GaussianMixtureModel 10 | from pipelines import ProbabilisticKeypointPrediction 11 | 12 | 13 | description = 'Demo script for running 2D probabilistic keypoints' 14 | parser = argparse.ArgumentParser(description=description) 15 | parser.add_argument('-f', '--filters', default=8, type=int, 16 | help='Number of filters in convolutional blocks') 17 | parser.add_argument('-nk', '--num_keypoints', default=15, type=int, 18 | help='Number of keypoints') 19 | parser.add_argument('-is', '--image_size', default=96, type=int, 20 | help='Image size') 21 | parser.add_argument('-c', '--camera_id', type=int, default=0, 22 | help='Camera device ID') 23 | parser.add_argument('-d', '--detector_name', type=str, 24 | default='frontalface_default') 25 | parser.add_argument('-s', '--save_path', 26 | default=os.path.join( 27 | os.path.expanduser('~'), '.keras/paz/models'), 28 | type=str, help='Path for writing model weights and logs') 29 | args = parser.parse_args() 30 | 31 | # instantiate model 32 | batch_shape = (1, args.image_size, args.image_size, 1) 33 | model = GaussianMixtureModel(batch_shape, args.num_keypoints, args.filters) 34 | model.summary() 35 | 36 | # loading weights 37 | model_name = ['FaceKP', model.name, str(args.filters), str(args.num_keypoints)] 38 | model_name = '_'.join(model_name) 39 | save_path = os.path.join(args.save_path, model_name) 40 | model_path = os.path.join(save_path, '%s_weights.hdf5' % model_name) 41 | model.load_weights(model_path) 42 | model.compile(run_eagerly=False) 43 | 44 | model.predict(np.zeros((1, 96, 96, 1))) # first prediction takes a while... 
45 | # setting detector 46 | detector = HaarCascadeDetector(args.detector_name, 0) 47 | 48 | # setting prediction pipeline 49 | pipeline = ProbabilisticKeypointPrediction(detector, model) 50 | 51 | # setting camera and video player 52 | camera = Camera(args.camera_id) 53 | player = VideoPlayer((640, 480), pipeline, camera) 54 | player.run() 55 | -------------------------------------------------------------------------------- /examples/probabilistic_keypoint_estimation/demo_image.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pipelines import DetectGMMKeypointNet2D 3 | from paz.backend.image import show_image, load_image 4 | 5 | description = 'Demo for visualizing uncertainty in probabilistic keypoints' 6 | parser = argparse.ArgumentParser(description=description) 7 | parser.add_argument('-p', '--path', type=str, help='Path to image') 8 | args = parser.parse_args() 9 | 10 | pipeline = DetectGMMKeypointNet2D() 11 | image = load_image(args.path) 12 | inferences = pipeline(image) 13 | show_image(inferences['image']) 14 | show_image(inferences['contours'][0]) 15 | -------------------------------------------------------------------------------- /examples/probabilistic_keypoint_estimation/facial_keypoints.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import numpy as np 4 | from paz.abstract import Loader 5 | 6 | 7 | class FacialKeypoints(Loader): 8 | def __init__(self, path, split='train'): 9 | split_to_filename = {'train': 'training.csv', 'test': 'test.csv'} 10 | filename = split_to_filename[split] 11 | path = os.path.join(path, filename) 12 | super(FacialKeypoints, self).__init__( 13 | path, split, None, 'FacialKeypoints') 14 | 15 | def _load_faces(self, data_frame): 16 | faces = np.zeros((len(data_frame), 96, 96)) 17 | for arg, face in enumerate(data_frame.Image): 18 | faces[arg] = np.array(face.split(' '), dtype=int).reshape(96, 96) 19 | return faces 20 | 21 | def _load_keypoints(self, data_frame): 22 | keypoints = np.zeros((len(data_frame), 15, 2)) 23 | for arg, keypoint_set in data_frame.iloc[:, :-1].iterrows(): 24 | keypoints[arg] = keypoint_set.to_numpy().reshape(15, 2) 25 | return keypoints 26 | 27 | def _to_list_of_dictionaries(self, faces, keypoints=None): 28 | dataset = [] 29 | for arg in range(len(faces)): 30 | face, sample = faces[arg], {} 31 | sample['image'] = face 32 | if keypoints is not None: 33 | sample['keypoints'] = keypoints[arg] 34 | dataset.append(sample) 35 | return dataset 36 | 37 | def load_data(self): 38 | data_frame = pd.read_csv(self.path) 39 | data_frame.fillna(method='ffill', inplace=True) 40 | faces = self._load_faces(data_frame) 41 | if self.split == 'train': 42 | keypoints = self._load_keypoints(data_frame) 43 | dataset = self._to_list_of_dictionaries(faces, keypoints) 44 | else: 45 | dataset = self._to_list_of_dictionaries(faces, None) 46 | return dataset 47 | 48 | 49 | if __name__ == '__main__': 50 | path = 'dataset/' 51 | split = 'train' 52 | data_manager = FacialKeypoints(path, split) 53 | dataset = data_manager.load_data() 54 | -------------------------------------------------------------------------------- /examples/prototypical_networks/README.md: -------------------------------------------------------------------------------- 1 | # Prototypical networks implementation 2 | 3 | ## Dataset 4 | Download the dataset calling: 5 | 6 | `./download_omniglot.sh` 7 | 8 | ## Training 9 | Train dataset: 10 | `python 
train.py` 11 | -------------------------------------------------------------------------------- /examples/semantic_segmentation/backend.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from paz.backend.image.draw import put_text, draw_rectangle 4 | from paz.backend.image.draw import GREEN 5 | 6 | 7 | def draw_box(image, coordinates, class_name, score, 8 | color=GREEN, scale=0.7, weighted=False): 9 | x_min, y_min, x_max, y_max = coordinates 10 | if weighted: 11 | color = [int(channel * score) for channel in color] 12 | text = '{:0.2f}, {}'.format(score, class_name) 13 | put_text(image, text, (x_min, y_min - 10), scale, color, 1) 14 | draw_rectangle(image, (x_min, y_min), (x_max, y_max), color, 2) 15 | return image 16 | 17 | 18 | def resize_image_with_nearest_neighbors(image, size): 19 | """Resize image using nearest neighbors interpolation. 20 | 21 | # Arguments 22 | image: Numpy array. 23 | size: List of two ints. 24 | 25 | # Returns 26 | Numpy array. 27 | """ 28 | if(type(image) != np.ndarray): 29 | raise ValueError( 30 | 'Recieved Image is not of type numpy array', type(image)) 31 | else: 32 | return cv2.resize(image, size, interpolation=cv2.INTER_NEAREST) 33 | -------------------------------------------------------------------------------- /examples/semantic_segmentation/demo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import argparse 4 | gpus = tf.config.experimental.list_physical_devices('GPU') 5 | tf.config.experimental.set_memory_growth(gpus[0], True) 6 | 7 | from paz.datasets import CityScapes 8 | from paz import processors as pr 9 | from paz.backend.image import resize_image, load_image, show_image 10 | from processors import Round, MasksToColors 11 | from processors import ResizeImageWithNearestNeighbors 12 | from paz.models import UNET_VGG16 13 | 14 | 15 | description = 'Training script for semantic segmentation' 16 | parser = argparse.ArgumentParser(description=description) 17 | parser.add_argument('--label_path', type=str, help='Path to labels') 18 | parser.add_argument('--image_path', type=str, help='Path to images') 19 | parser.add_argument('--weights_path', type=str, help='Path to weights') 20 | args = parser.parse_args() 21 | 22 | data_manager = CityScapes(args.image_path, args.label_path, 'test') 23 | data = data_manager.load_data() 24 | 25 | 26 | class PostprocessSegmentation(pr.SequentialProcessor): 27 | def __init__(self, model, colors=None): 28 | super(PostprocessSegmentation, self).__init__() 29 | self.add(pr.UnpackDictionary(['image_path'])) 30 | self.add(pr.LoadImage()) 31 | self.add(pr.ResizeImage(model.input_shape[1:3])) 32 | self.add(pr.ConvertColorSpace(pr.RGB2BGR)) 33 | self.add(pr.SubtractMeanImage(pr.BGR_IMAGENET_MEAN)) 34 | self.add(pr.ExpandDims(0)) 35 | self.add(pr.Predict(model)) 36 | self.add(pr.Squeeze(0)) 37 | self.add(Round()) 38 | self.add(MasksToColors(model.output_shape[-1], colors)) 39 | self.add(pr.DenormalizeImage()) 40 | self.add(pr.CastImage('uint8')) 41 | self.add(ResizeImageWithNearestNeighbors((1024, 512))) 42 | # self.add(pr.ShowImage()) 43 | 44 | 45 | num_classes = len(data_manager.class_names) 46 | input_shape = (128, 128, 3) 47 | model = UNET_VGG16(num_classes, input_shape, 'imagenet', activation='softmax') 48 | post_process = PostprocessSegmentation(model) 49 | model.load_weights(args.weights_path) 50 | 51 | for sample in data: 52 | masks = post_process(sample) 53 
| image = load_image(sample['image_path']) 54 | image = resize_image(image, (1024, 512)) 55 | image_with_masks = ((0.6 * image) + (0.4 * masks)).astype("uint8") 56 | # image_and_masks = np.concatenate([image, masks], axis=1) 57 | show_image(image_with_masks) 58 | -------------------------------------------------------------------------------- /examples/semantic_segmentation/shapes.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from backend import draw_box 3 | from paz.datasets import Shapes 4 | 5 | if __name__ == '__main__': 6 | from paz.backend.image import show_image 7 | data_manager = Shapes(1000, (128, 128), iou_thresh=0.3, max_num_shapes=3) 8 | dataset = data_manager.load_data() 9 | for sample in dataset: 10 | image = sample['image'] 11 | masks = (sample['masks'] * 255.0).astype('uint8') 12 | background_mask, masks = masks[..., 0:1], masks[..., 1:] 13 | background_mask = np.repeat(background_mask, 3, axis=-1) 14 | boxes = sample['box_data'] 15 | for box in boxes: 16 | coordinates, class_arg = box[:4], box[4] 17 | # coordinates = denormalize_box(coordinates, (128, 128)) 18 | class_name = data_manager.arg_to_name[class_arg] 19 | image = draw_box(image, coordinates, class_name, 1.0) 20 | show_image(np.concatenate([image, masks, background_mask], axis=1)) 21 | -------------------------------------------------------------------------------- /examples/semantic_segmentation/test_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from paz.datasets import CityScapes 3 | from pipelines import PreprocessSegmentationIds 4 | from pipelines import PostprocessSegmentationIds 5 | from pipelines import PostProcessImage 6 | 7 | 8 | if __name__ == "__main__": 9 | from paz.backend.image import show_image 10 | 11 | label_path = '/home/octavio/Downloads/dummy/gtFine/' 12 | # label_path = '/home/octavio/Downloads/dummy/gtCoarse/' 13 | image_path = '/home/octavio/Downloads/dummy/RGB_images/leftImg8bit/' 14 | data_manager = CityScapes(image_path, label_path, 'train') 15 | dataset = data_manager.load_data() 16 | class_names = data_manager.class_names 17 | num_classes = len(class_names) 18 | preprocess = PreprocessSegmentationIds((128, 128), num_classes) 19 | postprocess_masks = PostprocessSegmentationIds(num_classes) 20 | postprocess_image = PostProcessImage() 21 | for sample in dataset: 22 | preprocessed_sample = preprocess(sample) 23 | image = preprocessed_sample['inputs']['input_1'] 24 | image = postprocess_image(image) 25 | masks = preprocessed_sample['labels']['masks'] 26 | masks = postprocess_masks(masks) 27 | mask_and_image = np.concatenate([masks, image], axis=1) 28 | show_image(mask_and_image) 29 | -------------------------------------------------------------------------------- /examples/semantic_segmentation/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | gpus = tf.config.experimental.list_physical_devices('GPU') 4 | tf.config.experimental.set_memory_growth(gpus[0], True) 5 | 6 | from tensorflow.keras.optimizers import Adam 7 | from tensorflow.keras.callbacks import CSVLogger, EarlyStopping 8 | from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau 9 | from paz.abstract import ProcessingSequence 10 | from paz.optimization import DiceLoss, JaccardLoss, FocalLoss 11 | from paz.models import UNET_VGG16 12 | # from paz import processors as pr 13 | from paz.datasets 
import Shapes 14 | from pipelines import PreprocessSegmentation 15 | from pipelines import PostprocessSegmentation 16 | 17 | num_classes = 3 18 | input_shape = (128, 128, 3) 19 | # softmax requires a background class and a background mask 20 | activation = 'softmax' 21 | # activation = 'sigmoid' 22 | num_samples = 1000 23 | iou_thresh = 0.3 24 | max_num_shapes = 3 25 | metrics = ['mean_squared_error'] 26 | # loss = JaccardLoss() 27 | loss = [DiceLoss(), JaccardLoss(), FocalLoss()] 28 | H, W = image_shape = input_shape[:2] 29 | batch_size = 5 30 | epochs = 10 31 | freeze = True 32 | stop_patience = 5 33 | reduce_patience = 2 34 | experiment_path = 'experiments/' 35 | 36 | data_manager = Shapes(num_samples, image_shape, iou_thresh=iou_thresh, 37 | max_num_shapes=max_num_shapes) 38 | num_classes = data_manager.num_classes 39 | data = data_manager.load_data() 40 | processor = PreprocessSegmentation(image_shape, num_classes) 41 | 42 | # setting additional callbacks 43 | callbacks = [] 44 | log_filename = os.path.join(experiment_path, 'optimization.log') 45 | log = CSVLogger(log_filename) 46 | stop = EarlyStopping('loss', patience=stop_patience) 47 | save_filename = os.path.join(experiment_path, 'model.hdf5') 48 | save = ModelCheckpoint(save_filename, 'loss', save_best_only=True) 49 | plateau = ReduceLROnPlateau('loss', patience=reduce_patience) 50 | callbacks.extend([log, stop, save, plateau]) 51 | 52 | model = UNET_VGG16(num_classes, input_shape, 'imagenet', freeze, activation) 53 | sequence = ProcessingSequence(processor, batch_size, data) 54 | optimizer = Adam() 55 | model.compile(optimizer, loss, metrics) 56 | model.summary() 57 | model.fit(sequence, batch_size=batch_size, epochs=epochs, callbacks=callbacks) 58 | 59 | colors = [[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]] 60 | postprocess = PostprocessSegmentation(model, colors) 61 | for sample in data: 62 | image = sample['image'] 63 | postprocess(image) 64 | -------------------------------------------------------------------------------- /examples/spatial_transfomer_networks/STN.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.models import Model 2 | from tensorflow.keras.layers import Input, Activation, MaxPool2D, Flatten 3 | from tensorflow.keras.layers import Conv2D, Dense 4 | 5 | from layers import BilinearInterpolation 6 | import numpy as np 7 | 8 | 9 | def get_initial_weights(output_size): 10 | b = np.zeros((2, 3), dtype='float32') 11 | b[0, 0] = 1 12 | b[1, 1] = 1 13 | W = np.zeros((output_size, 6), dtype='float32') 14 | weights = [W, b.flatten()] 15 | return weights 16 | 17 | 18 | def STN(input_shape=(60, 60, 1), interpolation_size=(30, 30), num_classes=10): 19 | image = Input(shape=input_shape) 20 | x = MaxPool2D(pool_size=(2, 2))(image) 21 | x = Conv2D(20, (5, 5))(x) 22 | x = MaxPool2D(pool_size=(2, 2))(x) 23 | x = Conv2D(20, (5, 5))(x) 24 | x = Flatten()(x) 25 | x = Dense(50)(x) 26 | x = Activation('relu')(x) 27 | x = Dense(6, weights=get_initial_weights(50))(x) 28 | interpolated_image = BilinearInterpolation(interpolation_size)([image, x]) 29 | x = Conv2D(32, (3, 3), padding='same')(interpolated_image) 30 | x = Activation('relu')(x) 31 | x = MaxPool2D(pool_size=(2, 2))(x) 32 | x = Conv2D(32, (3, 3))(x) 33 | x = Activation('relu')(x) 34 | x = MaxPool2D(pool_size=(2, 2))(x) 35 | x = Flatten()(x) 36 | x = Dense(256)(x) 37 | x = Activation('relu')(x) 38 | x = Dense(num_classes)(x) 39 | x = Activation('softmax', name='label')(x) 40 | return 
Model(image, [x, interpolated_image], name='STN') 41 | -------------------------------------------------------------------------------- /examples/spatial_transfomer_networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oarriaga/paz/b87754a3769d7cc955ce6cfde746d6cddeff540f/examples/spatial_transfomer_networks/__init__.py -------------------------------------------------------------------------------- /examples/spatial_transfomer_networks/cluttered_mnist.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class ClutteredMNIST(object): 5 | def __init__(self, dataset_path): 6 | self.dataset_path = dataset_path 7 | 8 | def to_categorical(self, y, num_classes=None): 9 | y = np.array(y, dtype='int').ravel() 10 | if not num_classes: 11 | num_classes = np.max(y) + 1 12 | n = y.shape[0] 13 | categorical = np.zeros((n, num_classes)) 14 | categorical[np.arange(n), y] = 1 15 | return categorical 16 | 17 | def load(self): 18 | num_classes = 10 19 | data = np.load(self.dataset_path) 20 | x_train = data['x_train'] 21 | x_train = x_train.reshape((x_train.shape[0], 60, 60, 1)) 22 | y_train = np.argmax(data['y_train'], axis=-1) 23 | y_train = self.to_categorical(y_train, num_classes) 24 | train_data = (x_train, y_train) 25 | 26 | x_val = data['x_valid'] 27 | x_val = x_val.reshape((x_val.shape[0], 60, 60, 1)) 28 | y_val = np.argmax(data['y_valid'], axis=-1) 29 | y_val = self.to_categorical(y_val, num_classes) 30 | val_data = (x_val, y_val) 31 | 32 | x_test = data['x_test'] 33 | x_test = x_test.reshape((x_test.shape[0], 60, 60, 1)) 34 | y_test = np.argmax(data['y_test'], axis=-1) 35 | y_test = self.to_categorical(y_test, num_classes) 36 | test_data = (x_test, y_test) 37 | return(train_data, val_data, test_data) 38 | -------------------------------------------------------------------------------- /examples/spatial_transfomer_networks/train.py: -------------------------------------------------------------------------------- 1 | from cluttered_mnist import ClutteredMNIST 2 | from STN import STN 3 | from paz.backend.image import write_image 4 | 5 | 6 | dataset_path = "mnist_cluttered_60x60_6distortions.npz" 7 | batch_size = 256 8 | num_epochs = 10 9 | save_path = '' 10 | 11 | data_manager = ClutteredMNIST(dataset_path) 12 | train_data, val_data, test_data = data_manager.load() 13 | x_train, y_train = train_data 14 | 15 | 16 | model = STN() 17 | model.compile(loss={'label': 'categorical_crossentropy'}, optimizer='adam') 18 | model.summary() 19 | 20 | 21 | def plot_predictions(samples): 22 | (lables, interpolations) = model.predict(samples) 23 | for arg, images in enumerate(zip(interpolations, samples)): 24 | interpolated, image = images 25 | interpolated = (interpolated * 255).astype('uint8') 26 | image = (image * 255).astype('uint8') 27 | write_image('images/interpolated_image_%03d.png' % arg, interpolated) 28 | write_image('images/original_image_%03d.png' % arg, image) 29 | 30 | 31 | model.fit(x_train, y_train, batch_size, num_epochs, validation_data=val_data) 32 | plot_predictions(test_data[0][:9]) 33 | -------------------------------------------------------------------------------- /examples/structure_from_motion/bundle_adjustment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.optimize import least_squares 3 | from paz.backend.keypoints import project_to_image 4 | from paz.backend.groups 
import rotation_matrix_to_compact_axis_angle 5 | from paz.backend.groups import rotation_vector_to_rotation_matrix 6 | 7 | 8 | def residuals(camera_pose, points3D, points2D, camera_intrinsics): 9 | rotation = camera_pose[:3] 10 | rotation = rotation_vector_to_rotation_matrix(rotation) 11 | translation = camera_pose[3: 6] 12 | project2D = project_to_image(rotation, translation, points3D, 13 | camera_intrinsics) 14 | joints_distance = np.linalg.norm(points2D - project2D, axis=1) 15 | return joints_distance 16 | 17 | 18 | def local_bundle_adjustment(rotation, translation, points3D, points2D, 19 | camera_intrinsics): 20 | num_points = points3D.shape[0] 21 | axis_angle = rotation_matrix_to_compact_axis_angle(rotation) 22 | camera_pose = np.concatenate([axis_angle, translation.reshape(-1)]) 23 | param_init = np.hstack((camera_pose, points3D.ravel())) 24 | 25 | result = least_squares(residuals, param_init, 26 | args=(points3D, points2D, camera_intrinsics)) 27 | 28 | optimized_params = result.x 29 | 30 | # Extract the optimized camera poses and 3D points 31 | optimized_camera_poses = optimized_params[:6] 32 | optimized_point_cloud = optimized_params[6:].reshape((num_points, 3)) 33 | 34 | return optimized_point_cloud, optimized_camera_poses 35 | -------------------------------------------------------------------------------- /examples/structure_from_motion/demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from paz.backend.image import load_image 4 | from pipeline_cv2 import StructureFromMotion 5 | # from pipeline_np import StructureFromMotion 6 | import numpy as np 7 | from backend import camera_intrinsics_from_dfov 8 | 9 | 10 | parser = argparse.ArgumentParser(description='Minimal hand keypoint detection') 11 | root = os.path.expanduser('~/DFKI/paz/examples/structure_from_motion/datasets') 12 | parser.add_argument('-c', '--camera_id', type=int, default=0, 13 | help='Camera device ID') 14 | parser.add_argument('-i', '--images_path', type=str, 15 | default='datasets/SixDPose/cheezIt_textured', 16 | # default='datasets/SixDPose/cheezIt', 17 | # default='datasets/images1', 18 | help='Directory for images') 19 | parser.add_argument('-DFOV', '--diagonal_field_of_view', type=float, 20 | default=54, help='Diagonal field of view in degrees') 21 | args = parser.parse_args() 22 | 23 | 24 | camera_intrinsics = np.array([[568.996140852, 0, 643.21055941], 25 | [0, 568.988362396, 477.982801038], 26 | [0, 0, 1]]) 27 | 28 | images = [] 29 | 30 | # image_files = os.listdir(args.images_path) 31 | # for filename in image_files: 32 | # image = load_image(os.path.join(args.images_path, filename)) 33 | # images.append(image) 34 | 35 | # detect = StructureFromMotion(camera_intrinsics) 36 | # inferences = detect(images) 37 | 38 | 39 | # for custom objects 40 | image_files = os.listdir(args.images_path) 41 | image_files = sorted(image_files, key=lambda f: int(f.split('.')[0])) 42 | for filename in image_files: 43 | image = load_image(os.path.join(args.images_path, filename)) 44 | images.append(image) 45 | 46 | H, W = images[0].shape[:2] 47 | camera_intrinsics = camera_intrinsics_from_dfov( 48 | args.diagonal_field_of_view, H, W) 49 | 50 | f = np.sqrt(H ** 2 + W ** 2) 51 | camera_intrinsics = np.asarray([[f, 0, W/2], 52 | [0, f, H/2], 53 | [0, 0, 1]], np.float32) 54 | 55 | print(camera_intrinsics) 56 | detect = StructureFromMotion(camera_intrinsics) 57 | # inferences = detect(images[30:40]) 58 | inferences = detect(images[:8]) 59 | 
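# Rough sanity check (assuming a simple pinhole model): hard-coding the focal
# length to the image diagonal, f = sqrt(H^2 + W^2), corresponds to a diagonal
# field of view of 2 * arctan(0.5), roughly 53.1 degrees, which is close to the
# default --diagonal_field_of_view of 54 above. Note that this assignment
# overrides the intrinsics returned by camera_intrinsics_from_dfov just before
# it. The general pinhole relation, if one wanted to honour the flag exactly,
# would be something like:
#
#     f = np.hypot(H, W) / (2.0 * np.tan(np.radians(args.diagonal_field_of_view) / 2.0))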
-------------------------------------------------------------------------------- /examples/tutorials/bounding_boxes.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from tensorflow.keras.utils import get_file 4 | 5 | import paz.processors as pr 6 | from paz.abstract import SequentialProcessor 7 | from paz.backend.image import load_image 8 | 9 | # let's download a test image and put it inside our PAZ directory 10 | IMAGE_URL = ('https://github.com/oarriaga/altamira-data/releases/download' 11 | '/v0.9/object_detection_augmentation.png') 12 | filename = os.path.basename(IMAGE_URL) 13 | image_fullpath = get_file(filename, IMAGE_URL, cache_subdir='paz/tutorials') 14 | 15 | 16 | # Boxes 17 | 18 | # Let's first build our labels: 19 | # Keep in mind that the origin of our images is located at the **top-left**. 20 | 21 | # The x_min, y_min are the **noramlized** coordinates 22 | # of **top-left** bounding-box corner. 23 | height, width = load_image(image_fullpath).shape[:2] 24 | x_min_human, y_min_human = 200 / width, 60 / height 25 | x_min_horse, y_min_horse = 100 / width, 90 / height 26 | 27 | # The x_max, y_max are the **normalized** coordinates 28 | # of **bottom-right** bounding-box corner. 29 | x_max_human, y_max_human = 300 / width, 200 / height 30 | x_max_horse, y_max_horse = 400 / width, 300 / height 31 | 32 | # Our image has 1 + 2 classes. The **first** class is the background-class. 33 | # The other 2 classes correspond to each object i.e. person (human), horse. 34 | num_classes = 3 35 | background_class, human_class, horse_class = 0, 1, 2 36 | class_names = ['background', 'human', 'horse'] 37 | 38 | box_data = np.array( 39 | [[x_min_human, y_min_human, x_max_human, y_max_human, human_class], 40 | [x_min_horse, y_min_horse, x_max_horse, y_max_horse, horse_class]]) 41 | 42 | # Let's create a simple visualization pipeline. 43 | # For an explanation of what control-map is doing please check our tutorial at: 44 | # paz/examples/tutorials/controlmap_processor.py 45 | draw_boxes = SequentialProcessor() 46 | draw_boxes.add(pr.ControlMap(pr.ToBoxes2D(class_names), [1], [1])) 47 | draw_boxes.add(pr.ControlMap(pr.LoadImage(), [0], [0])) 48 | draw_boxes.add(pr.ControlMap(pr.DenormalizeBoxes2D(), [0, 1], [1], {0: 0})) 49 | draw_boxes.add(pr.DrawBoxes2D(class_names)) 50 | draw_boxes.add(pr.ShowImage()) 51 | 52 | 53 | # We can now look at our boxes! 54 | draw_boxes(image_fullpath, box_data) 55 | -------------------------------------------------------------------------------- /examples/visual_voice_activity_detection/generator.py: -------------------------------------------------------------------------------- 1 | class Generator(object): 2 | """Abstract class for generating a dataset. 3 | 4 | # Arguments 5 | path: String. Path to data. 6 | split: String. Dataset split e.g. traing, val, test. 7 | class_names: List of strings. Label names of the classes. 8 | name: String. Dataset name. 9 | 10 | # Properties 11 | name: Str. 12 | path: Str. 13 | split: Str or Flag. 14 | class_names: List of strings. 15 | num_classes: Int. 16 | 17 | # Methods 18 | __call__() 19 | """ 20 | def __init__(self, path, split, class_names, name): 21 | self.path = path 22 | self.split = split 23 | self.class_names = class_names 24 | self.name = name 25 | 26 | def __call__(self): 27 | """Abstract method for generating a dataset. 28 | 29 | # Yields 30 | tuple containing a sample and a label. 
31 | """ 32 | raise NotImplementedError() 33 | 34 | # Name of the dataset (VOC2007, COCO, OpenImagesV4, etc) 35 | @property 36 | def name(self): 37 | return self._name 38 | 39 | @name.setter 40 | def name(self, name): 41 | self._name = name 42 | 43 | # Path to the dataset, ideally loaded from a configuration file. 44 | @property 45 | def path(self): 46 | return self._path 47 | 48 | @path.setter 49 | def path(self, path): 50 | self._path = path 51 | 52 | # Kind of split to use, either train, validation, test, or trainval. 53 | @property 54 | def split(self): 55 | return self._split 56 | 57 | @split.setter 58 | def split(self, split): 59 | self._split = split 60 | 61 | # List of class names to train/test. 62 | @property 63 | def class_names(self): 64 | return self._class_names 65 | 66 | @class_names.setter 67 | def class_names(self, class_names): 68 | # assert type(class_names) == list 69 | self._class_names = class_names 70 | 71 | @property 72 | def num_classes(self): 73 | if isinstance(self.class_names, list): 74 | return len(self.class_names) 75 | else: 76 | raise ValueError('class names are not a list') 77 | -------------------------------------------------------------------------------- /examples/visual_voice_activity_detection/live_demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from paz.backend.camera import VideoPlayer, Camera 4 | import paz.pipelines.detection as dt 5 | 6 | parser = argparse.ArgumentParser(description='Visual Voice Activity Detection Live Demonstration') 7 | parser.add_argument('-c', '--camera_id', type=int, default=0, 8 | help='Camera device ID') 9 | args = parser.parse_args() 10 | 11 | pipeline = dt.DetectVVAD() 12 | camera = Camera(args.camera_id) 13 | player = VideoPlayer((640, 480), pipeline, camera) 14 | player.run() 15 | -------------------------------------------------------------------------------- /examples/visual_voice_activity_detection/recorded_demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from paz.backend.camera import VideoPlayer 4 | import paz.pipelines.detection as dt 5 | 6 | parser = argparse.ArgumentParser(description='Visual Voice Activity Detection Recorded Demonstration') 7 | parser.add_argument('-i', '--input_path', type=str, default="./demo_video.mp4", 8 | help='Path to the video file to be used as input for the VVAD Pipeline.') 9 | parser.add_argument('-o', '--output_path', type=str, default="./demo_video_labeled.avi", 10 | help='Path to the video file to be used as output for the VVAD Pipeline.') 11 | args = parser.parse_args() 12 | 13 | pipeline = dt.DetectVVAD() 14 | player = VideoPlayer((640, 480), pipeline, None) 15 | 16 | player.record_from_file(video_file_path=args.input_path, 17 | name=args.output_path, fps=25) 18 | -------------------------------------------------------------------------------- /paz/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.2.6' 2 | -------------------------------------------------------------------------------- /paz/abstract/__init__.py: -------------------------------------------------------------------------------- 1 | from .loader import Loader 2 | from .sequence import GeneratingSequence, ProcessingSequence 3 | from .messages import Box2D, Pose6D 4 | from .processor import Processor, SequentialProcessor 5 | -------------------------------------------------------------------------------- 
/paz/abstract/loader.py: -------------------------------------------------------------------------------- 1 | class Loader(object): 2 | """Abstract class for loading a dataset. 3 | 4 | # Arguments 5 | path: String. Path to data. 6 | split: String. Dataset split e.g. traing, val, test. 7 | class_names: List of strings. Label names of the classes. 8 | name: String. Dataset name. 9 | 10 | # Properties 11 | name: Str. 12 | path: Str. 13 | split: Str or Flag. 14 | class_names: List of strings. 15 | num_classes: Int. 16 | 17 | # Methods 18 | load_data() 19 | """ 20 | def __init__(self, path, split, class_names, name): 21 | self.path = path 22 | self.split = split 23 | self.class_names = class_names 24 | self.name = name 25 | 26 | def load_data(self): 27 | """Abstract method for loading dataset. 28 | 29 | # Returns 30 | dictionary containing absolute image paths as keys, and 31 | ground truth vectors as values. 32 | """ 33 | raise NotImplementedError() 34 | 35 | # Name of the dataset (VOC2007, COCO, OpenImagesV4, etc) 36 | @property 37 | def name(self): 38 | return self._name 39 | 40 | @name.setter 41 | def name(self, name): 42 | self._name = name 43 | 44 | # Path to the dataset, ideally loaded from a configuration file. 45 | @property 46 | def path(self): 47 | return self._path 48 | 49 | @path.setter 50 | def path(self, path): 51 | self._path = path 52 | 53 | # Kind of split to use, either train, validation, test, or trainval. 54 | @property 55 | def split(self): 56 | return self._split 57 | 58 | @split.setter 59 | def split(self, split): 60 | self._split = split 61 | 62 | # List of class names to train/test. 63 | @property 64 | def class_names(self): 65 | return self._class_names 66 | 67 | @class_names.setter 68 | def class_names(self, class_names): 69 | # assert type(class_names) == list 70 | self._class_names = class_names 71 | 72 | @property 73 | def num_classes(self): 74 | if isinstance(self.class_names, list): 75 | return len(self.class_names) 76 | else: 77 | raise ValueError('class names are not a list') 78 | -------------------------------------------------------------------------------- /paz/applications.py: -------------------------------------------------------------------------------- 1 | from .pipelines import SSD512COCO 2 | from .pipelines import SSD300VOC 3 | from .pipelines import SSD512YCBVideo 4 | from .pipelines import SSD300FAT 5 | from .pipelines import DetectMiniXceptionFER 6 | from .pipelines import MiniXceptionFER 7 | from .pipelines import FaceKeypointNet2D32 8 | from .pipelines import HeadPoseKeypointNet2D32 9 | from .pipelines import HaarCascadeFrontalFace 10 | from .pipelines import EFFICIENTDETD0COCO 11 | from .pipelines import EFFICIENTDETD1COCO 12 | from .pipelines import EFFICIENTDETD2COCO 13 | from .pipelines import EFFICIENTDETD3COCO 14 | from .pipelines import EFFICIENTDETD4COCO 15 | from .pipelines import EFFICIENTDETD5COCO 16 | from .pipelines import EFFICIENTDETD6COCO 17 | from .pipelines import EFFICIENTDETD7COCO 18 | from .pipelines import EFFICIENTDETD0VOC 19 | 20 | from .pipelines import SinglePowerDrillPIX2POSE6D 21 | from .pipelines import MultiPowerDrillPIX2POSE6D 22 | from .pipelines import PIX2POSEPowerDrill 23 | from .pipelines import PIX2YCBTools6D 24 | 25 | from .pipelines import HigherHRNetHumanPose2D 26 | from .pipelines import DetNetHandKeypoints 27 | from .pipelines import MinimalHandPoseEstimation 28 | from .pipelines import DetectMinimalHand 29 | from .pipelines import ClassifyHandClosure 30 | from .pipelines import SSD512MinimalHandPose 31 
| from .pipelines import EstimateHumanPose 32 | -------------------------------------------------------------------------------- /paz/backend/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchors import build_anchors 2 | from .anchors import build_octaves 3 | from .anchors import build_aspect 4 | from .anchors import build_scales 5 | from .anchors import build_strides 6 | from .anchors import make_branch_boxes 7 | from .anchors import compute_box_coordinates 8 | from .anchors import build_base_anchor 9 | from .anchors import compute_aspect_size 10 | from .anchors import compute_anchor_dims 11 | from .anchors import compute_anchor_centres 12 | 13 | from .poses import match_poses 14 | from .poses import rotation_matrix_to_axis_angle 15 | from .poses import concatenate_poses 16 | from .poses import concatenate_scale 17 | from .poses import augment_pose_6D 18 | from .poses import generate_random_transformation 19 | 20 | from .standard import compute_common_row_indices 21 | 22 | from .mask import mask_to_box 23 | -------------------------------------------------------------------------------- /paz/backend/groups/SE3.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def to_affine_matrix(rotation_matrix, translation): 5 | """Builds an affine matrix from a rotation matrix and a translation vector. 6 | 7 | # Arguments 8 | rotation_matrix: Array (3, 3). Representing a rotation matrix. 9 | translation: Array (3). Translation vector. 10 | 11 | # Returns 12 | Array (4, 4) representing an affine matrix. 13 | """ 14 | if len(translation) != 3: 15 | raise ValueError('Translation should be of length 3') 16 | if rotation_matrix.shape != (3, 3): 17 | raise ValueError('Rotation matrix should be of shape (3, 3)') 18 | translation = translation.reshape(3, 1) 19 | affine_top = np.concatenate([rotation_matrix, translation], axis=1) 20 | affine_row = np.array([[0.0, 0.0, 0.0, 1.0]]) 21 | affine_matrix = np.concatenate([affine_top, affine_row], axis=0) 22 | return affine_matrix 23 | 24 | 25 | def to_affine_matrices(rotations, translations): 26 | """Constructs affine matrices from a vector of rotation matrices 27 | and a vector of translation vectors. 28 | 29 | # Arguments 30 | rotations: Rotation matrices [N, 3, 3]. 31 | translations: Translation vectors [N, 3]. 32 | 33 | # Returns 34 | Transformation matrices [N, 4, 4]. 35 | """ 36 | affine_matrices = [] 37 | for rotation, translation in zip(rotations, translations): 38 | transformation = to_affine_matrix(rotation, translation) 39 | affine_matrices.append(transformation) 40 | return np.array(affine_matrices) 41 | -------------------------------------------------------------------------------- /paz/backend/groups/__init__.py: -------------------------------------------------------------------------------- 1 | from .quaternion import * 2 | from .SO3 import * 3 | from .SE3 import * 4 | -------------------------------------------------------------------------------- /paz/backend/image/__init__.py: -------------------------------------------------------------------------------- 1 | from .opencv_image import * 2 | from .image import * 3 | from .draw import * -------------------------------------------------------------------------------- /paz/backend/mask.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def mask_to_box(mask, mask_value): 5 | """Computes bounding box from mask image. 
6 | 7 | # Arguments 8 | mask: Array mask corresponding to raw image. 9 | mask_value: Int, pixel gray value of foreground in mask image. 10 | 11 | # Returns: 12 | box: List containing box coordinates. 13 | """ 14 | masked = np.where(mask == mask_value) 15 | mask_x, mask_y = masked[1], masked[0] 16 | if mask_x.size <= 0 or mask_y.size <= 0: 17 | box = [0, 0, 0, 0] 18 | else: 19 | x_min, y_min = np.min(mask_x), np.min(mask_y) 20 | x_max, y_max = np.max(mask_x), np.max(mask_y) 21 | box = [x_min, y_min, x_max, y_max] 22 | return box 23 | -------------------------------------------------------------------------------- /paz/backend/munkres.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class UnsolvableMatrix(Exception): 5 | """ 6 | Exception raised for unsolvable matrices 7 | """ 8 | pass 9 | 10 | 11 | class DISALLOWED_OBJ(object): 12 | pass 13 | 14 | 15 | DISALLOWED = DISALLOWED_OBJ() 16 | DISALLOWED_PRINTVAL = "D" 17 | 18 | 19 | def get_cover_matrix(shape): 20 | """Returns the initialized row and column cover matrix. 21 | 22 | # Arguments 23 | shape: Tuple. Shape of the cover matrix. 24 | """ 25 | row_covered = np.zeros(shape, dtype='bool') 26 | col_covered = np.zeros(shape, dtype='bool') 27 | return row_covered, col_covered 28 | 29 | 30 | def find_uncovered_zero(n, cost_matrix, row_covered, col_covered, i0, j0): 31 | row = -1 32 | col = -1 33 | done = False 34 | for row_arg in range(i0, n): 35 | for col_arg in range(j0, n): 36 | if (cost_matrix[row_arg][col_arg] == 0) and \ 37 | (not row_covered[row_arg]) and \ 38 | (not col_covered[col_arg]): 39 | row = row_arg 40 | col = col_arg 41 | done = True 42 | if done: 43 | break 44 | return (row, col) 45 | 46 | 47 | def find_star_in_row(n, row_arg, marked): 48 | col = -1 49 | for col_arg in range(n): 50 | if marked[row_arg][col_arg] == 1: 51 | col = col_arg 52 | break 53 | return col 54 | 55 | 56 | def find_star_in_col(n, col_arg, marked): 57 | row = -1 58 | for row_arg in range(n): 59 | if marked[row_arg][col_arg] == 1: 60 | row = row_arg 61 | break 62 | return row 63 | 64 | 65 | def find_prime_in_row(n, row_arg, marked): 66 | col = -1 67 | for col_arg in range(n): 68 | if marked[row_arg][col_arg] == 2: 69 | col = col_arg 70 | break 71 | return col 72 | 73 | 74 | def get_min_value(series): 75 | values = [] 76 | for x in series: 77 | if type(x) is not type(DISALLOWED): 78 | values.append(x) 79 | if len(values) == 0: 80 | raise UnsolvableMatrix("One row is entirely DISALLOWED.") 81 | min_value = np.min(values) 82 | return min_value 83 | 84 | 85 | def find_smallest_uncovered(n, row_covered, col_covered, cost_matrix): 86 | minval = np.inf 87 | for i in range(n): 88 | for j in range(n): 89 | if (not row_covered[i]) and (not col_covered[j]): 90 | if cost_matrix[i][j] is not DISALLOWED and \ 91 | minval > cost_matrix[i][j]: 92 | minval = cost_matrix[i][j] 93 | return minval 94 | -------------------------------------------------------------------------------- /paz/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import get_class_names 2 | from .voc import VOC 3 | from .fat import FAT 4 | from .open_images import OpenImages 5 | from .ferplus import FERPlus 6 | from .fer import FER 7 | from .cityscapes import CityScapes 8 | from .coco import JOINT_CONFIG 9 | from .coco import FLIP_CONFIG 10 | from .coco import HUMAN_JOINT_CONFIG 11 | from .CMU_poanoptic import MINIMAL_HAND_CONFIG 12 | from .CMU_poanoptic import 
IK_UNIT_LENGTH 13 | from .CMU_poanoptic import MANOHandJoints 14 | from .CMU_poanoptic import MPIIHandJoints 15 | from .shapes import Shapes 16 | from .omniglot import Omniglot 17 | -------------------------------------------------------------------------------- /paz/datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | 4 | from paz.abstract import Loader 5 | 6 | from .utils import get_class_names 7 | 8 | 9 | class CityScapes(Loader): 10 | """CityScapes data manager for loading the paths of the RGB and 11 | segmentation masks. 12 | 13 | # Arguments 14 | image_path: String. Path to RGB images e.g. '/home/user/leftImg8bit/' 15 | label_path: String. Path to label masks e.g. '/home/user/gtFine/' 16 | split: String. Valid option contain 'train', 'val' or 'test'. 17 | class_names: String or list: If 'all' then it loads all default 18 | class names. 19 | 20 | # References 21 | -[The Cityscapes Dataset for Semantic Urban Scene Understanding]( 22 | https://www.cityscapes-dataset.com/citation/) 23 | """ 24 | def __init__(self, image_path, label_path, split, class_names='all'): 25 | if split not in ['train', 'val', 'test']: 26 | raise ValueError('Invalid split name:', split) 27 | self.image_path = os.path.join(image_path, split) 28 | self.label_path = os.path.join(label_path, split) 29 | if class_names == 'all': 30 | class_names = get_class_names('CityScapes') 31 | super(CityScapes, self).__init__( 32 | None, split, class_names, 'CityScapes') 33 | 34 | def load_data(self): 35 | image_path = os.path.join(self.image_path, '*/*.png') 36 | label_path = os.path.join(self.label_path, '*/*labelIds.png') 37 | image_paths = glob.glob(image_path) 38 | label_paths = glob.glob(label_path) 39 | image_paths = sorted(image_paths) 40 | label_paths = sorted(label_paths) 41 | assert len(image_paths) == len(label_paths) 42 | dataset = [] 43 | for image_path, label_path in zip(image_paths, label_paths): 44 | sample = {'image_path': image_path, 'label_path': label_path} 45 | dataset.append(sample) 46 | return dataset 47 | -------------------------------------------------------------------------------- /paz/datasets/fer.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tensorflow.keras.utils import to_categorical 3 | import numpy as np 4 | 5 | from .utils import get_class_names 6 | from ..abstract import Loader 7 | from ..backend.image import resize_image 8 | 9 | 10 | class FER(Loader): 11 | """Class for loading FER2013 emotion classification dataset. 12 | # Arguments 13 | path: String. Full path to fer2013.csv file. 14 | split: String. Valid option contain 'train', 'val' or 'test'. 15 | class_names: String or list: If 'all' then it loads all default 16 | class names. 17 | image_size: List of length two. Indicates the shape in which 18 | the image will be resized. 
19 | 20 | # References 21 | -[FER2013 Dataset and Challenge](kaggle.com/c/challenges-in-\ 22 | representation-learning-facial-expression-recognition-challenge) 23 | """ 24 | 25 | def __init__( 26 | self, path, split='train', class_names='all', image_size=(48, 48)): 27 | 28 | if class_names == 'all': 29 | class_names = get_class_names('FER') 30 | 31 | path = os.path.join(path, 'fer2013.csv') 32 | super(FER, self).__init__(path, split, class_names, 'FER') 33 | self.image_size = image_size 34 | self._split_to_filter = {'train': 'Training', 'val': 'PublicTest', 35 | 'test': 'PrivateTest'} 36 | 37 | def load_data(self): 38 | data = np.genfromtxt(self.path, str, delimiter=',', skip_header=1) 39 | data = data[data[:, -1] == self._split_to_filter[self.split]] 40 | faces = np.zeros((len(data), *self.image_size)) 41 | for sample_arg, sample in enumerate(data): 42 | face = np.array(sample[1].split(' '), dtype=int).reshape(48, 48) 43 | face = resize_image(face, self.image_size) 44 | faces[sample_arg, :, :] = face 45 | emotions = to_categorical(data[:, 0].astype(int), self.num_classes) 46 | 47 | data = [] 48 | for face, emotion in zip(faces, emotions): 49 | sample = {'image': face, 'label': emotion} 50 | data.append(sample) 51 | return data 52 | -------------------------------------------------------------------------------- /paz/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .detection import evaluateMAP 2 | from .pose import EvaluateADD 3 | from .pose import EvaluateADI 4 | -------------------------------------------------------------------------------- /paz/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .detection import SSD300 2 | from .detection import SSD512 3 | from .detection import HaarCascadeDetector 4 | from .detection import EFFICIENTDETD0 5 | from .detection import EFFICIENTDETD1 6 | from .detection import EFFICIENTDETD2 7 | from .detection import EFFICIENTDETD3 8 | from .detection import EFFICIENTDETD4 9 | from .detection import EFFICIENTDETD5 10 | from .detection import EFFICIENTDETD6 11 | from .detection import EFFICIENTDETD7 12 | from .keypoint.simplebaselines import SimpleBaseline 13 | from .keypoint.projector import Projector 14 | from .keypoint.keypointnet import KeypointNet 15 | from .keypoint.keypointnet import KeypointNetShared 16 | from .keypoint.keypointnet import KeypointNet2D 17 | from .keypoint.hrnet import HRNetResidual 18 | from .keypoint.hrnet import HRNetDense 19 | from .keypoint.detnet import DetNet 20 | from .keypoint.iknet import IKNet 21 | from .classification import build_xception 22 | from .classification import MiniXception 23 | from .classification import ProtoEmbedding 24 | from .classification import ProtoNet 25 | from .classification import CNN2Plus1D 26 | from .classification import VVAD_LRS3_LSTM 27 | from .segmentation import UNET 28 | from .segmentation import UNET_VGG16 29 | from .segmentation import UNET_VGG19 30 | from .segmentation import UNET_RESNET50 31 | from .pose_estimation import HigherHRNet 32 | from .pose_estimation import EfficientPosePhi0 33 | from .pose_estimation import EfficientPosePhi1 34 | from .pose_estimation import EfficientPosePhi2 35 | from .pose_estimation import EfficientPosePhi3 36 | from .pose_estimation import EfficientPosePhi4 37 | from .pose_estimation import EfficientPosePhi5 38 | from .pose_estimation import EfficientPosePhi6 39 | from .pose_estimation import EfficientPosePhi7 40 | 
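# Illustrative usage sketch (not part of this module): the constructor calls
# below mirror the ones exercised in tests/paz/models further down in this
# listing; any other argument values would be assumptions.
#     SSD300(num_classes=21, base_weights='VOC', head_weights='VOC')
#     UNET_VGG16(weights=None)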
-------------------------------------------------------------------------------- /paz/models/classification/__init__.py: -------------------------------------------------------------------------------- 1 | from .xception import build_xception 2 | from .xception import MiniXception 3 | from .protonet import ProtoEmbedding 4 | from .protonet import ProtoNet 5 | from .cnn2Plus1 import CNN2Plus1D 6 | from .vvad_lrs3 import VVAD_LRS3_LSTM 7 | -------------------------------------------------------------------------------- /paz/models/detection/__init__.py: -------------------------------------------------------------------------------- 1 | from .ssd300 import SSD300 2 | from .ssd512 import SSD512 3 | from .haar_cascade import HaarCascadeDetector 4 | from .efficientdet import EFFICIENTDETD0 5 | from .efficientdet import EFFICIENTDETD1 6 | from .efficientdet import EFFICIENTDETD2 7 | from .efficientdet import EFFICIENTDETD3 8 | from .efficientdet import EFFICIENTDETD4 9 | from .efficientdet import EFFICIENTDETD5 10 | from .efficientdet import EFFICIENTDETD6 11 | from .efficientdet import EFFICIENTDETD7 12 | -------------------------------------------------------------------------------- /paz/models/detection/efficientdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .efficientdet import EFFICIENTDETD0 2 | from .efficientdet import EFFICIENTDETD1 3 | from .efficientdet import EFFICIENTDETD2 4 | from .efficientdet import EFFICIENTDETD3 5 | from .efficientdet import EFFICIENTDETD4 6 | from .efficientdet import EFFICIENTDETD5 7 | from .efficientdet import EFFICIENTDETD6 8 | from .efficientdet import EFFICIENTDETD7 9 | from .efficientdet import EfficientNet_to_BiFPN 10 | from .efficientdet import BiFPN 11 | from .efficientnet import EFFICIENTNET 12 | -------------------------------------------------------------------------------- /paz/models/detection/haar_cascade.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from tensorflow.keras.utils import get_file 4 | 5 | WEIGHT_PATH = ('https://raw.githubusercontent.com/opencv/opencv/' 6 | 'master/data/haarcascades/') 7 | 8 | 9 | class HaarCascadeDetector(object): 10 | """Haar cascade face detector. 11 | 12 | # Arguments 13 | weights: String. Postfix to the default OpenCV haarcascades XML files, 14 | e.g. `eye`, `frontalface_alt2`, `fullbody`. 15 | class_arg: Int. Class label argument. 16 | scale: Float. Scale for image reduction. 17 | neighbors: Int. Minimum number of neighbors. 18 | 19 | # Reference 20 | - [Haar 21 | Cascades](https://github.com/opencv/opencv/tree/master/data/haarcascades) 22 | """ 23 | 24 | def __init__(self, weights='frontalface_default', class_arg=None, 25 | scale=1.3, neighbors=5): 26 | self.weights = weights 27 | self.name = 'haarcascade_' + weights + '.xml' 28 | self.url = WEIGHT_PATH + self.name 29 | self.path = get_file(self.name, self.url, cache_subdir='paz/models') 30 | self.model = cv2.CascadeClassifier(self.path) 31 | self.class_arg = class_arg 32 | self.scale = scale 33 | self.neighbors = neighbors 34 | 35 | def __call__(self, gray_image): 36 | """ Detects faces from gray images. 37 | 38 | # Arguments 39 | gray_image: Numpy array of shape ``(H, W)``. 40 | 41 | # Returns 42 | Numpy array of shape ``(num_boxes, 4)``. 
43 | """ 44 | if len(gray_image.shape) != 2: 45 | raise ValueError('Invalid gray image shape:', gray_image.shape) 46 | args = (gray_image, self.scale, self.neighbors) 47 | boxes = self.model.detectMultiScale(*args) 48 | boxes_point_form = np.zeros_like(boxes) 49 | if len(boxes) != 0: 50 | boxes_point_form[:, 0] = boxes[:, 0] 51 | boxes_point_form[:, 1] = boxes[:, 1] 52 | boxes_point_form[:, 2] = boxes[:, 0] + boxes[:, 2] 53 | boxes_point_form[:, 3] = boxes[:, 1] + boxes[:, 3] 54 | if self.class_arg is not None: 55 | class_args = np.ones((len(boxes_point_form), 1)) 56 | class_args = class_args * self.class_arg 57 | boxes_point_form = np.hstack((boxes_point_form, class_args)) 58 | return boxes_point_form.astype('int') 59 | -------------------------------------------------------------------------------- /paz/models/keypoint/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oarriaga/paz/b87754a3769d7cc955ce6cfde746d6cddeff540f/paz/models/keypoint/__init__.py -------------------------------------------------------------------------------- /paz/models/keypoint/projector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow.keras.backend as K 3 | 4 | 5 | class Projector(object): 6 | """Projects keypoints from image coordinates to 3D space and viceversa. 7 | This model uses the camera focal length and the depth estimation of a point 8 | to project it to image coordinates. It works with numpy matrices or 9 | tensorflow values. See ``use_numpy``. 10 | 11 | # Arguments 12 | focal_length: Float. Focal length of camera used to generate keypoints. 13 | use_numpy: Boolean. If `True` both unproject and project functions 14 | take numpy arrays as inputs. If `False` takes tf.tensors as inputs. 
15 | """ 16 | def __init__(self, focal_length, use_numpy=False): 17 | self.focal_length = focal_length 18 | self.project = self._project_keras 19 | self.unproject = self._unproject_keras 20 | if use_numpy: 21 | self.project = self._project_numpy 22 | self.unproject = self._unproject_numpy 23 | 24 | def _project_keras(self, xyzw): 25 | z = xyzw[:, :, 2:3] + 1e-8 26 | x = - (self.focal_length / z) * xyzw[:, :, 0:1] 27 | y = - (self.focal_length / z) * xyzw[:, :, 1:2] 28 | return K.concatenate([x, y, z], axis=2) 29 | 30 | def _project_numpy(self, xyzw): 31 | z = xyzw[:, :, 2:3] + 1e-8 32 | x = - (self.focal_length / z) * xyzw[:, :, 0:1] 33 | y = - (self.focal_length / z) * xyzw[:, :, 1:2] 34 | return np.concatenate([x, y, z], axis=2) 35 | 36 | def _unproject_keras(self, xyz): 37 | z = xyz[:, :, 2:3] 38 | x = - (z / self.focal_length) * xyz[:, :, 0:1] 39 | y = - (z / self.focal_length) * xyz[:, :, 1:2] 40 | w = K.ones_like(z) 41 | xyzw = K.concatenate([x, y, z, w], axis=2) 42 | return xyzw 43 | 44 | def _unproject_numpy(self, xyz): 45 | z = xyz[:, :, 2:3] 46 | x = - (z / self.focal_length) * xyz[:, :, 0:1] 47 | y = - (z / self.focal_length) * xyz[:, :, 1:2] 48 | w = np.ones_like(z) 49 | xyzw = np.concatenate([x, y, z, w], axis=2) 50 | return xyzw 51 | -------------------------------------------------------------------------------- /paz/models/pose_estimation/__init__.py: -------------------------------------------------------------------------------- 1 | from .higher_hrnet import HigherHRNet 2 | from .efficientpose import EfficientPosePhi0 3 | from .efficientpose import EfficientPosePhi1 4 | from .efficientpose import EfficientPosePhi2 5 | from .efficientpose import EfficientPosePhi3 6 | from .efficientpose import EfficientPosePhi4 7 | from .efficientpose import EfficientPosePhi5 8 | from .efficientpose import EfficientPosePhi6 9 | from .efficientpose import EfficientPosePhi7 10 | -------------------------------------------------------------------------------- /paz/models/pose_estimation/efficientpose/__init__.py: -------------------------------------------------------------------------------- 1 | from .efficientpose import EfficientPosePhi0 2 | from .efficientpose import EfficientPosePhi1 3 | from .efficientpose import EfficientPosePhi2 4 | from .efficientpose import EfficientPosePhi3 5 | from .efficientpose import EfficientPosePhi4 6 | from .efficientpose import EfficientPosePhi5 7 | from .efficientpose import EfficientPosePhi6 8 | from .efficientpose import EfficientPosePhi7 9 | -------------------------------------------------------------------------------- /paz/models/segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | from .unet import UNET_VGG16 2 | from .unet import UNET_VGG19 3 | from .unet import UNET_RESNET50 4 | from .unet import UNET 5 | -------------------------------------------------------------------------------- /paz/optimization/__init__.py: -------------------------------------------------------------------------------- 1 | from .losses import MultiBoxLoss 2 | from .losses import KeypointNetLoss 3 | from .losses import DiceLoss 4 | from .losses import FocalLoss 5 | from .losses import JaccardLoss 6 | -------------------------------------------------------------------------------- /paz/optimization/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .multi_box_loss import MultiBoxLoss 2 | from .keypointnet_loss import KeypointNetLoss 3 | from .segmentation 
import DiceLoss 4 | from .segmentation import FocalLoss 5 | from .segmentation import JaccardLoss 6 | from .segmentation import WeightedReconstruction 7 | from .segmentation import WeightedReconstructionWithError 8 | -------------------------------------------------------------------------------- /paz/optimization/losses/segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | from .focal_loss import FocalLoss 2 | from .focal_loss import compute_focal_loss 3 | 4 | from .dice_loss import DiceLoss 5 | from .dice_loss import compute_F_beta_score 6 | 7 | from .jaccard_loss import JaccardLoss 8 | from .jaccard_loss import compute_jaccard_score 9 | 10 | from .weighted_reconstruction import WeightedReconstruction 11 | from .weighted_reconstruction import WeightedReconstructionWithError 12 | from .weighted_reconstruction import ( 13 | compute_weighted_reconstruction_loss, 14 | compute_weighted_reconstruction_loss_with_error) 15 | -------------------------------------------------------------------------------- /paz/optimization/losses/segmentation/dice_loss.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras.losses import Loss 3 | 4 | 5 | def compute_F_beta_score(y_true, y_pred, beta=1.0, class_weights=1.0): 6 | """Computes the F beta score. The F beta score is the geometric mean 7 | of the precision and recall, where the recall is B times more important 8 | than the precision. 9 | 10 | # Arguments 11 | y_true: Tensor of shape ``(batch, H, W, num_channels)``. 12 | y_pred: Tensor of shape ``(batch, H, W, num_channels)``. 13 | beta: Float. 14 | class_weights: Float or list of floats of shape ``(num_classes)``. 15 | 16 | # Returns 17 | Tensor of shape ``(batch)`` containing the F beta score per sample. 18 | """ 19 | true_positives = tf.reduce_sum(y_true * y_pred, axis=[1, 2]) 20 | false_positives = tf.reduce_sum(y_pred, axis=[1, 2]) - true_positives 21 | false_negatives = tf.reduce_sum(y_true, axis=[1, 2]) - true_positives 22 | B_squared = tf.math.pow(beta, 2) 23 | numerator = (1.0 + B_squared) * true_positives 24 | denominator = numerator + (B_squared * false_negatives) + false_positives 25 | F_beta_score = numerator / (denominator + 1e-5) 26 | return class_weights * F_beta_score 27 | 28 | 29 | class DiceLoss(Loss): 30 | """Computes the F beta loss. The F beta score is the geometric mean 31 | of the precision and recall, where the recall is B times more important 32 | than the precision. 33 | 34 | # Arguments 35 | beta: Float. 36 | class_weights: Float or list of floats of shape ``(num_classes)``. 37 | """ 38 | def __init__(self, beta=1.0, class_weights=1.0): 39 | super(DiceLoss, self).__init__() 40 | self.beta = beta 41 | self.class_weights = class_weights 42 | 43 | def call(self, y_true, y_pred): 44 | args = (self.beta, self.class_weights) 45 | return 1.0 - compute_F_beta_score(y_true, y_pred, *args) 46 | -------------------------------------------------------------------------------- /paz/optimization/losses/segmentation/focal_loss.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras.losses import Loss 3 | 4 | 5 | def compute_focal_loss(y_true, y_pred, gamma=2.0, alpha=0.25): 6 | """Computes the Focal loss. The Focal loss down weights 7 | properly classified examples. 8 | 9 | # Arguments 10 | y_true: Tensor of shape ``(batch, H, W, num_channels)``. 
11 | y_pred: Tensor of shape ``(batch, H, W, num_channels)``. 12 | gamma: Float. 13 | alpha: Float. 14 | class_weights: Float or list of floats of shape ``(num_classes)``. 15 | 16 | # Returns 17 | Tensor of shape ``(batch)`` containing the F beta score per sample. 18 | """ 19 | y_pred = tf.clip_by_value(y_pred, 1e-5, 1.0 - 1e-5) 20 | modulator = alpha * tf.math.pow(1 - y_pred, gamma) 21 | focal_loss = - modulator * y_true * tf.math.log(y_pred) 22 | return focal_loss 23 | 24 | 25 | class FocalLoss(Loss): 26 | """Computes the Focal loss. The Focal loss down weights 27 | properly classified examples. 28 | 29 | # Arguments 30 | gamma: Float. 31 | alpha: Float. 32 | class_weights: Float or list of floats of shape ``(num_classes)``. 33 | """ 34 | def __init__(self, gamma=2.0, alpha=0.25): 35 | super(FocalLoss, self).__init__() 36 | self.gamma = gamma 37 | self.alpha = alpha 38 | 39 | def call(self, y_true, y_pred): 40 | return compute_focal_loss(y_true, y_pred, self.gamma, self.alpha) 41 | -------------------------------------------------------------------------------- /paz/optimization/losses/segmentation/jaccard_loss.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras.losses import Loss 3 | 4 | 5 | def compute_jaccard_score(y_true, y_pred, class_weights=1.0): 6 | """Computes the Jaccard score. The Jaccard score is the intersection 7 | over union of the predicted with respect to real masks. 8 | 9 | # Arguments 10 | y_true: Tensor of shape ``(batch, H, W, num_channels)``. 11 | y_pred: Tensor of shape ``(batch, H, W, num_channels)``. 12 | class_weights: Float or list of floats of shape ``(num_classes)``. 13 | 14 | # Returns 15 | Tensor of shape ``(batch)`` containing the F beta score per sample. 16 | """ 17 | intersection = tf.reduce_sum(y_true * y_pred, axis=[1, 2]) 18 | union = tf.reduce_sum(y_true + y_pred, axis=[1, 2]) - intersection 19 | jaccard_score = (intersection) / (union + 1e-5) 20 | return class_weights * jaccard_score 21 | 22 | 23 | class JaccardLoss(Loss): 24 | """Computes the Jaccard loss. The Jaccard score is the intersection 25 | over union of the predicted with respect to real masks. 26 | 27 | # Arguments 28 | class_weights: Float or list of floats of shape ``(num_classes)``. 29 | """ 30 | def __init__(self, class_weights=1.0): 31 | super(JaccardLoss, self).__init__() 32 | self.class_weights = class_weights 33 | 34 | def call(self, y_true, y_pred): 35 | return 1.0 - compute_jaccard_score(y_true, y_pred, self.class_weights) 36 | 37 | -------------------------------------------------------------------------------- /paz/pipelines/angles.py: -------------------------------------------------------------------------------- 1 | from paz import processors as pr 2 | from paz.models import IKNet 3 | from paz.datasets import MPIIHandJoints 4 | from paz.backend.keypoints import flip_along_x_axis 5 | 6 | 7 | class IKNetHandJointAngles(pr.Processor): 8 | """Estimate absolute and relative joint angle for the minimal hand joints 9 | using the 3D keypoint locations. 10 | 11 | # Arguments 12 | links_origin: Array. Reference pose of the minimal hand joints. 13 | parent: List. Parents of the keypoints from kinematic chain 14 | right_hand: Boolean. If 'True', estimate angles for right hand, else 15 | estimate angles for left hand. 16 | keypoints3D: Array [num_joints, 3]. 3D location of keypoints. 17 | 18 | # Returns 19 | absolute_angles: Array [num_joints, 4]. 
quaternion representation. 20 | relative_angles: Array [num_joints, 3]. axis-angle representation. 21 | """ 22 | def __init__(self, links_origin=MPIIHandJoints.links_origin, 23 | parents=MPIIHandJoints.parents, right_hand=False): 24 | super(IKNetHandJointAngles, self).__init__() 25 | self.calculate_orientation = pr.ComputeOrientationVector(parents) 26 | self.links_origin = links_origin 27 | self.right_hand = right_hand 28 | if self.right_hand: 29 | self.links_origin = flip_along_x_axis(self.links_origin) 30 | self.links_delta = self.calculate_orientation(self.links_origin) 31 | self.concatenate = pr.Concatenate(0) 32 | self.compute_absolute_angles = pr.SequentialProcessor( 33 | [pr.ExpandDims(0), IKNet(), pr.Squeeze(0)]) 34 | self.compute_relative_angles = pr.CalculateRelativeAngles() 35 | self.wrap = pr.WrapOutput(['absolute_angles', 'relative_angles']) 36 | 37 | def call(self, keypoints3D): 38 | delta = self.calculate_orientation(keypoints3D) 39 | pack = self.concatenate( 40 | [keypoints3D, delta, self.links_origin, self.links_delta]) 41 | absolute_angles = self.compute_absolute_angles(pack) 42 | relative_angles = self.compute_relative_angles(absolute_angles) 43 | return self.wrap(absolute_angles, relative_angles) 44 | 45 | -------------------------------------------------------------------------------- /paz/pipelines/heatmaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from paz import processors as pr 3 | 4 | 5 | class GetHeatmapsAndTags(pr.Processor): 6 | """Get Heatmaps and Tags from the model output. 7 | # Arguments 8 | model: Trained HigherHRNet model. 9 | flipped_keypoint_order: List of length 17 (number of keypoints). 10 | Flipped list of keypoint order. 11 | data_with_center: Boolean. True if the model is trained using the 12 | center. 13 | image: Numpy array. 
Input image of shape (H, W) 14 | 15 | # Returns 16 | heatmaps: Numpy array of shape (1, num_keypoints, H, W) 17 | Tags: Numpy array of shape (1, num_keypoints, H, W) 18 | """ 19 | def __init__(self, model, flipped_keypoint_order, with_flip, 20 | data_with_center, scale_output=True, axes=[0, 3, 1, 2]): 21 | super(GetHeatmapsAndTags, self).__init__() 22 | self.with_flip = with_flip 23 | self.predict = pr.SequentialProcessor( 24 | [pr.Predict(model), pr.TransposeOutput(axes), pr.ScaleOutput(2)]) 25 | self.get_heatmaps = pr.GetHeatmaps(flipped_keypoint_order) 26 | self.get_tags = pr.GetTags(flipped_keypoint_order) 27 | self.postprocess = pr.SequentialProcessor() 28 | if data_with_center: 29 | self.postprocess.add(pr.RemoveLastElement()) 30 | if scale_output: 31 | self.postprocess.add(pr.ScaleOutput(2, full_scaling=True)) 32 | 33 | def call(self, image): 34 | outputs = self.predict(image) 35 | heatmaps = self.get_heatmaps(outputs, with_flip=False) 36 | tags = self.get_tags(outputs, with_flip=False) 37 | if self.with_flip: 38 | outputs = self.predict(np.flip(image, [2])) 39 | heatmaps_flip = self.get_heatmaps(outputs, self.with_flip) 40 | tags_flip = self.get_tags(outputs, self.with_flip) 41 | heatmaps = [heatmaps, heatmaps_flip] 42 | tags = [tags, tags_flip] 43 | heatmaps = self.postprocess(heatmaps) 44 | tags = self.postprocess(tags) 45 | return heatmaps, tags 46 | -------------------------------------------------------------------------------- /paz/processors/groups.py: -------------------------------------------------------------------------------- 1 | from ..abstract import Processor 2 | from ..backend.groups import rotation_vector_to_quaternion 3 | from ..backend.groups import rotation_vector_to_rotation_matrix 4 | from ..backend.groups import to_affine_matrix 5 | 6 | 7 | class RotationVectorToQuaternion(Processor): 8 | """Transforms rotation vector into quaternion. 9 | """ 10 | def __init__(self): 11 | super(RotationVectorToQuaternion, self).__init__() 12 | 13 | def call(self, rotation_vector): 14 | quaternion = rotation_vector_to_quaternion(rotation_vector) 15 | return quaternion 16 | 17 | 18 | class RotationVectorToRotationMatrix(Processor): 19 | """Transforms rotation vector into a rotation matrix. 20 | """ 21 | def __init__(self): 22 | super(RotationVectorToRotationMatrix, self).__init__() 23 | 24 | def call(self, rotation_vector): 25 | return rotation_vector_to_rotation_matrix(rotation_vector) 26 | 27 | 28 | class ToAffineMatrix(Processor): 29 | """Builds affine matrix from a rotation matrix and a translation vector. 30 | """ 31 | def __init__(self): 32 | super(ToAffineMatrix, self).__init__() 33 | 34 | def call(self, rotation_matrix, translation): 35 | affine_matrix = to_affine_matrix(rotation_matrix, translation) 36 | return affine_matrix 37 | -------------------------------------------------------------------------------- /paz/processors/renderer.py: -------------------------------------------------------------------------------- 1 | from ..abstract import Processor 2 | 3 | 4 | class Render(Processor): 5 | """Render images and labels. 6 | 7 | # Arguments 8 | renderer: Object that renders images and labels using a method 9 | ''render_sample()''. 
10 | """ 11 | def __init__(self, renderer): 12 | super(Render, self).__init__() 13 | self.renderer = renderer 14 | 15 | def call(self): 16 | return self.renderer.render() 17 | -------------------------------------------------------------------------------- /paz/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .logger import write_dictionary 2 | from .logger import build_directory 3 | from .logger import make_directory 4 | from .logger import write_weights 5 | from .documentation import docstring 6 | -------------------------------------------------------------------------------- /paz/utils/documentation.py: -------------------------------------------------------------------------------- 1 | def docstring(original): 2 | """Doctors (documents) `target` `Callable` with `original` docstring. 3 | 4 | # Arguments: 5 | original: Object with documentation string. 6 | 7 | # Returns 8 | Function that replaces `target` docstring with `original` docstring. 9 | """ 10 | def wrapper(target): 11 | target.__doc__ = original.__doc__ 12 | return target 13 | return wrapper 14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools import find_packages 3 | import paz 4 | 5 | 6 | if __name__ == "__main__": 7 | 8 | setup(name='pypaz', 9 | version=paz.__version__, 10 | description='Perception for Autonomous Systems', 11 | long_description='Perception for Autonomous Systems', 12 | author='Octavio Arriaga', 13 | author_email='octavio.arriaga@dfki.de', 14 | url='https://github.com/oarriaga/paz/', 15 | license='MIT', 16 | classifiers=[ 17 | 'Intended Audience :: Science/Research', 18 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 19 | 'Topic :: Scientific/Engineering :: Image Recognition', 20 | 'Programming Language :: Python :: 3', 21 | 'License :: OSI Approved :: MIT License' 22 | ], 23 | install_requires=['opencv-python', 'tensorflow', 'numpy'], 24 | packages=find_packages()) 25 | -------------------------------------------------------------------------------- /tests/examples/pipelines.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import numpy as np 4 | 5 | from paz.backend.image import load_image, show_image, resize_image 6 | 7 | from paz.backend.camera import Camera 8 | from paz.pipelines import DetectMiniXceptionFER 9 | from paz.pipelines import DetectFaceKeypointNet2D32 10 | from paz.pipelines import HeadPoseKeypointNet2D32 11 | from paz.pipelines import SSD300FAT, SSD300VOC, SSD512COCO, SSD512YCBVideo 12 | 13 | parser = argparse.ArgumentParser(description='Real-time face classifier') 14 | parser.add_argument('-o', '--offset', type=float, default=0.1, 15 | help='Scaled offset to be added to bounding boxes') 16 | parser.add_argument('-s', '--score_thresh', type=float, default=0.6, 17 | help='Box/class score threshold') 18 | parser.add_argument('-n', '--nms_thresh', type=float, default=0.45, 19 | help='non-maximum suppression threshold') 20 | parser.add_argument('-p', '--image_path', type=str, 21 | help='full image path used for the pipelines') 22 | parser.add_argument('-c', '--camera_id', type=str, 23 | help='Camera/device ID') 24 | parser.add_argument('-d', '--dataset', type=str, default='COCO', 25 | choices=['VOC', 'COCO', 'YCBVideo', 'FAT'], 26 | help='Dataset name') 27 | args = 
parser.parse_args() 28 | 29 | 30 | 31 | name_to_model = {'VOC': SSD300VOC, 'FAT': SSD300FAT, 'COCO': SSD512COCO, 32 | 'YCBVideo': SSD512YCBVideo} 33 | 34 | 35 | image = load_image(args.image_path) 36 | H = 1000 37 | W = int((H / image.shape[0]) * image.shape[1]) 38 | # image = resize_image(image, (W, H)) 39 | 40 | focal_length = image.shape[1] 41 | image_center = (image.shape[1] / 2.0, image.shape[0] / 2.0) 42 | camera = Camera(args.camera_id) 43 | camera.distortion = np.zeros((4, 1)) 44 | camera.intrinsics = np.array([[focal_length, 0, image_center[0]], 45 | [0, focal_length, image_center[1]], 46 | [0, 0, 1]]) 47 | 48 | pipeline_A = DetectMiniXceptionFER([args.offset, args.offset]) 49 | pipeline_B = DetectFaceKeypointNet2D32() 50 | pipeline_C = HeadPoseKeypointNet2D32(camera) 51 | pipeline_D = name_to_model[args.dataset](args.score_thresh, args.nms_thresh) 52 | pipelines = [pipeline_A, pipeline_B, pipeline_C, pipeline_D] 53 | for pipeline in pipelines: 54 | predictions = pipeline(image.copy()) 55 | show_image(predictions['image']) 56 | -------------------------------------------------------------------------------- /tests/paz/abstract/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oarriaga/paz/b87754a3769d7cc955ce6cfde746d6cddeff540f/tests/paz/abstract/__init__.py -------------------------------------------------------------------------------- /tests/paz/abstract/messages_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from paz.abstract.messages import Box2D, Pose6D 3 | import pytest 4 | 5 | 6 | @pytest.fixture(params=[[219, 49, 319, 179]]) 7 | def box_coordinates(request): 8 | return (request.param) 9 | 10 | 11 | @pytest.fixture(params=[60.0, 80.0]) 12 | def score(request): 13 | return request.param 14 | 15 | 16 | @pytest.fixture 17 | def quaternion(): 18 | return np.array([-0.4732069, 0.5253096, 0.4732069, 0.5255476]) 19 | 20 | 21 | @pytest.fixture 22 | def translation(): 23 | return np.array([1.0, 0.765, 0]) 24 | 25 | 26 | @pytest.fixture 27 | def rotation_vector(): 28 | return np.array([1., -0.994522, 0.104528]) 29 | 30 | 31 | @pytest.fixture 32 | def quaternion_result(): 33 | return np.array([0.45936268, -0.45684629, 0.04801626, 0.76024458]) 34 | 35 | 36 | @pytest.mark.parametrize("test_point", [[256, 60], [219, 49]]) 37 | def test_Box2D(box_coordinates, score, test_point): 38 | """ Unit test for Bounding box 2D with class 39 | and score 40 | """ 41 | box2D = Box2D(box_coordinates, score) 42 | assert (box2D.contains(test_point)) 43 | 44 | 45 | def test_Pose6D(quaternion, translation, rotation_vector, quaternion_result): 46 | """Unit test for Pose estimation 47 | """ 48 | pose6D = Pose6D(quaternion, translation) 49 | result = pose6D.from_rotation_vector(rotation_vector, translation) 50 | assert(result.quaternion.all() == quaternion_result.all()) 51 | -------------------------------------------------------------------------------- /tests/paz/abstract/sequence_test.py: -------------------------------------------------------------------------------- 1 | from paz.abstract import Processor, SequentialProcessor, ProcessingSequence 2 | from paz import processors as pr 3 | import numpy as np 4 | 5 | 6 | class FlipBoxesLeftRight(Processor): 7 | def __init__(self): 8 | super(FlipBoxesLeftRight, self).__init__() 9 | 10 | def call(self, image, boxes): 11 | width = image.shape[1] 12 | boxes[:, [0, 2]] = width - boxes[:, [2, 0]] 13 | image = image[:, 
::-1] 14 | return image, boxes 15 | 16 | 17 | data = [{'value_A': np.array([[1.0, 2.0, 3.0, 4.0]]), 18 | 'value_B': np.array([[1.0, 1.1, 1.2], [2.0, 2.1, 2.2]])}] 19 | processor = SequentialProcessor() 20 | processor.add(pr.UnpackDictionary(['value_A', 'value_B'])) 21 | processor.add(FlipBoxesLeftRight()) 22 | processor.add(pr.SequenceWrapper( 23 | {0: {'value_A': [1, 4]}}, 24 | {1: {'value_B': [2, 3]}})) 25 | sequence = ProcessingSequence(processor, 1, data) 26 | 27 | for _ in range(10): 28 | batch = sequence.__getitem__(0) 29 | value_A, value_B = batch[0]['value_A'][0], batch[1]['value_B'][0] 30 | print(value_B) 31 | -------------------------------------------------------------------------------- /tests/paz/backend/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oarriaga/paz/b87754a3769d7cc955ce6cfde746d6cddeff540f/tests/paz/backend/__init__.py -------------------------------------------------------------------------------- /tests/paz/backend/groups/test_quaternion.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from paz.backend.groups.quaternion import rotation_vector_to_quaternion 4 | from paz.backend.groups.quaternion import get_quaternion_conjugate 5 | from paz.backend.groups.quaternion import rotation_matrix_to_quaternion 6 | from paz.backend.groups.quaternion import quaternion_to_rotation_matrix 7 | 8 | 9 | @pytest.fixture 10 | def quaternion_target(): 11 | return np.array([0.45936268, -0.45684629, 0.04801626, 0.76024458]) 12 | 13 | 14 | @pytest.fixture 15 | def rotation_vector(): 16 | return np.array([1., -0.994522, 0.104528]) 17 | 18 | 19 | @pytest.fixture 20 | def rotation_matrix(): 21 | rotation_matrix = np.array([[0.99394977, -0.02341585, -0.10731083], 22 | [0.02910355, 0.9982362, 0.05174612], 23 | [0.10590983, -0.05455617, 0.99287811]]) 24 | return rotation_matrix 25 | 26 | 27 | @pytest.fixture() 28 | def quaternion(): 29 | return np.array([-0.02662533, -0.05340496, 0.01315443, 0.99813126]) 30 | 31 | 32 | def test_rotation_vector_to_quaternion(rotation_vector, quaternion_target): 33 | result = rotation_vector_to_quaternion(rotation_vector) 34 | assert np.allclose(result, quaternion_target) 35 | 36 | 37 | @pytest.mark.parametrize("quaternion_conjugate", 38 | [[0.02662533, 0.05340496, -0.01315443, 0.99813126]]) 39 | def test_get_quaternion_conjugate(quaternion, quaternion_conjugate): 40 | estimated_quaternion_conjugate = get_quaternion_conjugate(quaternion) 41 | assert np.allclose(quaternion_conjugate, estimated_quaternion_conjugate) 42 | 43 | 44 | def test_rotation_matrix_to_quaternion(rotation_matrix, quaternion): 45 | estimated_quaternion = rotation_matrix_to_quaternion(rotation_matrix) 46 | estimates_rotation_matrix = quaternion_to_rotation_matrix( 47 | estimated_quaternion) 48 | assert np.allclose(quaternion, estimated_quaternion) 49 | assert np.allclose(rotation_matrix, estimates_rotation_matrix) 50 | -------------------------------------------------------------------------------- /tests/paz/backend/image/draw.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from paz.backend.image.draw import points3D_to_RGB 4 | 5 | 6 | @pytest.fixture 7 | def points3D(): 8 | return np.array([[10, 301, 30], 9 | [145, 253, 12], 10 | [203, 5, 299], 11 | [214, 244, 98], 12 | [23, 67, 16], 13 | [178, 48, 234], 14 | [267, 310, 2]]) 15 | 16 | 17 | 
@pytest.fixture 18 | def object_sizes(): 19 | object_sizes = np.array([280, 260, 240]) 20 | return object_sizes 21 | 22 | 23 | @pytest.fixture 24 | def object_colors(): 25 | return np.array([[136, 166, 159], 26 | [3, 119, 140], 27 | [56, 132, 189], 28 | [66, 110, 231], 29 | [148, 193, 144], 30 | [33, 174, 120], 31 | [114, 175, 129]]) 32 | 33 | 34 | def test_points3D_to_RGB(points3D, object_sizes, object_colors): 35 | values = points3D_to_RGB(points3D, object_sizes) 36 | assert np.allclose(values, object_colors) 37 | -------------------------------------------------------------------------------- /tests/paz/backend/image_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from paz.backend.image import replace_lower_than_threshold 5 | from paz.backend.image import image_to_normalized_device_coordinates 6 | from paz.backend.image import normalized_device_coordinates_to_image 7 | from paz.backend.image import normalize_min_max 8 | from paz.backend.image import get_scaling_factor 9 | 10 | 11 | def test_replace_lower_than_threshold(): 12 | source = np.ones((128, 128, 3)) 13 | target = replace_lower_than_threshold(source, 2.0, 5.0) 14 | assert np.allclose(target, 5.0) 15 | 16 | source = np.ones((128, 128, 3)) 17 | target = replace_lower_than_threshold(source, 0.0, -1.0) 18 | assert np.allclose(target, 1.0) 19 | 20 | 21 | def test_image_to_normalized_device_coordinates(): 22 | image = np.array([[0, 127.5, 255]]) 23 | values = image_to_normalized_device_coordinates(image) 24 | assert np.allclose(values, np.array([[-1.0, 0.0, 1.0]])) 25 | 26 | 27 | def test_normalized_device_coordinates_to_image(): 28 | coordinates = np.array([[-1.0, 0.0, 1.0]]) 29 | values = normalized_device_coordinates_to_image(coordinates) 30 | assert np.allclose(values, np.array([[0.0, 127.5, 255.0]])) 31 | 32 | 33 | def test_normalize_min_max(): 34 | x = np.array([-1.0, 0.0, 1.0]) 35 | values = normalize_min_max(x, np.min(x), np.max(x)) 36 | assert np.allclose(values, np.array([0.0, 0.5, 1.0])) 37 | 38 | 39 | @pytest.mark.parametrize("output_scaling_factor", [[12, 8]]) 40 | def test_get_scaling_factor(output_scaling_factor, scale=2, shape=(128, 128)): 41 | image = np.ones((512, 768, 3)) 42 | scaling_factor = get_scaling_factor(image, scale, shape) 43 | assert np.allclose(output_scaling_factor, scaling_factor) 44 | -------------------------------------------------------------------------------- /tests/paz/backend/keypoints_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from paz.backend.keypoints import rotate_point2D 4 | from paz.backend.keypoints import transform_keypoint 5 | from paz.backend.keypoints import add_offset_to_point 6 | from paz.backend.keypoints import rotate_keypoints3D 7 | 8 | 9 | @pytest.fixture(params=[[2, 1]]) 10 | def point2D_a(request): 11 | return request.param 12 | 13 | 14 | @pytest.fixture(params=[[1.8, 2.5]]) 15 | def point2D_b(request): 16 | return request.param 17 | 18 | 19 | @pytest.fixture(params=[[149.75, 261.75]]) 20 | def keypoint(request): 21 | return request.param 22 | 23 | 24 | @pytest.fixture 25 | def transform_matrix(): 26 | return np.array([[2.66601562e+00, 0.00000000e+00, 5.00000000e-01], 27 | [2.22044605e-16, 2.66601562e+00, 2.50000000e-01]]) 28 | 29 | 30 | @pytest.fixture(params=[0.25]) 31 | def offset(request): 32 | return request.param 33 | 34 | 35 | @pytest.fixture 36 | def keypoint3D(): 37 | keypoint = 
np.array([[4, 3, 9]]) 38 | return keypoint 39 | 40 | 41 | @pytest.fixture 42 | def rotation_matrix(): 43 | rotation_matrix = np.array([[0.99394977, -0.02341585, -0.10731083], 44 | [0.02910355, 0.9982362, 0.05174612], 45 | [0.10590983, -0.05455617, 0.99287811]]) 46 | return rotation_matrix 47 | 48 | 49 | @pytest.mark.parametrize("rotated_keypoint", [np.array([1, -2])]) 50 | def test_rotate_point2D(point2D_a, rotated_keypoint): 51 | point = rotate_point2D(point2D_a, -90) 52 | point = (np.array(point)).astype(np.int8) 53 | assert np.allclose(point, rotated_keypoint) 54 | 55 | 56 | @pytest.mark.parametrize("transformed_keypoint", [[399.73583984, 57 | 698.07958984]]) 58 | def test_transform_keypoints(keypoint, transform_matrix, transformed_keypoint): 59 | point = transform_keypoint(keypoint, transform_matrix) 60 | assert np.allclose(point, transformed_keypoint) 61 | 62 | 63 | @pytest.mark.parametrize("shifted_keypoint", [[150.0, 262.0]]) 64 | def test_add_offset_to_point(keypoint, offset, shifted_keypoint): 65 | point = add_offset_to_point(keypoint, offset) 66 | assert np.allclose(point, shifted_keypoint) 67 | 68 | 69 | @pytest.mark.parametrize( 70 | "rotated_keypoint", [[2.93975406, 3.57683788, 9.1958738]]) 71 | def test_rotate_keypoints(rotation_matrix, keypoint3D, rotated_keypoint): 72 | calculated_rotated_keypoint = rotate_keypoints3D( 73 | np.expand_dims(rotation_matrix, 0), keypoint3D) 74 | assert np.allclose(rotated_keypoint, calculated_rotated_keypoint) -------------------------------------------------------------------------------- /tests/paz/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oarriaga/paz/b87754a3769d7cc955ce6cfde746d6cddeff540f/tests/paz/models/__init__.py -------------------------------------------------------------------------------- /tests/paz/models/detection/ssd300_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from paz.models import SSD300 3 | 4 | 5 | def test_SSD300_VOC_VOC(): 6 | try: 7 | SSD300(num_classes=21, 8 | base_weights='VOC', 9 | head_weights='VOC') 10 | except ValueError as valuerror: 11 | pytest.fail("SSD VOC-VOC loading failed: {}". format(valuerror)) 12 | 13 | 14 | def test_SSD300_VOC_None(): 15 | try: 16 | SSD300(num_classes=2, 17 | base_weights='VOC', 18 | head_weights=None) 19 | except ValueError as valuerror: 20 | pytest.fail("SSD VOC-None loading failed: {}". format(valuerror)) 21 | 22 | 23 | def test_SSD300_VGG_None(): 24 | try: 25 | SSD300(num_classes=21, 26 | base_weights='VGG', 27 | head_weights=None) 28 | except ValueError as valuerror: 29 | pytest.fail("SSD VGG-None loading failed: {}". 
format(valuerror)) 30 | -------------------------------------------------------------------------------- /tests/paz/models/segmentation/unet_test.py: -------------------------------------------------------------------------------- 1 | from paz.models import UNET_VGG16, UNET_VGG19, UNET_RESNET50 2 | 3 | 4 | def test_shapes_of_UNETVGG19(): 5 | model = UNET_VGG19(weights=None) 6 | assert model.input_shape[1:3] == model.output_shape[1:3] 7 | 8 | 9 | def test_shapes_of_UNETVGG16(): 10 | model = UNET_VGG16(weights=None) 11 | assert model.input_shape[1:3] == model.output_shape[1:3] 12 | 13 | 14 | def test_shapes_of_UNET_RESNET50V2(): 15 | model = UNET_RESNET50(weights=None) 16 | assert model.input_shape[1:3] == model.output_shape[1:3] 17 | -------------------------------------------------------------------------------- /tests/paz/optimization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oarriaga/paz/b87754a3769d7cc955ce6cfde746d6cddeff540f/tests/paz/optimization/__init__.py -------------------------------------------------------------------------------- /tests/paz/pipelines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oarriaga/paz/b87754a3769d7cc955ce6cfde746d6cddeff540f/tests/paz/pipelines/__init__.py -------------------------------------------------------------------------------- /tests/paz/pipelines/classification_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | import numpy as np 4 | 5 | from tensorflow.keras.utils import get_file 6 | from paz.backend.image import load_image 7 | from paz.pipelines.classification import MiniXceptionFER 8 | 9 | 10 | @pytest.fixture 11 | def image_with_face(): 12 | URL = ('https://github.com/oarriaga/altamira-data/releases/download' 13 | '/v0.9.1/image_with_face.jpg') 14 | filename = os.path.basename(URL) 15 | fullpath = get_file(filename, URL, cache_subdir='paz/tests') 16 | image = load_image(fullpath) 17 | return image 18 | 19 | 20 | @pytest.fixture 21 | def labeled_scores(): 22 | return np.array([[6.9692191e-03, 6.5534514e-05, 3.6219540e-03, 23 | 8.2652807e-01, 4.4210157e-03, 1.0055617e-03, 24 | 1.5738861e-01]]) 25 | 26 | 27 | @pytest.fixture 28 | def labeled_emotion(): 29 | return 'happy' 30 | 31 | 32 | def test_MiniXceptionFER(image_with_face, labeled_emotion, labeled_scores): 33 | classifier = MiniXceptionFER() 34 | inferences = classifier(image_with_face) 35 | assert inferences['class_name'] == labeled_emotion 36 | assert np.allclose(inferences['scores'], labeled_scores) 37 | -------------------------------------------------------------------------------- /tests/paz/processors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oarriaga/paz/b87754a3769d7cc955ce6cfde746d6cddeff540f/tests/paz/processors/__init__.py -------------------------------------------------------------------------------- /tests/paz/processors/draw_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from paz import processors as pr 3 | 4 | 5 | def test_DrawBoxes2D_with_invalid_class_names_type(): 6 | with pytest.raises(TypeError): 7 | class_names = 'Face' 8 | colors = [[255, 0, 0]] 9 | pr.DrawBoxes2D(class_names, colors) 10 | 11 | 12 | def test_DrawBoxes2D_with_invalid_colors_type(): 13 | with 
--------------------------------------------------------------------------------
/tests/paz/processors/geometric_test.py:
--------------------------------------------------------------------------------
import os
import pytest
import numpy as np

from tensorflow.keras.utils import get_file
from paz import processors as pr
from paz.backend.image import load_image


@pytest.fixture
def boxes_with_label():
    box_with_label = np.array([[47., 239., 194., 370., 12.],
                               [7., 11., 351., 497., 15.],
                               [138., 199., 206., 300., 19.],
                               [122., 154., 214., 194., 18.],
                               [238., 155., 306., 204., 9.]])
    return box_with_label


def test_expand_pass_by_reference(boxes_with_label):
    initial_boxes_with_label = boxes_with_label.copy()
    expand = pr.Expand(probability=1.0)
    expand(np.ones((300, 300, 3)), boxes_with_label)
    assert np.all(initial_boxes_with_label == boxes_with_label)


def test_random_sample_crop_pass_by_reference(boxes_with_label):
    initial_boxes_with_label = boxes_with_label.copy()
    crop = pr.RandomSampleCrop(probability=1.0)
    crop(np.ones((300, 300, 3)), boxes_with_label)
    assert np.all(initial_boxes_with_label == boxes_with_label)


def test_random_sample_crop():
    URL = ('https://github.com/oarriaga/altamira-data/releases/download'
           '/v0.9/object_detection_augmentation.png')
    filename = os.path.basename(URL)
    image_fullpath = get_file(filename, URL, cache_subdir='paz/tutorials')
    true_image = load_image(image_fullpath)
    H, W = true_image.shape[:2]
    true_boxes = np.array([[200 / W, 60 / H, 300 / W, 200 / H, 1],
                           [100 / W, 90 / H, 400 / W, 300 / H, 2]])

    class AugmentBoxes(pr.SequentialProcessor):
        def __init__(self, mean=pr.BGR_IMAGENET_MEAN):
            super(AugmentBoxes, self).__init__()
            self.add(pr.ToImageBoxCoordinates())
            self.add(pr.RandomSampleCrop())
            self.add(pr.ToNormalizedBoxCoordinates())

    augment_boxes = AugmentBoxes()
    for _ in range(1000):
        crop_image, crop_boxes = augment_boxes(true_image, true_boxes)
        assert len(crop_boxes.shape) == 2
        assert np.all(crop_boxes[:, 0] < crop_boxes[:, 2])
        assert np.all(crop_boxes[:, 1] < crop_boxes[:, 3])
--------------------------------------------------------------------------------
/tests/paz/processors/pose_test.py:
--------------------------------------------------------------------------------
import pytest
import numpy as np
import paz.processors as pr


@pytest.mark.parametrize(('rotation_size, translation_size'),
                         [(10, 50),
                          (20, 40),
                          (30, 30),
                          (40, 20),
                          (50, 10)])
def test_ConcatenatePoses(rotation_size, translation_size):
    num_rows = 10
    rotations = np.random.rand(num_rows, rotation_size)
    translations = np.random.rand(num_rows, translation_size)
    concatenate_poses = pr.ConcatenatePoses()
    poses = concatenate_poses(rotations, translations)
    assert np.all(poses[:, :rotation_size] == rotations)
    assert np.all(poses[:, rotation_size:] == translations)
    assert (poses.shape == (num_rows, rotation_size + translation_size))


@pytest.mark.parametrize(('pose_size, scale'),
                         [(10, 0.1),
                          (20, 0.2),
                          (30, 0.3),
                          (40, 0.4),
                          (50, 0.5)])
def test_ConcatenateScale(pose_size, scale):
    num_rows = 10
    poses = np.random.rand(num_rows, pose_size)
    concatenate_scale = pr.ConcatenateScale()
    poses_concatenated = concatenate_scale(poses, scale)
    assert np.all(poses_concatenated[:, :-1] == poses)
    assert np.all(poses_concatenated[:, -1] == scale)
    assert (poses_concatenated.shape == (num_rows, pose_size + 1))
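
The assertions above describe both processors completely: ConcatenatePoses stacks rotations and translations column-wise, and ConcatenateScale appends the scale as one extra final column. A compact numeric sketch of that behaviour, with arbitrarily chosen shapes:

import numpy as np
import paz.processors as pr

rotations = np.random.rand(5, 9)      # e.g. flattened 3x3 rotation matrices
translations = np.random.rand(5, 3)
poses = pr.ConcatenatePoses()(rotations, translations)
assert poses.shape == (5, 12)         # columns: rotation then translation

poses_with_scale = pr.ConcatenateScale()(poses, 0.5)
assert poses_with_scale.shape == (5, 13)
assert np.all(poses_with_scale[:, -1] == 0.5)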
--------------------------------------------------------------------------------
/tests/paz/utils/documentation_test.py:
--------------------------------------------------------------------------------
from paz.utils import docstring


def documented_function():
    """This is a dummy function
    """
    return None


@docstring(documented_function)
def undocumented_function():
    return None


def test_docstring():
    assert documented_function.__doc__ == undocumented_function.__doc__
--------------------------------------------------------------------------------
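
The test above only verifies that the decorator copies the docstring verbatim from one function to another. As a closing note, a minimal stand-in for such a decorator could look like the sketch below; it illustrates the contract the test checks and is not necessarily the actual paz.utils.docstring implementation:

def docstring(source):
    """Copy the docstring of `source` onto the decorated function."""
    def decorator(target):
        target.__doc__ = source.__doc__
        return target
    return decorator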