├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── demo ├── detection │ ├── detr │ │ └── detr_demo.ipynb │ └── yolov3 │ │ └── yolov3_demo.ipynb ├── face_detection │ └── facenet │ │ ├── imgs │ │ ├── BarackObama.jpeg │ │ ├── ManojBajpayee.jpeg │ │ └── MarkZuckerberg.jpeg │ │ ├── multiple_img.py │ │ ├── single_img.py │ │ └── zucktest.jpeg ├── gans │ └── deep_convolutional_gan │ │ ├── __init__.py │ │ └── dcgan_infernence_nb.ipynb ├── misc │ └── NeuralStyleTransfer │ │ └── nst_demo.ipynb └── segmentation │ └── pspnet │ └── pspnet_demo.ipynb ├── docs ├── contributing.md ├── developing.md └── weights.md ├── pyvision ├── __init__.py ├── detection │ ├── __init__.py │ ├── detr │ │ ├── __init__.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ └── weights_download.json │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── classes.txt │ │ │ └── coco.names │ │ ├── detr.py │ │ ├── model.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── backbone.py │ │ │ └── transformers.py │ │ ├── readme.md │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── box_utils.py │ │ │ ├── misc.py │ │ │ ├── pallete │ │ │ └── position_encoding.py │ ├── efficientdet │ │ ├── __init__.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ ├── dataset_coco.yaml │ │ │ └── weights_download.json │ │ ├── lib │ │ │ ├── __init__.py │ │ │ ├── dataset.py │ │ │ ├── losses.py │ │ │ ├── model.py │ │ │ └── utils.py │ │ ├── model.py │ │ ├── readme.md │ │ └── train.py │ ├── readme.md │ └── yolov3 │ │ ├── __init__.py │ │ ├── config │ │ ├── __init__.py │ │ ├── models_supported.txt │ │ ├── weights_download.json │ │ ├── yolov3-tiny.cfg │ │ └── yolov3.cfg │ │ ├── darknet.py │ │ ├── data │ │ ├── __init__.py │ │ └── coco.names │ │ ├── issues.md │ │ ├── model.py │ │ ├── readme.md │ │ └── utils │ │ ├── __init__.py │ │ ├── box_utils.py │ │ ├── layer_factory.py │ │ ├── pallete │ │ ├── parse_config.py │ │ ├── preprocess.py │ │ └── utils.py ├── face_detection │ ├── __init__.py │ └── facenet │ │ ├── __init__.py │ │ ├── config │ │ ├── __init__.py │ │ └── weights_download.json │ │ ├── model.py │ │ ├── models │ │ ├── InceptionResnetV1.py │ │ └── __init__.py │ │ ├── readme.md │ │ └── utils │ │ ├── __init__.py │ │ ├── extract_face.py │ │ └── layer_factory.py ├── gans │ ├── __init__.py │ ├── deep_convolutional_gan │ │ ├── README.md │ │ ├── __init__.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ └── weights_download.json │ │ ├── dcgan │ │ │ ├── __init__.py │ │ │ └── dcgan.py │ │ ├── docs │ │ │ └── documentation.md │ │ ├── model.py │ │ └── results │ │ │ ├── losses.png │ │ │ ├── result.png │ │ │ ├── result2.png │ │ │ └── results_img │ │ │ ├── __init__.py │ │ │ ├── generated_image_0.jpg │ │ │ ├── generated_image_1.jpg │ │ │ ├── generated_image_10.jpg │ │ │ ├── generated_image_11.jpg │ │ │ ├── generated_image_12.jpg │ │ │ ├── generated_image_13.jpg │ │ │ ├── generated_image_14.jpg │ │ │ ├── generated_image_15.jpg │ │ │ ├── generated_image_2.jpg │ │ │ ├── generated_image_3.jpg │ │ │ ├── generated_image_4.jpg │ │ │ ├── generated_image_5.jpg │ │ │ ├── generated_image_6.jpg │ │ │ ├── generated_image_7.jpg │ │ │ ├── generated_image_8.jpg │ │ │ └── generated_image_9.jpg │ └── wasserstein_gan │ │ ├── README.md │ │ ├── __init__.py │ │ ├── config │ │ ├── __init__.py │ │ └── weights_download.json │ │ ├── current_output_imgs │ │ ├── __init__.py │ │ └── test36.png │ │ ├── model.py │ │ ├── train.py │ │ └── wgan.py ├── misc │ ├── NeuralStyleTransfer │ │ ├── Examples │ │ │ ├── images │ │ │ │ ├── content1.jpg │ │ │ │ ├── content2.jpg │ │ │ │ ├── content3.jpg │ │ │ │ ├── content4.jpg │ │ │ │ ├── 
content5.jpg │ │ │ │ ├── content6.jpeg │ │ │ │ ├── style1.jpg │ │ │ │ ├── style6.jpg │ │ │ │ └── style7.jpg │ │ │ └── output │ │ │ │ ├── content1+style6.png │ │ │ │ ├── content2+style1.png │ │ │ │ ├── content3+style6.png │ │ │ │ ├── content4+style1.png │ │ │ │ ├── content4+style7.png │ │ │ │ ├── content5+style1.png │ │ │ │ └── content6+style7.png │ │ ├── README.md │ │ ├── __init__.py │ │ └── neural_style.py │ ├── __init__.py │ ├── mtcnn │ │ ├── README.md │ │ ├── __init__.py │ │ ├── config │ │ │ └── weights_download.json │ │ ├── detector.py │ │ ├── model.py │ │ ├── nets.py │ │ ├── requirements.txt │ │ ├── stage_one.py │ │ ├── stage_two.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── utils.py │ │ │ └── visualize.py │ └── noise2noise │ │ ├── README.md │ │ ├── __init__.py │ │ ├── assets │ │ ├── gauss_1.png │ │ ├── gauss_3.png │ │ ├── gdenoised_1.png │ │ ├── gdenoised_3.png │ │ ├── tdenoised_1.png │ │ ├── tdenoised_3.png │ │ ├── text_1.png │ │ └── text_3.png │ │ ├── config │ │ ├── __init__.py │ │ └── weights_download.json │ │ ├── dataset.py │ │ ├── model.py │ │ └── unet.py └── segmentation │ ├── __init__.py │ ├── fcn │ ├── README.md │ ├── __init__.py │ ├── config │ │ ├── __init__.py │ │ └── weights_download.json │ ├── data │ │ ├── __init__.py │ │ ├── voc2012_classes.txt │ │ └── voc2012_colors.txt │ ├── examples │ │ ├── 16.jpg │ │ ├── 16_101_blend.png │ │ ├── 16_101_map.png │ │ ├── 16_50_blend.png │ │ ├── 16_50_map.png │ │ ├── pascal_voc.jpg │ │ ├── pascal_voc_101_blend.png │ │ ├── pascal_voc_101_map.png │ │ ├── pascal_voc_50_blend.png │ │ └── pascal_voc_50_map.png │ ├── model.py │ ├── models │ │ ├── __init__.py │ │ ├── backbone.py │ │ └── fcn_net.py │ └── util │ │ ├── __init__.py │ │ └── utils.py │ └── pspnet │ ├── __init__.py │ ├── config │ ├── __init__.py │ ├── data_config.json │ └── weights_download.json │ ├── data │ ├── __init__.py │ ├── ade20k_classes.txt │ ├── ade20k_colors.txt │ ├── cityscapes_classes.txt │ ├── cityscapes_colors.txt │ ├── voc2012_classes.txt │ └── voc2012_colors.txt │ ├── examples │ ├── 16.jpg │ ├── 16_blend.png │ ├── 16_map.png │ ├── ade20k.jpg │ ├── ade20k_blend.png │ ├── ade20k_map.png │ ├── cityscape.png │ ├── cityscapes_blend.png │ ├── cityscapes_map.png │ ├── pascal_voc.jpg │ ├── pascal_voc_blend.png │ └── pascal_voc_map.png │ ├── model.py │ ├── models │ ├── __init__.py │ ├── backbone.py │ └── pspnet.py │ ├── readme.md │ └── util │ ├── __init__.py │ └── utils.py ├── requirements.txt ├── setup.py ├── test.sh └── tests ├── detection ├── detr │ ├── cars_test.jpg │ ├── detr_test.py │ └── zebra_test.jpg ├── effdet │ ├── 2.jpg │ ├── 3.jpg │ ├── __init__.py │ └── test_effdet.py └── yolov3 │ ├── cars_test.jpg │ ├── yolo_test.py │ └── zebra_test.jpg ├── face_detection └── facenet │ ├── imgs │ ├── BarackObama.jpeg │ ├── ManojBajpayee.jpeg │ └── MarkZuckerberg.jpeg │ ├── multiple_img.py │ ├── single_img.py │ └── zucktest.jpeg ├── gans ├── deep_convolutional_gan │ └── gan_test.py └── wasserstein_gan │ └── gan_test.py ├── misc ├── NeuralStyleTransfer │ └── nst_test.py ├── mtcnn │ ├── images │ │ ├── class2.jpg │ │ ├── designated-survivor-2.jpg │ │ ├── person1.jpeg │ │ ├── scenery.jpeg │ │ └── test5.jpg │ ├── mtcnn_test.py │ └── net_test.py └── noise2noise │ ├── Output_gaussian │ ├── denoised_1.png │ ├── denoised_2.png │ ├── denoised_3.png │ ├── source_1.png │ ├── source_2.png │ └── source_3.png │ ├── Output_text │ ├── denoised_1.png │ ├── denoised_2.png │ ├── denoised_3.png │ ├── source_1.png │ ├── source_2.png │ └── source_3.png │ ├── n2n_test.py │ ├── test.py │ └── 
test_images │ ├── test.jpg │ ├── test1.jpg │ └── test2.jpg ├── readme.md └── segmentation ├── fcn ├── fcn101.py └── fcn50.py └── pspnet └── pspnet_test.py /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | .DS_Store 3 | __pycache__/ 4 | pyvision/detection/yolov3/weights/ 5 | pyvision/misc/mtcnn/weights/ 6 | pyvision/face_detection/facenet/weights/ 7 | pyvision/detection/detr/weights/ 8 | pyvision/segmentation/pspnet/weights/ 9 | pyvision/misc/noise2noise/weights/ 10 | pyvision/segmentation/fcn/weights/ 11 | test.py -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, Pranjal Datta 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include pyvision/detection/yolov3/config/*.cfg 2 | include pyvision/detection/yolov3/config/*.json 3 | include pyvision/detection/yolov3/config/*.txt 4 | include pyvision/detection/yolov3/data/*.names 5 | include pyvision/detection/yolov3/utils/pallete 6 | 7 | include pyvision/detection/detr/utils/pallete 8 | include pyvision/detection/detr/config/*.json 9 | include pyvision/detection/detr/data/*.txt 10 | 11 | include pyvision/detection/efficientdet/config/*.json 12 | include pyvision/detection/efficientdet/config/*.yaml 13 | 14 | include pyvision/segmentation/pspnet/config/*.json 15 | include pyvision/segmentation/pspnet/data/*.txt 16 | 17 | include pyvision/segmentation/fcn/config/*.json 18 | include pyvision/segmentation/fcn/data/*.txt 19 | 20 | include pyvision/misc/noise2noise/config/*.json 21 | 22 | include pyvision/misc/mtcnn/config/*.json 23 | 24 | include pyvision/face_detection/facenet/config/*.json 25 | 26 | include pyvision/gans/wasserstein_gan/config/*.json 27 | 28 | recursive-include pyvision *.md 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # PyVision 3 | 4 | Ready-to-use implementations of some of the most common computer vision algorithms. 5 | 6 | In PyTorch only! 7 | 8 | ## Currently available architectures 9 | 10 | - **Multi Task Cascaded Convolutional Neural Network (MTCNN)** : A SOTA face and facial-landmark detection architecture. Check out [this](https://github.com/pranjaldatta/PyVision/tree/master/pyvision/misc/mtcnn) out for more details. 11 | 12 | - **YOLOv3:** The SOTA object detection algorithm. For more details, read the [docs](https://github.com/pranjaldatta/PyVision/tree/master/pyvision/detection/yolov3). 13 | 14 | - **FaceNet: A Unified Embedding for Face Recognition and Clustering**: One of the most popular architectures used for facial recognition. For more details, check [here](https://github.com/pranjaldatta/PyVision/tree/master/pyvision/face_detection/facenet). 15 | 16 | - **DEtection TRansformer (DETR)**: An end-to-end object detection architecture using transformers. For more details, check [here](https://github.com/pranjaldatta/PyVision/tree/master/pyvision/detection/detr). 17 | 18 | - **Neural Style Transfer (NST)**: Transfer *style* from one Image into another. For more details, check [here](https://github.com/pranjaldatta/PyVision/tree/master/pyvision/misc/NeuralStyleTransfer) 19 | 20 | - **Pyramid Scene Parsing Network (PSPNet)**: Instance Segmentation architecture that makes use of *Pyramid Pooling Module* for better results. For more details, check [here](https://github.com/pranjaldatta/PyVision/tree/master/pyvision/segmentation/pspnet). 21 | 22 | For full list of architectures that has been ported or are **in the process** of being ported, check [here](https://github.com/pranjaldatta/PyVision/blob/master/docs/developing.md). 23 | 24 | ## Installation 25 | 26 | 1. Run the code in your terminal to clone the master branch which contains the working code 27 | 28 | ``` 29 | $ git clone https://github.com/pranjaldatta/PyVision.git --single-branch --branch master 30 | ``` 31 | 32 | 2. Then, go to the repository root by pasting the command given below into your terminal 33 | 34 | ``` 35 | $ cd PyVision 36 | ``` 37 | 38 | 3. 
Run the following command in the terminal to install PyVision into the current virtual or conda environment 39 | 40 | ``` 41 | $ pip install . 42 | ``` 43 | 44 | 4. You are good to go! 45 | 46 | ## Contributing 47 | 48 | For contribution guidelines, please look [here](https://github.com/pranjaldatta/PyVision/tree/master/docs/contributing.md). Contributions are always welcome! 49 | 50 | ## ToDo 51 | 52 | - [ ] Populate with more architectures (obviously) 53 | 54 | - [x] ~~Come up with an efficient way to make the repository minimal i.e. assets (like weights) will only be downloaded on an as-you-need basis.~~ All weights are hosted on the SRM-MIC Google Drive and downloaded using gdown. 55 | 56 | - [x] ~~Come up with an efficient way to ensure that heavy architecture-specific dependencies are installed only when required.~~ All heavy assets are installed only when the model is being used. 57 | 58 | ## Note 59 | 60 | Currently, PyVision works only in a pre-configured conda environment with all dependencies installed. 61 | 62 | **P.S.** Star the repo if you liked the work! 63 | -------------------------------------------------------------------------------- /demo/face_detection/facenet/imgs/BarackObama.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/demo/face_detection/facenet/imgs/BarackObama.jpeg -------------------------------------------------------------------------------- /demo/face_detection/facenet/imgs/ManojBajpayee.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/demo/face_detection/facenet/imgs/ManojBajpayee.jpeg -------------------------------------------------------------------------------- /demo/face_detection/facenet/imgs/MarkZuckerberg.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/demo/face_detection/facenet/imgs/MarkZuckerberg.jpeg -------------------------------------------------------------------------------- /demo/face_detection/facenet/multiple_img.py: -------------------------------------------------------------------------------- 1 | from pyvision.face_detection.facenet import Facenet 2 | 3 | # In this example, we take all the imgs from the ./imgs folder and 4 | # generate embeddings for them. We also associate each embedding with its 5 | # filename, which acts as its 'true label'. Then we use these embeddings to 'classify' 6 | # whether a supplied image belongs to any one of the given categories 7 | 8 | # First we instantiate the facenet object. saveLoc is the path to the 9 | # folder wherein the embeddings will be saved.
By default it will be saved 10 | # as "embeddings.pkl" but can be changed with the "saveName" param 11 | fc = Facenet(saveLoc="save", saveName="embeddings2.pkl") 12 | 13 | embeddings = fc.generate_embeddings(img=None, path="demo/face_detection/facenet/imgs") 14 | 15 | did_match, preds, loss = fc.compare_embeddings( 16 | img="demo/face_detection/facenet/zucktest.jpeg", 17 | embedLoc="save/embeddings2.pkl", 18 | embeddings=None, 19 | label="MarkZuckerberg" 20 | ) 21 | print(did_match, preds, loss) 22 | print("For 'True' Image, we get: ", did_match) 23 | 24 | -------------------------------------------------------------------------------- /demo/face_detection/facenet/single_img.py: -------------------------------------------------------------------------------- 1 | from pyvision.face_detection.facenet import Facenet 2 | 3 | # In this example we take a single image from the ./imgs folder 4 | # Generate embeddings and store them. Then use those embeddings to 5 | # check whether a previously unseen image is classified accurately or not 6 | 7 | 8 | # First we instantiate the facenet object. saveLoc is the path to the 9 | # folder wherein the embeddings will be saved. By default it will be saved 10 | # as "embeddings.pkl" but can be changed with the "saveName" param 11 | fc = Facenet(saveLoc="save/") 12 | 13 | # generate embeds 14 | _ = fc.generate_embeddings(img=None, path="demo/face_detection/facenet/imgs/BarackObama.jpeg", label="Barack Obama") 15 | 16 | # now we compare it against a "False" image 17 | did_match, pred, loss = fc.compare_embeddings(None, img="demo/face_detection/facenet/imgs/ManojBajpayee.jpeg", label="Barack Obama", embedLoc="save/embeddings.pkl") 18 | print(did_match, pred, loss) 19 | print("Comparing against 'False' image, we get: ", did_match) -------------------------------------------------------------------------------- /demo/face_detection/facenet/zucktest.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/demo/face_detection/facenet/zucktest.jpeg -------------------------------------------------------------------------------- /demo/gans/deep_convolutional_gan/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | # Contribution Guidelines 2 | 3 | PyVision is meant to be a collection of all major (popular or otherwise) computer vision architectures made available in an easy-to-use, extensible format so that the net number of lines required to use the architecture, whether for training or inference purposes are reduced to **three/four** lines. 4 | 5 | The objective of such an effort is two fold: 6 | 7 | - To develop a library for our own use, that simplifies computer vision architecture use so that developers can focus on the project they are working on and not bother about the nuances and headaches of complex implementations. 8 | 9 | - To learn the nuances and deal with the headaches of complex architecture implementations and hopefully, become better engineers! 10 | 11 | 12 | ## Why Contribute? 13 | 14 | - Learn the details of seemingly complex architectures. 15 | 16 | - Learn the nuances of implementation. 17 | 18 | - Help make computer vision easier and more approachable! 19 | 20 | ## How to Contribute? 
21 | 22 | The following steps roughly outline the contribution workflow: 23 | 24 | 1. Decide on an architecture you want to implement! Once decided, **open an issue** at [issues](https://github.com/pranjaldatta/PyVision/issues). Be sure to classify the architecture under a given category. *For example*, YOLOv3 falls under the category of *detection*. If unsure, ask in the issue. 25 | 26 | 2. Once you are sure no one else is working on the given architecture, clone the master repository. 27 | 28 | 3. Once in the local repository root, create a branch named after you and the architecture you are working on. *An example branch name:* **pranjal-yolov3**. 29 | To create a new branch, run the following command in the local repo root from your terminal, 30 | 31 | ``` 32 | $ git checkout -b <your-name>-<architecture> 33 | ``` 34 | 35 | 4. Code! 36 | 37 | 5. **Important**: The most critical issue here is regarding the model **weights**. The weights of a given model **do not come** pre-loaded with the repository. This is done because: 38 | - It reduces the repository size (obviously). 39 | - GitHub doesn't allow hosting files larger than 100 MB. 40 | - Making model weights available **lazily** is more efficient, as people download **only** those weights that they are using. 41 | 42 | So, what's the solution? For the detailed process check [this](https://github.com/pranjaldatta/PyVision/blob/master/docs/weights.md). 43 | 44 | **TL;DR**: 45 | - Provide the maintainer with links to the downloadable weights in the issue. The maintainer will download the weights and upload them to SRM-MIC's Google Drive. 46 | 47 | - The maintainer will provide the **file id** to the contributor. 48 | 49 | - Download the weights in a **lazy** manner, only when the **model is being initialized**, using **gdown**. 50 | 51 | - Check YOLOv3's download_weights() method for reference. 52 | 53 | 6. Add tests! The tests should be self-contained, and the folder structure in the [tests folder](https://github.com/pranjaldatta/PyVision/blob/master/tests) should mirror the repo root. 54 | 55 | 7. **Very Important**: Add docs! Add docstrings to classes and functions. **How to use** instructions along with example code are a must. Try to cover everything in documentation, whether as markup or in source code. 56 | 57 | 8. Once you are done, push the branch **referencing** the issue! Resolve any problems/inconsistencies brought to your notice and wait for the merge! 58 | -------------------------------------------------------------------------------- /docs/developing.md: -------------------------------------------------------------------------------- 1 | # Architectures Being Ported to PyVision 2 | 3 | - [x] **Multi Task Cascaded Convolutional Neural Network in PyTorch (MTCNN)**: Link to [paper](https://arxiv.org/pdf/1604.02878.pdf). Contributed by [Sashrika Surya](https://github.com/sashrika15) and [Pranjal Datta](https://github.com/pranjaldatta). Can be accessed [here](https://github.com/pranjaldatta/PyVision/tree/master/pyvision/misc/mtcnn). 4 | 5 | - [x] **You Only Look Once v3 (YOLOv3)**: Link to [paper](https://arxiv.org/pdf/1804.02767.pdf). Contributed by [Pranjal Datta](https://github.com/pranjaldatta). Can be accessed [here](https://github.com/pranjaldatta/PyVision/tree/master/pyvision/detection/yolov3). 6 | 7 | - [x] **Neural Style Transfer**: Link to [paper](https://arxiv.org/pdf/1508.06576.pdf). Being contributed by [Anushka Choudhary](https://github.com/Anushka0805). 8 | 9 | - [x] **Wasserstein GAN**: Link to [paper](https://arxiv.org/pdf/1701.07875.pdf).
Being contributed by [Paras Rawat](https://github.com/TrizteX). 10 | 11 | - [x] **Noise2Noise**: Link to [paper](https://arxiv.org/pdf/1803.04189.pdf). Being contributed by [Sashrika Surya](https://github.com/sashrika15). 12 | 13 | - [x] **FaceNet**: Link to [paper](https://arxiv.org/pdf/1503.03832.pdf). Being contributed by [Pranjal Datta](https://github.com/pranjaldatta) 14 | 15 | - [x] **DEtection TRansformer (DETR)**: Link to [paper](https://scontent.fccu3-1.fna.fbcdn.net/v/t39.8562-6/101177000_245125840263462_1160672288488554496_n.pdf?_nc_cat=104&_nc_sid=ae5e01&_nc_ohc=sU420_xbxT8AX9LfbKI&_nc_ht=scontent.fccu3-1.fna&oh=455f6284084dfccdf0b9b39a878d290f&oe=5F0EB147). Being contributed by [Pranjal Datta](https://github.com/pranjaldatta) 16 | 17 | - [x] **Pyramid Scene Parsing Network (PSPNet)**: Link to [paper](https://arxiv.org/pdf/1612.01105.pdf). Being contributed by [Pranjal Datta](https://github.com/pranjaldatta). 18 | 19 | - [x] **Fully Convolutional Network for Segmentation (FCNNet)**: Link to [paper](https://arxiv.org/pdf/1605.06211v1.pdf). Being contributed by [Pranjal Datta](https://github.com/pranjaldatta). 20 | 21 | - [x] **Deep Convolutional GAN (DCGAN)**: Link to [paper](https://arxiv.org/abs/1511.06434.pdf). Contributed by [Srijarko Roy](https://github.com/srijarkoroy) and [Indira Dutta](https://github.com/indiradutta). 22 | -------------------------------------------------------------------------------- /docs/weights.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/docs/weights.md -------------------------------------------------------------------------------- /pyvision/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | sys.path.append(os.path.dirname(os.path.realpath(__file__))) 4 | -------------------------------------------------------------------------------- /pyvision/detection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/__init__.py -------------------------------------------------------------------------------- /pyvision/detection/detr/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import DETR, available_models -------------------------------------------------------------------------------- /pyvision/detection/detr/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/detr/config/__init__.py -------------------------------------------------------------------------------- /pyvision/detection/detr/config/weights_download.json: -------------------------------------------------------------------------------- 1 | { 2 | "detr-resnet50" : "1yKx023hJV_CG6vqDRYSc2YV0FqiBGoXx", 3 | "detr-resnet101": "1koBQ-cIbHGwpafzGNDJCRRpf89trTpuR" 4 | } 5 | -------------------------------------------------------------------------------- /pyvision/detection/detr/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/detr/data/__init__.py 
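The `config/weights_download.json` files above map model names to Google Drive file ids; as `docs/contributing.md` explains, weights are hosted on the SRM-MIC Google Drive and fetched lazily with gdown only when a model is first initialized. Below is a minimal sketch of how such a file might be consumed — the helper name, the `.pth` extension and the `weights/` destination directory are illustrative assumptions rather than the repo's exact code (YOLOv3's `download_weights()` is the in-repo reference):

```
import json
import os

import gdown  # PyVision pulls weights from Google Drive with gdown


def fetch_weights(model_name, config_path, dest_dir="weights"):
    # config_path points at a weights_download.json mapping model names to
    # Drive file ids, e.g. {"detr-resnet50": "1yKx023hJV_CG6vqDRYSc2YV0FqiBGoXx"}.
    with open(config_path) as f:
        drive_ids = json.load(f)

    if model_name not in drive_ids:
        raise ValueError("no weights entry for '{}' in {}".format(model_name, config_path))

    os.makedirs(dest_dir, exist_ok=True)
    dest_path = os.path.join(dest_dir, model_name + ".pth")

    # Only hit the network if the weights are not already cached locally (the "lazy" part).
    if not os.path.exists(dest_path):
        url = "https://drive.google.com/uc?id=" + drive_ids[model_name]
        gdown.download(url, dest_path, quiet=False)

    return dest_path
```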
-------------------------------------------------------------------------------- /pyvision/detection/detr/data/classes.txt: -------------------------------------------------------------------------------- 1 | N/A 2 | person 3 | bicycle 4 | car 5 | motorcycle 6 | airplane 7 | bus 8 | train 9 | truck 10 | boat 11 | traffic light 12 | fire hydrant 13 | N/A 14 | stop sign 15 | parking meter 16 | bench 17 | bird 18 | cat 19 | dog 20 | horse 21 | sheep 22 | cow 23 | elephant 24 | bear 25 | zebra 26 | giraffe 27 | N/A 28 | backpack 29 | umbrella 30 | N/A 31 | N/A 32 | handbag 33 | tie 34 | suitcase 35 | frisbee 36 | skis 37 | snowboard 38 | sports ball 39 | kite 40 | baseball bat 41 | baseball glove 42 | skateboard 43 | surfboard 44 | tennis racket 45 | bottle 46 | N/A 47 | wine glass 48 | cup 49 | fork 50 | knife 51 | spoon 52 | bowl 53 | banana 54 | apple 55 | sandwich 56 | orange 57 | broccoli 58 | carrot 59 | hot dog 60 | pizza 61 | donut 62 | cake 63 | chair 64 | couch 65 | potted plant 66 | bed 67 | N/A 68 | dining table 69 | N/A 70 | N/A 71 | toilet 72 | N/A 73 | tv 74 | laptop 75 | mouse 76 | remote 77 | keyboard 78 | cell phone 79 | microwave 80 | oven 81 | toaster 82 | sink 83 | refrigerator 84 | N/A 85 | book 86 | clock 87 | vase 88 | scissors 89 | teddy bear 90 | hair drier 91 | toothbrush 92 | -------------------------------------------------------------------------------- /pyvision/detection/detr/data/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /pyvision/detection/detr/detr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .utils.misc import NestedTensor, nested_tensor_from_tensor_list 6 | from .utils.box_utils import box_wh_to_xy 7 | 8 | class MLP(nn.Module): 9 | """ 10 | A very simple multi layer perceptron also known as FFN 11 | """ 12 | def __init__(self, in_dims, hidden_dims, out_dims, num_layers): 13 | 14 | super().__init__() 15 | 16 | self.num_layers = num_layers 17 | h = [hidden_dims] * (num_layers - 1) 18 | self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([in_dims]+h, h+[out_dims])) 19 | 20 | def forward(self, x): 21 | 22 | for i, layer in enumerate(self.layers): 23 | x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) 24 | return x 25 | 26 | class DETR_model(nn.Module): 27 | """ 28 | The main detr 
module that performs the forward pass 29 | """ 30 | def __init__(self, backbone, transformer, num_classes, num_queries, aux_loss=False): 31 | """The module that builds the detr model 32 | 33 | Parameters 34 | ---------- 35 | backbone : [nn.Module] 36 | the backbone to be used by the detr model. defined in backbone.py 37 | transformer : [nn.Module] 38 | the transformer to be used by the detr model. define din transformers.py 39 | num_classes : [int] 40 | number of object classses 41 | num_queries : [int] 42 | number of object queries i.e. detection slot i.e. the maximum number 43 | of objects that can be detected in a single image. For COCO, its 100 44 | aux_loss : bool, optional 45 | if auxiliary decoding losses are to be used, by default False 46 | """ 47 | 48 | super().__init__() 49 | 50 | self.backbone = backbone 51 | self.transformer = transformer 52 | self.num_classes = num_classes 53 | self.num_queries = num_queries 54 | self.aux_loss = aux_loss 55 | 56 | hidden_dim = self.transformer.d_model 57 | self.class_embed = nn.Linear(hidden_dim, num_classes+1) 58 | self.bbox_embed = MLP(hidden_dim, hidden_dim, 4, 3) 59 | self.query_embed = nn.Embedding(num_queries, hidden_dim) 60 | self.input_proj = nn.Conv2d(backbone.num_channels, hidden_dim, 1) 61 | 62 | def forward(self, samples: NestedTensor): 63 | """ 64 | The forward method defines a single forward pass for the model. 65 | It expects a TensorList obect which consists of : 66 | - samples.tensor: batched images of shape [B, 3, H, W] 67 | - samples.mask: a binary mask of shape [B, H, W] containing 1 padded pixels 68 | 69 | It returns the following elements: 70 | - pred_logits = classification logits for all queries. 71 | Shape = [B, num_queries, (num_classes + 1)] 72 | - pred_boxes = normalized box coordinates for all object queries represented as 73 | (center_x, center_y, height, width). These values are normalized 74 | between [0, 1] relative to size of each input image. utils/postprocess 75 | retrieves unnormalized bounding boxes 76 | - aux_outputs = Optional 77 | 78 | """ 79 | if not isinstance(samples, NestedTensor): 80 | samples = nested_tensor_from_tensor_list(samples) 81 | 82 | # we run it through the backbone 83 | features, pos = self.backbone(samples) 84 | 85 | # now we get the tensors and masks for each image and make the transformer pass 86 | src, mask = features[-1].decompose() 87 | assert mask is not None 88 | hs = self.transformer(self.input_proj(src), mask, self.query_embed.weight, pos[-1])[0] 89 | 90 | output_class = self.class_embed(hs) 91 | output_coord = self.bbox_embed(hs).sigmoid() 92 | out = { 93 | "pred_logits": output_class[-1], 94 | "pred_boxes": output_coord[-1] 95 | } 96 | if self.aux_loss: 97 | raise NotImplementedError("aux_loss not implemented yet") 98 | 99 | return out 100 | 101 | class DETR_postprocess(nn.Module): 102 | """ 103 | This module converts DETR output into a simple usable format""" 104 | def __init__(self, conf=0.7): 105 | super(DETR_postprocess, self).__init__() 106 | self.conf = conf 107 | 108 | @torch.no_grad() 109 | def forward(self, outputs, target_size): 110 | """ 111 | Converts raw DETR outputs into a usable format i.e. 
it takes the raw 112 | normalized (wrt to [0, 1]) bounding boxes predictions, unnormalizes it, 113 | scales it to original image size and returns a list of dictionaries of 114 | format {score, class_label, box_coords} for all the detections in a given image 115 | """ 116 | raw_logits, raw_boxes = outputs['pred_logits'], outputs["pred_boxes"] 117 | 118 | assert len(raw_logits) == len(target_size), "raw_logits and target size len mismatch" 119 | assert target_size.shape[1] == 2, "target_size shape dim 1 not equal to 2" 120 | 121 | probs = F.softmax(raw_logits, -1)[0,:,:-1] 122 | keep = probs.max(-1).values > self.conf 123 | probs = probs[keep] 124 | probs, labels = probs[...,:-1].max(-1) 125 | 126 | # converting boxes to [x1, y1, x2, y2] format 127 | raw_boxes = raw_boxes[:,keep,:] 128 | boxes = box_wh_to_xy(raw_boxes) 129 | 130 | if boxes.device is not "cpu": 131 | boxes = boxes.cpu() 132 | 133 | # convert coords relative to [0, 1] to absolute [H, W] coords 134 | img_height, img_width = target_size.unbind(1) 135 | scale_factors = torch.stack([img_width, img_height, img_width, img_height], dim=1) 136 | boxes = boxes * scale_factors[:, :] # remove none 137 | 138 | results = [{"scores": s.item(), "labels": l.item(), "coords": c.tolist()} for s, l, c in zip(probs, labels, boxes[0])] 139 | 140 | return results 141 | 142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /pyvision/detection/detr/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/detr/models/__init__.py -------------------------------------------------------------------------------- /pyvision/detection/detr/models/backbone.py: -------------------------------------------------------------------------------- 1 | """ 2 | The backbone modules are defined here 3 | """ 4 | 5 | from typing import List, Dict 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import torchvision 11 | from torchvision.models._utils import IntermediateLayerGetter 12 | 13 | from ..utils.misc import NestedTensor 14 | 15 | class FrozenBatchNorm2d(nn.Module): 16 | """ 17 | Custom batch norm layers where the batch stats and affine parameters 18 | are fixed. 
19 | 20 | Copy-paste from torchvision.misc.ops with added eps before rqsrt, without 21 | which any other models other than resnet[18, 24, 50, 101] produce nans 22 | """ 23 | def __init__(self, size): 24 | super(FrozenBatchNorm2d, self).__init__() 25 | 26 | self.register_buffer("weight", torch.ones(size)) 27 | self.register_buffer("bias", torch.zeros(size)) 28 | self.register_buffer("running_mean", torch.zeros(size)) 29 | self.register_buffer("running_var", torch.ones(size)) 30 | 31 | def _load_from_state_dict(self, state_dict, prefix, local_metadata,strict, 32 | missing_keys, unexpected_keys, error_msgs): 33 | 34 | num_batches_tracked_key = prefix + "num_batches_tracked" 35 | if num_batches_tracked_key in state_dict: 36 | del state_dict[num_batches_tracked_key] 37 | 38 | super(FrozenBatchNorm2d, self)._load_from_state_dict( 39 | state_dict, prefix, local_metadata,strict, 40 | missing_keys, unexpected_keys, error_msgs 41 | ) 42 | 43 | def forward(self, x): 44 | 45 | w = self.weight.reshape(1, -1, 1, 1) 46 | b = self.bias.reshape(1, -1, 1, 1) 47 | rv = self.running_var.reshape(1, -1, 1, 1) 48 | rm = self.running_mean.reshape(1, -1, 1, 1) 49 | eps = 1e-5 50 | scale = w * (rv + eps).rsqrt() 51 | bias = b - rm * scale 52 | 53 | return x * scale + bias 54 | 55 | 56 | class BackboneBase(nn.Module): 57 | 58 | def __init__(self, backbone:nn.Module, train_backbone: bool, num_channels: int, 59 | return_interim_layers: bool): 60 | 61 | super().__init__() 62 | 63 | for name, param in backbone.named_parameters(): 64 | if not train_backbone or "layer_2" not in name or "layer_3" not in name or "layer_4" not in name: 65 | param.requires_grad_(False) 66 | 67 | if return_interim_layers: 68 | return_layers = {"layer1": "0", "layer2": "1", "layer3": "2", "layer4": "3"} 69 | else: 70 | return_layers = {"layer4": "0"} 71 | 72 | self.body = IntermediateLayerGetter(backbone, return_layers=return_layers) 73 | self.num_channels = num_channels 74 | 75 | def forward(self, tensor_list: NestedTensor): 76 | 77 | xs = self.body(tensor_list.tensors) 78 | out: Dict[str, NestedTensor] = {} 79 | for name, x in xs.items(): 80 | m = tensor_list.mask 81 | assert m is not None 82 | mask = F.interpolate(m[None].float(), size=x.shape[-2:]).to(torch.bool)[0] 83 | out[name] = NestedTensor(x, mask) 84 | 85 | return out 86 | 87 | class Backbone(BackboneBase): 88 | """ 89 | Resnet backbone with frozen batchnorm 90 | """ 91 | def __init__(self, name: str, train_backbone: bool, return_interim_layers: bool, 92 | dilation: bool): 93 | 94 | backbone = getattr(torchvision.models, name)( 95 | replace_stride_with_dilation=[False, False, dilation], 96 | pretrained=False, norm_layer=FrozenBatchNorm2d 97 | ) # make pretrained true if requried 98 | 99 | num_channels = 512 if name in ('resnet18', 'resnet34') else 2048 100 | super().__init__(backbone, train_backbone, num_channels=num_channels, return_interim_layers=return_interim_layers) 101 | 102 | 103 | class Joiner(nn.Sequential): 104 | 105 | def __init__(self, backbone, position_embedding): 106 | super().__init__(backbone, position_embedding) 107 | 108 | def forward(self, tensor_list: NestedTensor): 109 | xs = self[0](tensor_list) 110 | out: List[NestedTensor] = [] 111 | pos = [] 112 | for name, x in xs.items(): 113 | out.append(x) 114 | pos.append(self[1](x).to(x.tensors.dtype)) # postional encoding 115 | 116 | return out, pos 117 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /pyvision/detection/detr/readme.md: 
-------------------------------------------------------------------------------- 1 | # End-to-End Object Detection with Transformers (DEtection TRansformer) 2 | 3 | DETR successfully uses Transformers in a conventional computer vision task such as detection. It reimagines the object detection pipeline as a single end-to-end model and views object detection as a **direct set prediction** problem. 4 | 5 | Check out [usage](#Usage) to start using DETR or check [summary](#Summary) for implementation details. 6 | 7 | Do check out the [paper](https://scontent.fccu3-1.fna.fbcdn.net/v/t39.8562-6/101177000_245125840263462_1160672288488554496_n.pdf?_nc_cat=104&_nc_sid=ae5e01&_nc_ohc=sU420_xbxT8AX9LfbKI&_nc_ht=scontent.fccu3-1.fna&oh=455f6284084dfccdf0b9b39a878d290f&oe=5F0EB147) or visit the original GitHub [repository](https://github.com/facebookresearch/detr?fbclid=IwAR3Eqm_JaWigPZfi5Uk3Pdi24u_Y198n2twoTSvYnn22XmiBAN92lC3TgYA). (The visit is worth it! Not only do they outline their approach in detail, but they also demonstrate through a [colab notebook](https://colab.research.google.com/github/facebookresearch/detr/blob/colab/notebooks/detr_demo.ipynb) how easy it is to make your own DETR in approx. 50 PyTorch lines!) 8 | 9 | 10 | Check out this standalone [notebook](https://github.com/pranjaldatta/PyVision/blob/master/demo/detection/detr/detr_demo.ipynb) to see how easily you can use DETR in 3-4 lines! 11 | If the notebook link doesn't work, please look [here](https://nbviewer.jupyter.org/github/pranjaldatta/PyVision/blob/master/demo/detection/detr/detr_demo.ipynb) as a workaround. 12 | 13 | ## Summary 14 | 15 | Currently, PyVision DETR supports the models listed below. The pretrained models were provided by the authors. More details can be accessed [here](https://github.com/facebookresearch/detr?fbclid=IwAR3Eqm_JaWigPZfi5Uk3Pdi24u_Y198n2twoTSvYnn22XmiBAN92lC3TgYA). 16 | 17 | *Note:* Panoptic models are being added. 18 | 19 | | Model| Train Dataset| Test Dataset | box AP | Available | 20 |--------|------------|------|---|----| 21 | | DETR-Resnet50 (default) | COCO2017-val5k | COCO2017-val5k | 42.0 | Yes | 22 | | DETR-Resnet101 | COCO2017-val5k | COCO2017-val5k | 43.5 | Yes | 23 | 24 | ## Usage 25 | 26 | For detailed documentation and parameters, refer to the docstrings/source code. 27 | 28 | **Brief Usage Summary:** 29 | 30 | The model setup is done via the DETR class exposed in *pyvision.detection.detr*. All model-related configuration, ranging from model type to confidence thresholds, can be set through the class constructor. 31 | 32 | Detection is done through the *detect()* method of the DETR class. Again, it offers some parameters for customisation that can override the general class configuration. Refer to source code docstrings for more details.
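The *detect()* call returns its detections as a list of `{"scores": ..., "labels": ..., "coords": ...}` dictionaries (the Quick Start snippets below show how the model itself is set up). Going by the post-processing code in `detr.py`, `"coords"` holds absolute `[x1, y1, x2, y2]` pixel coordinates and `"labels"` is an integer class index. The helper below is a hypothetical sketch of one way to consume that output — the function name, the extra score filter and the OpenCV drawing are illustrative and not part of the package:

```
import cv2

def draw_detections(img_path, results, out_path="detections.png", min_score=0.7):
    # "results" is the documented list of {"scores", "labels", "coords"} dicts
    # returned by detect(); "coords" are absolute [x1, y1, x2, y2] pixel values.
    img = cv2.imread(img_path)
    for det in results:
        if det["scores"] < min_score:  # optional extra filtering on top of the model's own threshold
            continue
        x1, y1, x2, y2 = [int(c) for c in det["coords"]]
        # det["labels"] is an integer class index; map it through
        # pyvision/detection/detr/data/classes.txt if a readable name is needed.
        tag = "{}: {:.2f}".format(det["labels"], det["scores"])
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(img, tag, (x1, max(y1 - 5, 0)), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0), 1)
    cv2.imwrite(out_path, img)
```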
33 | 34 | **Quick Start:** 35 | 36 | - To use the default *DETR-Resnet50* model, 37 | 38 | ``` 39 | from pyvision.detection import detr 40 | 41 | detr_obj = detr.DETR() 42 | 43 | # time_taken is the total time taken to perform the detection 44 | # result is the list of detections in a dict format {"scores": ..., "labels": ..., "coords": ...} 45 | 46 | time_taken, result = detr_obj.detect() 47 | ``` 48 | 49 | - To use *DTER-Resnet101* model: 50 | 51 | ``` 52 | from pyvision.detection import detr 53 | 54 | detr_obj = detr.DETR(model="detr-resnet101") 55 | 56 | # time_taken is the total time taken to perform the detection 57 | # result is the list of detections in a dict format {"scores": ..., "labels": ..., "coords": ...} 58 | 59 | time_taken, result = detr_obj.detect() 60 | ``` 61 | 62 | - To list supported models, 63 | 64 | ``` 65 | from pyvision.detection import detr 66 | 67 | print(detr.available_models()) 68 | ``` 69 | 70 | - To run **tests**, from repo root, run the following command from terminal 71 | 72 | ``` 73 | $ python tests/detection/detr/detr_test.py 74 | ``` 75 | 76 | ## Contributor 77 | 78 | - [Pranjal Datta](https://github.com/pranjaldatta) 79 | -------------------------------------------------------------------------------- /pyvision/detection/detr/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/detr/utils/__init__.py -------------------------------------------------------------------------------- /pyvision/detection/detr/utils/box_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision.ops.boxes import box_area 3 | import random 4 | import cv2 5 | 6 | clip = lambda x, x_min, x_max : x if x_min <= x <= x_max else (x_min if x < x_min else x_max) 7 | 8 | def box_wh_to_xy(x): 9 | """ 10 | Converts co-ordinates from (x, y, w, h) to 11 | (x1, y1, x2, y2) format 12 | """ 13 | x, y, w, h = x.unbind(-1) 14 | 15 | x1 = x - 0.5 * w 16 | y1 = y - 0.5 * h 17 | x2 = x + 0.5 * w 18 | y2 = y + 0.5 * h 19 | 20 | return torch.stack([x1, y1, x2, y2], dim=-1) 21 | 22 | def box_xy_to_wh(x): 23 | """ 24 | Converts co-ordinates from (x1, y1, x2, y2) to 25 | (x, y, w, h) 26 | """ 27 | x1, y1, x2, y2 = x.unbind(-1) 28 | 29 | x = (x2 + x1)/2 30 | y = (y2 + y1)/2 31 | w = (x2 - x1) 32 | h = (y2 - y1) 33 | 34 | return torch.stack([x, y, w, h], dim=-1) 35 | 36 | def iou(box1, box2): 37 | """ 38 | Returns the iou between two boxes 39 | """ 40 | area1 = box_area(box1) 41 | area2 = box_area(box2) 42 | 43 | top_left = torch.max(box1[:, None, :2], box2[:, :2]) # remove None! very Irritating 44 | bottom_right = torch.min(box1[:, None, 2:], box2[:, 2:]) # remove None! 
Very Irritating 45 | 46 | wh = (bottom_right - top_left).clamp(min=0) 47 | inter = wh[:, :, 0] * wh[:, :, 1] 48 | 49 | union = area1 + area2 - inter #check this 50 | 51 | iou = inter / union 52 | 53 | return iou, union 54 | 55 | def draw_box(orig_img, box, _cls, _cls_idx, colors, annotate): 56 | 57 | #img_w, img_h = orig_img.shape[0], orig_img.shape[1] 58 | #box[0:2]= [clip(x, 0.0, img_w) for x in box[0:2]] 59 | #box[1:4] = [clip(x, 0.0, img_h) for x in box[1:4]] 60 | 61 | coords1 = (int(box[0]), int(box[1])) 62 | coords2 = (int(box[2]), int(box[3])) 63 | 64 | 65 | label = "{0}".format(_cls) 66 | color = colors[_cls_idx] 67 | cv2.rectangle(orig_img, coords1, coords2, color, 2) 68 | if annotate: 69 | text_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0] 70 | coords2 = coords1[0] + text_size[0] + 3, coords1[1] + text_size[1] + 4 71 | cv2.rectangle(orig_img, coords1, coords2, color, -1) 72 | cv2.putText(orig_img, label, (coords1[0], coords1[1]+text_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 1, [255,255,255], 1) 73 | 74 | return orig_img 75 | 76 | 77 | def clamp(results, w_lim, h_lim): 78 | 79 | for idx in range(len(results)): 80 | box = results[idx]["coords"] 81 | box[0:2]= [clip(x, 0.0, w_lim) for x in box[0:2]] 82 | box[1:4] = [clip(x, 0.0, h_lim) for x in box[1:4]] 83 | results[idx]["coords"] = box 84 | return results 85 | 86 | -------------------------------------------------------------------------------- /pyvision/detection/detr/utils/misc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from typing import List, Optional 6 | 7 | class NestedTensor(object): 8 | 9 | def __init__(self, tensors, mask: Optional[torch.Tensor]): 10 | 11 | self.tensors = tensors 12 | self.mask = mask 13 | 14 | def to(self, device): 15 | 16 | cast_tensor = self.tensors.to(device) 17 | mask = self.mask 18 | if mask is not None: 19 | assert mask is not None 20 | cast_mask = mask.to(device) 21 | else: 22 | cast_mask = None 23 | 24 | return NestedTensor(cast_tensor, cast_mask) 25 | 26 | def decompose(self): 27 | return self.tensors, self.mask 28 | 29 | def __repr__(self): 30 | return str(self.tensors) 31 | 32 | 33 | def _max_by_axis(inp_list): 34 | """List[List[int]] -> List[int]""" 35 | maxes = inp_list[0] 36 | for sublist in inp_list[1:]: 37 | for index, item in enumerate(sublist): 38 | maxes[index] = max(maxes[index], item) 39 | return maxes 40 | 41 | 42 | 43 | def nested_tensor_from_tensor_list(tensor_list: List[torch.Tensor]): 44 | """ 45 | Converts a list of tensor-images[3, H, W] into nested tensor object for 46 | model input 47 | """ 48 | if tensor_list[0].ndim == 3: 49 | 50 | max_size = _max_by_axis([list(img.shape) for img in tensor_list]) 51 | batch_size = [len(tensor_list)] + max_size 52 | b, c, h, w = batch_size 53 | dtype = tensor_list[0].dtype 54 | device = tensor_list[0].device 55 | tensor = torch.zeros(batch_size, dtype=dtype, device=device) 56 | mask = torch.ones((b, h, w), dtype=torch.bool, device=device) 57 | 58 | for img, pad_img, m in zip(tensor_list, tensor, mask): 59 | pad_img[:img.shape[0], :img.shape[1], :img.shape[2]].copy_(img) 60 | m[:img.shape[1], :img.shape[2]] = False 61 | else: 62 | raise ValueError("Images can have ndim == 3 but found ", tensor_list[0].ndim) 63 | 64 | return NestedTensor(tensor, mask) 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /pyvision/detection/detr/utils/pallete: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/detr/utils/pallete -------------------------------------------------------------------------------- /pyvision/detection/detr/utils/position_encoding.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | 5 | from .misc import NestedTensor 6 | 7 | class PositionEmbeddingSine(nn.Module): 8 | """ 9 | This is a more standard version of the position embedding as defined 10 | in the attention is all you need paper, but is generalized to work on 11 | images 12 | """ 13 | def __init__(self, num_pos_feats=64, temp=10000, norm=False, scale=None): 14 | super().__init__() 15 | 16 | self.num_pos_feats = num_pos_feats 17 | self.temp = temp 18 | self.norm = norm 19 | 20 | 21 | if scale is not None and norm is False: 22 | raise ValueError("normalize should be true if scale is passed") 23 | if scale is None: 24 | scale = 2 * math.pi 25 | 26 | self.scale = scale 27 | 28 | def forward(self, tensor_list: NestedTensor): 29 | 30 | x = tensor_list.tensors 31 | mask = tensor_list.mask 32 | 33 | assert mask is not None 34 | 35 | not_mask = ~mask 36 | y_embed = not_mask.cumsum(1, dtype=torch.float32) 37 | x_embed = not_mask.cumsum(2, dtype=torch.float32) 38 | if self.norm: 39 | eps = 1e-6 40 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale 41 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale 42 | 43 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) 44 | dim_t = self.temp ** (2 * (dim_t // 2) / self.num_pos_feats) 45 | 46 | x_pos = x_embed[:, :, :, None] / dim_t 47 | y_pos = y_embed[:, :, :, None] / dim_t 48 | x_pos = torch.stack((x_pos[:,:,:,0::2].sin(), x_pos[:,:,:,1::2].cos()), dim=4).flatten(3) 49 | y_pos = torch.stack((y_pos[:,:,:,0::2].sin(), y_pos[:,:,:,1::2].cos()), dim=4).flatten(3) 50 | 51 | pos = torch.cat((y_pos, x_pos), dim=3).permute(0, 3, 1, 2) 52 | 53 | return pos 54 | -------------------------------------------------------------------------------- /pyvision/detection/efficientdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import EffdetInferAPI as EfficientDet -------------------------------------------------------------------------------- /pyvision/detection/efficientdet/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/efficientdet/config/__init__.py -------------------------------------------------------------------------------- /pyvision/detection/efficientdet/config/dataset_coco.yaml: -------------------------------------------------------------------------------- 1 | class_list : ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", 2 | "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", 3 | "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", 4 | "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", 5 | "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", 6 | "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", 
"orange", 7 | "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", 8 | "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", 9 | "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", 10 | "teddy bear", "hair drier", "toothbrush"] 11 | 12 | model_name: "effdet_coco" 13 | 14 | colors : [(39, 129, 113), (164, 80, 133), (83, 122, 114), (99, 81, 172), (95, 56, 104), (37, 84, 86), (14, 89, 122), 15 | (80, 7, 65), (10, 102, 25), (90, 185, 109), (106, 110, 132), (169, 158, 85), (188, 185, 26), (103, 1, 17), 16 | (82, 144, 81), (92, 7, 184), (49, 81, 155), (179, 177, 69), (93, 187, 158), (13, 39, 73), (12, 50, 60), 17 | (16, 179, 33), (112, 69, 165), (15, 139, 63), (33, 191, 159), (182, 173, 32), (34, 113, 133), (90, 135, 34), 18 | (53, 34, 86), (141, 35, 190), (6, 171, 8), (118, 76, 112), (89, 60, 55), (15, 54, 88), (112, 75, 181), 19 | (42, 147, 38), (138, 52, 63), (128, 65, 149), (106, 103, 24), (168, 33, 45), (28, 136, 135), (86, 91, 108), 20 | (52, 11, 76), (142, 6, 189), (57, 81, 168), (55, 19, 148), (182, 101, 89), (44, 65, 179), (1, 33, 26), 21 | (122, 164, 26), (70, 63, 134), (137, 106, 82), (120, 118, 52), (129, 74, 42), (182, 147, 112), (22, 157, 50), 22 | (56, 50, 20), (2, 22, 177), (156, 100, 106), (21, 35, 42), (13, 8, 121), (142, 92, 28), (45, 118, 33), 23 | (105, 118, 30), (7, 185, 124), (46, 34, 146), (105, 184, 169), (22, 18, 5), (147, 71, 73), (181, 64, 91), 24 | (31, 39, 184), (164, 179, 33), (96, 50, 18), (95, 15, 106), (113, 68, 54), (136, 116, 112), (119, 139, 130), 25 | (31, 139, 34), (66, 6, 127), (62, 39, 2), (49, 99, 180), (49, 119, 155), (153, 50, 183), (125, 38, 3), 26 | (129, 87, 143), (49, 87, 40), (128, 62, 120), (73, 85, 148), (28, 144, 118), (29, 9, 24), (175, 45, 108), 27 | (81, 175, 64), (178, 19, 157), (74, 188, 190), (18, 114, 2), (62, 128, 96), (21, 3, 150), (0, 6, 95), 28 | (2, 20, 184), (122, 37, 185)] -------------------------------------------------------------------------------- /pyvision/detection/efficientdet/config/weights_download.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "effdet_coco": "1jvcGIWyZ3jjTltiErp-OPNTA7SLWlslR" 4 | } 5 | -------------------------------------------------------------------------------- /pyvision/detection/efficientdet/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/efficientdet/lib/__init__.py -------------------------------------------------------------------------------- /pyvision/detection/efficientdet/lib/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | 5 | from torch.utils.data import Dataset, DataLoader 6 | from pycocotools.coco import COCO 7 | 8 | import cv2 9 | 10 | class CustomDataset(Dataset): 11 | 12 | def __init__(self, root_dir, img_dir="images", set_name="train2017", transform=None): 13 | 14 | self.root_dir = root_dir 15 | self.img_dir = img_dir 16 | self.set_name = set_name 17 | self.transform = transform 18 | 19 | self.coco_tool = COCO(os.path.join(self.root_dir, 'annotations', 'instances_'+self.set_name+'.json')) 20 | self.image_ids = self.coco_tool.getImgIds() 21 | 22 | self.load_classes() 23 | 24 | def load_classes(self): 25 | 26 | categories = 
self.coco_tool.loadCats(self.coco_tool.getCatIds()) 27 | categories.sort(key = lambda x: x["id"]) 28 | 29 | # load name -> label 30 | self.classes = {} 31 | self.coco_labels = {} 32 | self.coco_labels_inverse = {} 33 | for category in categories: 34 | self.coco_labels[len(self.classes)] = category['id'] 35 | self.coco_labels_inverse[category['id']] = len(self.classes) 36 | self.classes[category['name']] = len(self.classes) 37 | 38 | # load label -> name 39 | self.labels = {} 40 | for key, value in self.classes.items(): 41 | self.labels[value] = key 42 | 43 | 44 | def load_image(self, idx): 45 | 46 | img_info = self.coco_tool.loadImgs(self.image_ids[idx])[0] 47 | img_path = os.path.join( 48 | self.root_dir, self.img_dir, self.set_name, img_info['file_name'] 49 | ) 50 | img = cv2.imread(img_path) 51 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 52 | 53 | img = img.astype(np.float32) / 255.0 54 | 55 | return img 56 | 57 | def coco_label_to_label(self, coco_label): 58 | return self.coco_labels_inverse[coco_label] 59 | 60 | def label_to_coco_label(self, label): 61 | return self.coco_labels[label] 62 | 63 | def num_classes(self): 64 | return len(self.classes) 65 | 66 | def load_annotations(self, idx): 67 | 68 | anno_ids = self.coco_tool.getAnnIds( 69 | imgIds=self.image_ids[idx], iscrowd=False 70 | ) 71 | annotations = np.zeros((0, 5)) 72 | 73 | # if some images miss annotations 74 | if len(anno_ids) == 0: 75 | return annotations 76 | 77 | # parsing the annotations here 78 | coco_annotations = self.coco_tool.loadAnns(anno_ids) 79 | for idx, a in enumerate(coco_annotations): 80 | 81 | # skip the annotations that have no height/width 82 | if a['bbox'][2] < 1 or a['bbox'][3] < 1: 83 | continue 84 | 85 | annotation = np.zeros((1, 5)) 86 | annotation[0, :4] = a['bbox'] 87 | annotation[0, 4] = self.coco_label_to_label(a['category_id']) 88 | annotations = np.append(annotations, annotation, axis=0) 89 | 90 | # transform [x, y, w, h] -> [x1, y1, x2, y2] 91 | annotations[:, 2] = annotations[:, 0] + annotations[:, 2] 92 | annotations[:, 3] = annotations[:, 1] + annotations[:, 3] 93 | 94 | return annotations 95 | 96 | 97 | def __len__(self): 98 | return len(self.image_ids) 99 | 100 | 101 | def __getitem__(self, idx): 102 | 103 | img = self.load_image(idx) 104 | annot = self.load_annotations(idx) 105 | 106 | data = { 107 | "img": img, 108 | "annot": annot 109 | } 110 | 111 | if self.transform: 112 | data = self.transform(data) 113 | 114 | return data 115 | 116 | 117 | def collater(data): 118 | imgs = [s['img'] for s in data] 119 | annots = [s['annot'] for s in data] 120 | scales = [s['scale'] for s in data] 121 | 122 | imgs = torch.from_numpy(np.stack(imgs, axis=0)) 123 | 124 | max_num_annots = max(annot.shape[0] for annot in annots) 125 | 126 | if max_num_annots > 0: 127 | 128 | annot_padded = torch.ones((len(annots), max_num_annots, 5)) * -1 129 | 130 | if max_num_annots > 0: 131 | for idx, annot in enumerate(annots): 132 | if annot.shape[0] > 0: 133 | annot_padded[idx, :annot.shape[0], :] = annot 134 | else: 135 | annot_padded = torch.ones((len(annots), 1, 5)) * -1 136 | 137 | imgs = imgs.permute(0, 3, 1, 2) 138 | 139 | return {'img': imgs, 'annot': annot_padded, 'scale': scales} 140 | 141 | 142 | class Resizer(object): 143 | """Convert ndarrays in sample to Tensors.""" 144 | 145 | def __call__(self, sample, common_size=512): 146 | image, annots = sample['img'], sample['annot'] 147 | height, width, _ = image.shape 148 | if height > width: 149 | scale = common_size / height 150 | resized_height = 
common_size 151 | resized_width = int(width * scale) 152 | else: 153 | scale = common_size / width 154 | resized_height = int(height * scale) 155 | resized_width = common_size 156 | 157 | image = cv2.resize(image, (resized_width, resized_height)) 158 | 159 | new_image = np.zeros((common_size, common_size, 3)) 160 | new_image[0:resized_height, 0:resized_width] = image 161 | 162 | annots[:, :4] *= scale 163 | 164 | return {'img': torch.from_numpy(new_image), 'annot': torch.from_numpy(annots), 'scale': scale} 165 | 166 | 167 | class Augmenter(object): 168 | """Convert ndarrays in sample to Tensors.""" 169 | 170 | def __call__(self, sample, flip_x=0.5): 171 | if np.random.rand() < flip_x: 172 | image, annots = sample['img'], sample['annot'] 173 | image = image[:, ::-1, :] 174 | 175 | rows, cols, channels = image.shape 176 | 177 | x1 = annots[:, 0].copy() 178 | x2 = annots[:, 2].copy() 179 | 180 | x_tmp = x1.copy() 181 | 182 | annots[:, 0] = cols - x2 183 | annots[:, 2] = cols - x_tmp 184 | 185 | sample = {'img': image, 'annot': annots} 186 | 187 | return sample 188 | 189 | 190 | class Normalizer(object): 191 | 192 | def __init__(self): 193 | self.mean = np.array([[[0.485, 0.456, 0.406]]]) 194 | self.std = np.array([[[0.229, 0.224, 0.225]]]) 195 | 196 | def __call__(self, sample): 197 | image, annots = sample['img'], sample['annot'] 198 | 199 | return {'img': ((image.astype(np.float32) - self.mean) / self.std), 'annot': annots} 200 | 201 | -------------------------------------------------------------------------------- /pyvision/detection/efficientdet/readme.md: -------------------------------------------------------------------------------- 1 | # EfficientDet: Scalable and Efficient Object Detection 2 | 3 | A model zoo implementation of the EfficientDet algorithm. 4 | 5 | ## Current Stat 6 | 7 | * Efficientdet-b0 trained on Dataset-v3 with a loss of 0.13 8 | 9 | ## Usage 10 | 11 | * To Train, from repo root, 12 | 13 | ```shell 14 | !python src/models/efficientdet/train.py 15 | ``` 16 | 17 | ## To Do 18 | 19 | - [ ] Training b1 - b7 models. Experimenting with focal loss values. 20 | - [ ] Train API 21 | -------------------------------------------------------------------------------- /pyvision/detection/readme.md: -------------------------------------------------------------------------------- 1 | # Detection 2 | 3 | Contains popular Object Detection architectures. 
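As an orientation, the quick-start snippet from the YOLOv3 readme further down shows the import-construct-detect pattern used by the detectors documented below (see each model's own readme for its full parameters):

```
from pyvision.detection import yolov3

yolo = yolov3.YOLOv3()

# imgs: input images (arrays) with boxes drawn, objs: list of detections
imgs, objs = yolo.detect()
```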
4 | 5 | ## Currently Supported: 6 | - YOLOv3 7 | - DEtection TRansformer (DETR) 8 | -------------------------------------------------------------------------------- /pyvision/detection/yolov3/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import * -------------------------------------------------------------------------------- /pyvision/detection/yolov3/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/yolov3/config/__init__.py -------------------------------------------------------------------------------- /pyvision/detection/yolov3/config/models_supported.txt: -------------------------------------------------------------------------------- 1 | yolov3 2 | yolov3-tiny -------------------------------------------------------------------------------- /pyvision/detection/yolov3/config/weights_download.json: -------------------------------------------------------------------------------- 1 | { 2 | "yolov3.weights":"1edDTm9BwkNylyFfv6BJmS_OzSNw5QncR", 3 | "yolov3-tiny.weights":"1U1xYO5ubw0_JiNkHIe8KwJk8alOySBaK" 4 | } -------------------------------------------------------------------------------- /pyvision/detection/yolov3/config/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | # 0 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | # 1 35 | [maxpool] 36 | size=2 37 | stride=2 38 | 39 | # 2 40 | [convolutional] 41 | batch_normalize=1 42 | filters=32 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | # 3 49 | [maxpool] 50 | size=2 51 | stride=2 52 | 53 | # 4 54 | [convolutional] 55 | batch_normalize=1 56 | filters=64 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | # 5 63 | [maxpool] 64 | size=2 65 | stride=2 66 | 67 | # 6 68 | [convolutional] 69 | batch_normalize=1 70 | filters=128 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | # 7 77 | [maxpool] 78 | size=2 79 | stride=2 80 | 81 | # 8 82 | [convolutional] 83 | batch_normalize=1 84 | filters=256 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | # 9 91 | [maxpool] 92 | size=2 93 | stride=2 94 | 95 | # 10 96 | [convolutional] 97 | batch_normalize=1 98 | filters=512 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | # 11 105 | [maxpool] 106 | size=2 107 | stride=1 108 | 109 | # 12 110 | [convolutional] 111 | batch_normalize=1 112 | filters=1024 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | ########### 119 | 120 | # 13 121 | [convolutional] 122 | batch_normalize=1 123 | filters=256 124 | size=1 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | # 14 130 | [convolutional] 131 | batch_normalize=1 132 | filters=512 133 | size=3 134 | stride=1 135 | pad=1 136 | activation=leaky 137 | 138 | # 15 139 | [convolutional] 140 | size=1 141 | stride=1 142 | pad=1 143 | filters=255 
144 | activation=linear 145 | 146 | 147 | 148 | # 16 149 | [yolo] 150 | mask = 3,4,5 151 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 152 | classes=80 153 | num=6 154 | jitter=.3 155 | ignore_thresh = .7 156 | truth_thresh = 1 157 | random=1 158 | 159 | # 17 160 | [route] 161 | layers = -4 162 | 163 | # 18 164 | [convolutional] 165 | batch_normalize=1 166 | filters=128 167 | size=1 168 | stride=1 169 | pad=1 170 | activation=leaky 171 | 172 | # 19 173 | [upsample] 174 | stride=2 175 | 176 | # 20 177 | [route] 178 | layers = -1, 8 179 | 180 | # 21 181 | [convolutional] 182 | batch_normalize=1 183 | filters=256 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | # 22 190 | [convolutional] 191 | size=1 192 | stride=1 193 | pad=1 194 | filters=255 195 | activation=linear 196 | 197 | # 23 198 | [yolo] 199 | mask = 1,2,3 200 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 201 | classes=80 202 | num=6 203 | jitter=.3 204 | ignore_thresh = .7 205 | truth_thresh = 1 206 | random=1 -------------------------------------------------------------------------------- /pyvision/detection/yolov3/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/yolov3/data/__init__.py -------------------------------------------------------------------------------- /pyvision/detection/yolov3/data/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /pyvision/detection/yolov3/issues.md: -------------------------------------------------------------------------------- 1 | - [ ] check gpu flags throughout repo -------------------------------------------------------------------------------- /pyvision/detection/yolov3/readme.md: -------------------------------------------------------------------------------- 1 | # You Only Look Once v3 (YOLOv3) 2 | 3 | YOLOv3 is a state of the art object detection algorithm. 4 | 5 | Check out [usage](#Usage) to start using YOLOv3 in your project or check [summary](#Summary) for implementation details. 6 | 7 | Do check out their [website](https://pjreddie.com/darknet/yolo/) or read the [paper](https://pjreddie.com/media/files/papers/YOLOv3.pdf). 
8 | 9 | Check out this standalone [notebook](https://github.com/pranjaldatta/PyVision/blob/master/demo/detection/yolov3/yolov3_demo.ipynb) to see how easily you can use YOLOv3 in 3-4 lines! 10 | 11 | If the above link does not work, please look [here](https://nbviewer.jupyter.org/github/pranjaldatta/PyVision/blob/master/demo/detection/yolov3/yolov3_demo.ipynb). 12 | 13 | ## Summary 14 | 15 | Currently, PyVision YOLOv3 supports the model listed below. The pretrained models were provided by the author. More details can be accessed [here](https://pjreddie.com/darknet/yolo/). 16 | 17 | | Model | Train Dataset| Test Dataset | mAP | FPS| Available | 18 | --------|------------|------|---|----|-----| 19 | | YOLOv3-416 (default) | COCO-trainval | test-dev | 55.3 | 35 | Yes | 20 | | YOLOv3-tiny | COCO-trainval | test-dev | 33.1 | 220| Yes 21 | 22 | ## Usage 23 | 24 | For detailed documentation and parameters, refer to docstrings/source code. 25 | 26 | **Brief Usage Summary:** 27 | 28 | The model setup is done via the YOLOv3 class exposed in *PyVison.detection.yolov3* . All model related configuration ranging from model type to confidence thresholds can be set throught the class constructor. 29 | 30 | Detection is done through the *detect()* method in the YOLOv3 class. Again, it offers some parameters for customisation that can override the general class configuration. Refer to source code docstrings for more details. 31 | 32 | **Quick Start:** 33 | 34 | - To use the default *YOLOv3-416* model, 35 | 36 | ``` 37 | from pyvision.detection import yolov3 38 | 39 | yolo = yolov3.YOLOv3() 40 | 41 | # img is the images in array format with boxes drawn 42 | # objs is the list of detections and box coordinates 43 | imgs, objs = yolo.detect() 44 | ``` 45 | 46 | - To use *YOLOv3-tiny* model: 47 | 48 | ``` 49 | from pyvision.detection import yolov3 50 | 51 | yolo = yolov3.YOLOv3(model="yolov3-tiny") 52 | 53 | # img is the images in array format with boxes drawn 54 | # objs is the list of detections and box coordinates 55 | imgs, objs = yolo.detect() 56 | ``` 57 | 58 | - To list supported models, 59 | 60 | ``` 61 | from pyvision.detection import yolov3 62 | 63 | print(yolov3.available_models()) 64 | ``` 65 | 66 | - To run **tests**, from repo root, run the following command from terminal 67 | 68 | ``` 69 | $ python tests/detection/yolov3/yolo_test.py 70 | ``` 71 | 72 | ## Contributor 73 | 74 | - [Pranjal Datta](https://github.com/pranjaldatta) -------------------------------------------------------------------------------- /pyvision/detection/yolov3/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/yolov3/utils/__init__.py -------------------------------------------------------------------------------- /pyvision/detection/yolov3/utils/box_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | import numpy as np 6 | import cv2 7 | import matplotlib.pyplot as plt 8 | import pickle as pkl 9 | import random 10 | 11 | def iou(box1, box2, device): 12 | """ 13 | calculates iou between two boxes box1 and box2 14 | """ 15 | b1x1, b1y1, b1x2, b1y2 = box1[:,0], box1[:,1], box1[:,2], box1[:,3] 16 | b2x1, b2y1, b2x2, b2y2 = box2[:,0], box2[:,1], box2[:,2], box2[:,3] 17 | 18 | inter_x1 = torch.max(b1x1, b2x1) 19 | inter_y1 = torch.max(b1y1, 
b2y1) 20 | inter_x2 = torch.min(b1x2, b2x2) 21 | inter_y2 = torch.min(b1y2, b2y2) 22 | 23 | inter_shape = inter_x1.shape 24 | 25 | if torch.cuda.is_available() and device is not "cpu": 26 | inter_area = torch.max(inter_x2-inter_x1+1.0, torch.zeros(inter_shape).cuda())*torch.max(inter_y2-inter_y1+1.0, torch.zeros(inter_shape).cuda()) 27 | else: 28 | inter_area = torch.max(inter_x2-inter_x1+1.0, torch.zeros(inter_shape))*torch.max(inter_y2-inter_y1+1.0, torch.zeros(inter_shape)) 29 | 30 | box1_area = (b1x2 - b1x1 + 1.0) * (b1y2 - b1y1 + 1.0) 31 | box2_area = (b2x2 - b2x1 + 1.0) * (b2y2 - b2y1 + 1.0) 32 | 33 | iou = inter_area / (box1_area + box2_area - inter_area) 34 | 35 | return iou 36 | 37 | 38 | def draw_box(pred, orig_img, cls, colors): 39 | """ 40 | draw the predicted bounding boxes on a given image. 41 | designed for single images. 42 | For multi batch support, supply singular image iteratively 43 | """ 44 | 45 | coords1 = tuple(pred[1:3].int()) 46 | coords2 = tuple(pred[3:5].int()) 47 | label = "{0}".format(cls) 48 | color = random.choice(colors) 49 | cv2.rectangle(orig_img, coords1, coords2, color, 2) 50 | text_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0] 51 | coords2 = coords1[0] + text_size[0] + 3, coords1[1] + text_size[1] + 4 52 | cv2.rectangle(orig_img, coords1, coords2, color, -1) 53 | cv2.putText(orig_img, label, (coords1[0], coords1[1]+text_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 1, [255,255,255], 1) 54 | return orig_img 55 | -------------------------------------------------------------------------------- /pyvision/detection/yolov3/utils/pallete: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/yolov3/utils/pallete -------------------------------------------------------------------------------- /pyvision/detection/yolov3/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | def parse_config(path): 2 | """ 3 | This method parses a config file and constructs a list of blocks. 4 | 5 | Each block is a singular unit in the architecture as explained in 6 | the paper. Blocks are represented as a dictionary in the list. 7 | 8 | Input: 9 | - path: path to the config file. 10 | 11 | Returns: 12 | - a list containing a dictionary of individual block information. 
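    Example (illustrative; run from the repo root against the bundled config,
    noting that all parsed values are strings):

        >>> blocks = parse_config("pyvision/detection/yolov3/config/yolov3-tiny.cfg")
        >>> blocks[0]["type"], blocks[0]["batch"]
        ('net', '1')
        >>> blocks[1]["type"], blocks[1]["filters"]
        ('convolutional', '16')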
13 | """ 14 | cfg_file = open(path, "r") 15 | 16 | lines = cfg_file.read().split("\n") 17 | lines = [line for line in lines if len(line) > 0] 18 | lines = [line for line in lines if line[0] != '#'] 19 | lines = [line.strip() for line in lines] 20 | 21 | block = {} 22 | blocks_list = [] 23 | 24 | for line in lines: 25 | if line[0] == "[": 26 | if len(block) != 0: 27 | blocks_list.append(block) 28 | block = {} 29 | block["type"] = line[1:-1].rstrip() 30 | else: 31 | idx, value = line.split("=") 32 | block[idx.rstrip()] = value.lstrip() 33 | blocks_list.append(block) 34 | 35 | return blocks_list 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /pyvision/detection/yolov3/utils/preprocess.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | import numpy as np 6 | import cv2 7 | import matplotlib.pyplot as plt 8 | from PIL import Image 9 | 10 | 11 | def letterbox_img(img, dims): 12 | """ 13 | resize image keeping aspect ratio intact using padding 14 | """ 15 | img_w, img_h = img.shape[1], img.shape[0] 16 | w, h = dims 17 | new_width = int(img_w * min(w/img_w, h/img_h)) 18 | new_height = int(img_h * min(w/img_w, h/img_h)) 19 | img_resized = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) 20 | 21 | canvas = np.full((dims[1], dims[0], 3), 128) 22 | 23 | canvas[(h-new_height)//2:(h-new_height)//2 + new_height, (w-new_width)//2:(w-new_width)//2 + new_width, :] = img_resized 24 | 25 | return canvas 26 | 27 | def prepare_img_cv2(img, dims): 28 | """ 29 | prepare image for forward pass. 30 | 31 | returns a Tensor 32 | """ 33 | # type check 34 | if not isinstance(img, np.ndarray): 35 | raise TypeError("expected . got <{}>".format(type(img))) 36 | 37 | img_dims = (img.shape[1], img.shape[0]) 38 | _img = (letterbox_img(img, (dims, dims))) 39 | _img_new = _img[:,:,::-1].transpose((2,0,1)).copy() 40 | _img_new = torch.from_numpy(_img_new).float().div(255.0).unsqueeze(0) 41 | 42 | return _img_new, img, img_dims 43 | 44 | 45 | def prepare_img_pil(img, dims): 46 | """ 47 | prepares a PIL image for forward pass 48 | 49 | returns a Tensor 50 | """ 51 | # type check 52 | if not isinstance(img, Image.Image): 53 | raise TypeError("expected . 
got <{}>".format(type(img))) 54 | 55 | original_img = img 56 | img = img.convert("RGB") 57 | img_dims = img.size 58 | img = img.resize(dims) 59 | img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes())) 60 | img = img.view(*dims, 3).transpose(0,1).transpose(0,2).contiguous() 61 | img = img.view(1, 3, *dims) 62 | img = img.float().div(255.0) 63 | return (img, original_img, img_dims) 64 | 65 | 66 | -------------------------------------------------------------------------------- /pyvision/detection/yolov3/utils/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | import numpy as np 6 | import cv2 7 | import matplotlib.pyplot as plt 8 | 9 | from .box_utils import iou 10 | 11 | 12 | def load_classes(path): 13 | with open(path) as class_file: 14 | class_names = class_file.read().split("\n")[:-1] 15 | return class_names 16 | 17 | def predict_transforms(preds, input_dims, anchors, n_classes, device='cpu'): 18 | 19 | batch_size = preds.size(0) 20 | stride = input_dims // preds.size(2) 21 | grid_size = input_dims // stride 22 | box_attrs = 5 + n_classes 23 | n_anchors = len(anchors) 24 | 25 | anchors = [(a[0]/stride, a[1]/stride) for a in anchors] 26 | 27 | preds = preds.view(batch_size, box_attrs*n_anchors, grid_size*grid_size) 28 | preds = preds.transpose(1,2).contiguous() 29 | preds = preds.view(batch_size, grid_size*grid_size*n_anchors, box_attrs) 30 | 31 | preds[:,:,0] = torch.sigmoid(preds[:,:,0]) 32 | preds[:,:,1] = torch.sigmoid(preds[:,:,1]) 33 | preds[:,:,4] = torch.sigmoid(preds[:,:,4]) 34 | 35 | grid_len = np.arange(grid_size) 36 | a, b = np.meshgrid(grid_len, grid_len) 37 | 38 | x_offset = torch.FloatTensor(a).view(-1,1) 39 | y_offset = torch.FloatTensor(b).view(-1,1) 40 | 41 | 42 | x_offset = x_offset.to(device) 43 | y_offset = y_offset.to(device) 44 | 45 | x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1, n_anchors) 46 | x_y_offset = x_y_offset.view(-1, 2).unsqueeze(0) 47 | 48 | preds[:,:,:2] += x_y_offset 49 | 50 | anchors = torch.FloatTensor(anchors) 51 | 52 | anchors = anchors.to(device) 53 | 54 | anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0) 55 | preds[:,:,2:4] = torch.exp(preds[:,:,2:4])*anchors 56 | 57 | preds[:,:,5:5+n_classes] = torch.sigmoid(preds[:,:,5:5+n_classes]) 58 | 59 | preds[:,:,:4] *= stride 60 | 61 | 62 | return preds 63 | 64 | 65 | def _unique(t): 66 | 67 | t_numpy = t.cpu().numpy() 68 | t_np_unique = np.unique(t_numpy) 69 | t_unique = torch.from_numpy(t_np_unique) 70 | 71 | unique_tensor = torch.zeros(t_unique.shape) # error prone 72 | unique_tensor.copy_(t_unique) 73 | return unique_tensor 74 | 75 | 76 | 77 | 78 | 79 | 80 | def postprocess(preds, device, confidence, n_classes, nms=True, nms_conf=0.5): 81 | """ 82 | We perform confidence thresholding and nms suppression in this 83 | method 84 | """ 85 | 86 | # confidence thresholding 87 | 88 | conf_mask = (preds[:,:,4] > confidence).float().unsqueeze(2) 89 | preds = preds*conf_mask 90 | 91 | 92 | # checks for non zero indices. If no non zero index remains 93 | # shape of ind_nz will be (x, 0). 
In that case we return 0 94 | try: 95 | ind_nz = torch.nonzero(preds[:,:,4]).transpose(0,1).contiguous() 96 | if ind_nz.size(1) == 0: 97 | raise Exception 98 | except: 99 | return 0 100 | 101 | 102 | # translate the coords from (center_x, center_y, height, width) 103 | # to (top_left_x, top_left_y, bottom_right_x, bottom_right_y) 104 | 105 | box_corners = torch.zeros_like(preds) #error prone 106 | box_corners[:,:,0] = (preds[:,:,0] - preds[:,:,2]/2) 107 | box_corners[:,:,1] = (preds[:,:,1] - preds[:,:,3]/2) 108 | box_corners[:,:,2] = (preds[:,:,0] + preds[:,:,2]/2) 109 | box_corners[:,:,3] = (preds[:,:,1] + preds[:,:,3]/2) 110 | preds[:,:,:4] = box_corners[:,:,:4] 111 | 112 | batch_size = preds.size(0) 113 | 114 | output = torch.zeros(1, preds.size(2) + 1) 115 | write = False 116 | 117 | for index in range(batch_size): 118 | 119 | image_preds = preds[index] 120 | 121 | max_conf, max_conf_score = torch.max(image_preds[:,5:5+n_classes], 1) 122 | max_conf = max_conf.float().unsqueeze(1) 123 | max_conf_score = max_conf_score.float().unsqueeze(1) 124 | _seq = (image_preds[:,:5], max_conf, max_conf_score) 125 | image_preds = torch.cat(_seq, 1) 126 | 127 | non_zero_indices = (torch.nonzero(image_preds[:,4])) 128 | 129 | _image_preds = image_preds[non_zero_indices.squeeze(), :].view(-1,7) 130 | 131 | try: 132 | img_classes = _unique(_image_preds[:,-1]) 133 | img_classes = img_classes.to(device) 134 | except: 135 | continue 136 | 137 | # now we do nms classwise 138 | for _class in img_classes: 139 | 140 | cls_mask = _image_preds*(_image_preds[:,-1] == _class).float().unsqueeze(1) 141 | cls_mask_index = torch.nonzero(cls_mask[:, -2]).squeeze() 142 | 143 | image_pred_class = _image_preds[cls_mask_index].view(-1, 7) 144 | 145 | # sort the detections such that the entry with maximum objectness 146 | # score is at the top 147 | conf_sort_index = torch.sort(image_pred_class[:,4], descending=True)[1] 148 | image_pred_class = image_pred_class[conf_sort_index] 149 | num_dets = image_pred_class.size(0) 150 | 151 | if nms: 152 | 153 | # we run nms for each detection 154 | for i in range(num_dets): 155 | 156 | try: 157 | ious = iou(image_pred_class[i].unsqueeze(0), image_pred_class[i+1:], device) 158 | except ValueError: 159 | #print("ValueError: at iou calculation") 160 | break 161 | except IndexError: 162 | #print("IndexError: at iou calculation") 163 | break 164 | 165 | # zero out all the entries whose iou value exceed the threshold 166 | iou_mask = (ious < nms_conf).float().unsqueeze(1) 167 | image_pred_class[i+1:] *= iou_mask 168 | 169 | # Remove the zero entries 170 | non_zero_idx = torch.nonzero(image_pred_class[:,4]).squeeze() 171 | image_pred_class = image_pred_class[non_zero_idx].view(-1, 7) 172 | 173 | 174 | batch_inds = torch.zeros(image_pred_class.size(0), 1).fill_(index) 175 | batch_inds = batch_inds.to(device) 176 | _to_cat = (batch_inds, image_pred_class) 177 | 178 | if not write: 179 | output = torch.cat(_to_cat, 1) 180 | write = True 181 | else: 182 | _outs = torch.cat(_to_cat, 1) 183 | output = torch.cat((output, _outs)) 184 | 185 | 186 | return output 187 | -------------------------------------------------------------------------------- /pyvision/face_detection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/face_detection/__init__.py -------------------------------------------------------------------------------- 
/pyvision/face_detection/facenet/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import Facenet -------------------------------------------------------------------------------- /pyvision/face_detection/facenet/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/face_detection/facenet/config/__init__.py -------------------------------------------------------------------------------- /pyvision/face_detection/facenet/config/weights_download.json: -------------------------------------------------------------------------------- 1 | { 2 | "facenet-vggface2.pt" : "1m_CVVqbGNL-2LSFaVf1mHm_VhoSvVLHe", 3 | "facenet-casia-webface.pt" : "1JkzXiTWvsMgIz-ViOPicF8_PUrJFYfEM" 4 | } -------------------------------------------------------------------------------- /pyvision/face_detection/facenet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from ....misc.mtcnn import MTCNN -------------------------------------------------------------------------------- /pyvision/face_detection/facenet/readme.md: -------------------------------------------------------------------------------- 1 | # FaceNet: A Unified Embedding for Face Recognition and Clustering 2 | 3 | FaceNet is one of the most popular face recognition architectures. 4 | 5 | ## Paper 6 | The paper can be read [here](https://arxiv.org/pdf/1503.03832.pdf). 7 | 8 | ## Summary 9 | 10 | - The implementation uses a **Inception-Resnet-v1** architecture to generate the embeddings. 11 | 12 | - Currently two models pretrained on *casia-webface* and *vggface2* are made available. These weights were originally made available by David Sandberg's implementation [here](https://github.com/davidsandberg/facenet). 13 | 14 | - For usage details check **Usage**, but to summarize, the implementation essentially exposes functions for embedding generation and embedding verification as well i.e. a basic Facial Recognition pipeline has been made available 15 | 16 | ## Quick Usage 17 | 18 | Check [demo](https://github.com/pranjaldatta/PyVision/tree/master/demo/face_detection/facenet). 19 | 20 | ## Usage 21 | 22 | - Import facenet. 23 | 24 | ``` 25 | from pyvision.face_detection.facenet import Facenet 26 | ``` 27 | 28 | - Initialize the class. Pretrained for the moment can be casia-webface or vggface2 models. 29 | 30 | ``` 31 | fc = Facenet(pretrianed="casia-webface", saveLoc="save", saveName="det.pkl") 32 | ``` 33 | 34 | - Now we gotta generate embeddings and store the embeddings for comparison. For this we use the **generate_embeddings()** function. There are two ways images can be supplied to this function: 35 | 36 | 1. Pass a directory containing images. In that case, the individual image names will be used as image labels 37 | 38 | 2. Pass a singular image/path. In this case, a label has to be passed by the user. This gives the most flexibility and hence is recommended. 39 | 40 | Also, the *save* parameter can be used to specify a custom location for a given embedding that is different than the one specified during model init. 41 | 42 | Also it returns a list of dicts containing labels and their associated embeddings. 43 | 44 | 45 | ``` 46 | embeddings = fc.generate_embeddings(...) 47 | ``` 48 | 49 | - Now to run "recognition" on an image, we use the **verify_embeddings()** function. 
Unline the generate_embeddings() function, this function only accepts singular image or image paths i.e. no directories are allowed. 50 | A few things to note regarding the function: 51 | 52 | 1. Embeddings can either be passed directly as a parameter (a list of dicts) or a path to a stored embedding can be passed. 53 | 54 | 2. The comparison function uses *l2_norm* to calculate distances between embeddings. Other distance calculation metrics like *cosine_similarity* can be added in the future. 55 | 56 | 3. The *compare_embeddings()* function needs to be supplied with a label and the function will check whether the given embedding is *similar* to the previously known embeddings associated with the supplied label. 57 | 58 | 4. Return a tuple (True/False, prediction, min_l2_loss) 59 | 60 | ``` 61 | did_match, pred_label, l2_loss = fc.compare_embeddings(...) 62 | ``` 63 | 64 | - For more details look [tests](https://github.com/pranjaldatta/PyVision/tree/master/tests/face_detection/facenet). 65 | 66 | ## Note 67 | While implementing the pretrained models, it was found that often in many cases classifications were not accurate. So it is recommended that care is taken while using facenet. 68 | -------------------------------------------------------------------------------- /pyvision/face_detection/facenet/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/face_detection/facenet/utils/__init__.py -------------------------------------------------------------------------------- /pyvision/face_detection/facenet/utils/extract_face.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | from PIL import Image 4 | import numpy as np 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | def crop_and_tensorify(img, box, size=160, margin=0, save=None, show=True): 12 | """Extract face + margin from PIL Image given bounding box coordinates 13 | 14 | Arguments: 15 | -> img: PIL Image from which faces have to be extracted 16 | -> box: Bounding box coordinates in (top_left_x, top_left_y, bottom_right_x, bottom_right_y) 17 | -> size: size of the crop 18 | -> margin: around bounding boxes 19 | -> save: location to save the crop 20 | -> show: show the crops 21 | 22 | Returns: 23 | -> torch.Tensor: The face in a tensor format 24 | """ 25 | if not isinstance(img, Image.Image): 26 | raise TypeError("PIL Image accepted. 
Got img of type: ", type(img)) 27 | 28 | box = box[:4] 29 | 30 | margin = [ 31 | margin * (box[2] - box[0]) / (size - margin), 32 | margin * (box[3] - box[1]) / (size - margin) 33 | ] 34 | 35 | box = [ 36 | int(max(box[0] - margin[0]/2 , 0)), 37 | int(max(box[1] - margin[1]/2 , 0)), 38 | int(min(box[2] + margin[0]/2 , img.size[0])), 39 | int(min(box[3] + margin[1]/2 , img.size[1])) 40 | ] 41 | 42 | face = img.crop(box).resize((size, size), Image.BILINEAR) 43 | 44 | if save is not None: 45 | face.save(save+"/detection.png") 46 | if show: 47 | face.show() 48 | 49 | face = torch.tensor(np.float32(face)) 50 | 51 | return face 52 | 53 | def prewhiten_func(x): 54 | mean = x.mean() 55 | std = x.std() 56 | std_adj = std.clamp(min=1.0/(float(x.numel())**5)) 57 | y = (x - mean) / std_adj 58 | return y 59 | 60 | 61 | def extract_face(mtcnn_module, img, prewhiten=True, conf_thresh=.6): 62 | """ 63 | extract_face takes in a PIL or cv2 image or a path to an image. 64 | Runs MTCNN on the image to detect the face, crop the faces, convert 65 | to tensor and return a tensor and the associated face confidences 66 | 67 | Argument: 68 | -> img: PIL or cv2 Image. Can be a path to 69 | -> conf_thresh: Minimum confidence threshold for MTCNN 70 | 71 | Returns: 72 | -> face_tensors, props = cropped faces converted into tensors and their 73 | associated confidences repectively 74 | """ 75 | 76 | if mtcnn_module is None: 77 | raise ValueError("mtcnn_module cannot be None") 78 | 79 | if isinstance(img, str): 80 | img = Image.open(img) 81 | elif isinstance(img, np.ndarray): 82 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 83 | img = Image.fromarray(img) 84 | 85 | # generating detections 86 | detections = mtcnn_module.detect(img) 87 | 88 | # crop every face, convert to tensor 89 | faces_list = [] 90 | for detection in detections: 91 | 92 | face = crop_and_tensorify(img, detection, show=False) 93 | if prewhiten: 94 | face = prewhiten_func(face) 95 | faces_list.append(face) 96 | 97 | faces_list = torch.stack(faces_list) 98 | 99 | return faces_list # return face detections probs also 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /pyvision/face_detection/facenet/utils/layer_factory.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | import os 6 | import numpy as np 7 | 8 | class BasicConv2d(nn.Module): 9 | 10 | def __init__(self, in_channels, out_channels, size, stride, padding=0): 11 | 12 | super().__init__() 13 | 14 | self.conv = nn.Conv2d(in_channels, out_channels, size, 15 | stride, padding, bias=False) 16 | # batch normalize values are defined the Sandberg Implementation 17 | self.bn = nn.BatchNorm2d(out_channels, eps=0.001, momentum=0.1, affine=True) 18 | self.relu_fn = nn.ReLU(inplace=False) 19 | 20 | def forward(self, x): 21 | x = self.conv(x) 22 | x = self.bn(x) 23 | x = self.relu_fn(x) 24 | 25 | return x 26 | 27 | class Block35_A(nn.Module): 28 | """ 29 | Builds the 32x32 block. (Referred in the paper as Inception- 30 | Resnet-A) 31 | """ 32 | def __init__(self, scale=1.0): 33 | 34 | super().__init__() 35 | 36 | self.scale = scale 37 | 38 | # now we construct the different branches. 
39 | # Refer to Inception-Resnet-A diagram in the paper 40 | self.branch0 = BasicConv2d(256, 32, 1, 1) 41 | 42 | self.branch1 = nn.Sequential( 43 | BasicConv2d(256, 32, 1, 1), 44 | BasicConv2d(32, 32, 3, 1, 1) 45 | ) 46 | 47 | self.branch2 = nn.Sequential( 48 | BasicConv2d(256, 32, 1, 1), 49 | BasicConv2d(32, 32, 3, 1, 1), 50 | BasicConv2d(32, 32, 3, 1, 1) 51 | ) 52 | 53 | self.conv2d = nn.Conv2d(96, 256, 1, 1) 54 | self.relu_fn = nn.ReLU(inplace=False) 55 | 56 | def forward(self, x): 57 | 58 | x0 = self.branch0(x) 59 | x1 = self.branch1(x) 60 | x2 = self.branch2(x) 61 | 62 | x_cat = torch.cat((x0, x1, x2), 1) 63 | 64 | out = self.conv2d(x_cat) 65 | out = out * self.scale + x 66 | out = self.relu_fn(out) 67 | 68 | return out 69 | 70 | class Block17_B(nn.Module): 71 | """ 72 | Builds the 17x17 Block. (referred to as Inception-Resnet-B) 73 | """ 74 | def __init__(self, scale=1.0): 75 | 76 | super().__init__() 77 | 78 | self.scale = scale 79 | 80 | self.branch0 = BasicConv2d(896, 128, 1, 1) 81 | 82 | self.branch1 = nn.Sequential( 83 | BasicConv2d(896, 128, 1, 1), 84 | BasicConv2d(128, 128, size=(1, 7), stride=1, padding=(0, 3)), 85 | BasicConv2d(128, 128, size=(7, 1), stride=1, padding=(3, 0)) 86 | ) 87 | 88 | self.conv2d = nn.Conv2d(256, 896, 1, 1) 89 | self.relu_fn = nn.ReLU(inplace=False) 90 | 91 | def forward(self, x): 92 | 93 | x0 = self.branch0(x) 94 | x1 = self.branch1(x) 95 | 96 | x_cat = torch.cat((x0, x1), 1) 97 | 98 | out = self.conv2d(x_cat) 99 | out = out * self.scale + x 100 | out = self.relu_fn(out) 101 | 102 | return out 103 | 104 | class Block8_C(nn.Module): 105 | """ 106 | Implements the 8x8 Block. (Referred to as Inception-Resnet-C in the paper) 107 | """ 108 | def __init__(self, scale=1.0, relu=True): 109 | 110 | super().__init__() 111 | 112 | self.scale = scale 113 | self.relu = relu 114 | 115 | self.branch0 = BasicConv2d(1792, 192, 1, 1) 116 | 117 | self.branch1 = nn.Sequential( 118 | BasicConv2d(1792, 192, 1, 1), 119 | BasicConv2d(192, 192, (1, 3), 1, (0, 1)), 120 | BasicConv2d(192, 192, (3, 1), 1, (1, 0)) 121 | ) 122 | 123 | self.conv2d = nn.Conv2d(384, 1792, 1, 1) 124 | if self.relu: 125 | self.relu_fn = nn.ReLU(inplace=False) 126 | 127 | def forward(self, x): 128 | 129 | x0 = self.branch0(x) 130 | x1 = self.branch1(x) 131 | 132 | x_cat = torch.cat((x0, x1), 1) 133 | out = self.conv2d(x_cat) 134 | out = out * self.scale + x 135 | if self.relu: 136 | out = self.relu_fn(out) 137 | 138 | return out 139 | 140 | 141 | class Reduction_A(nn.Module): 142 | """ 143 | Builds the Reduction A module. Refer to paper for details 144 | """ 145 | def __init__(self): 146 | 147 | super().__init__() 148 | 149 | self.branch0 = BasicConv2d(256, 384, 3, 2) 150 | 151 | self.branch1 = nn.Sequential( 152 | BasicConv2d(256, 192, 1, 1), 153 | BasicConv2d(192, 192, 3, 1, 1), 154 | BasicConv2d(192, 256, 3, 2) 155 | ) 156 | 157 | self.branch2 = nn.MaxPool2d(3, stride=2) 158 | 159 | def forward(self, x): 160 | 161 | x0 = self.branch0(x) 162 | x1 = self.branch1(x) 163 | x2 = self.branch2(x) 164 | 165 | out = torch.cat((x0, x1, x2), 1) 166 | 167 | return out 168 | 169 | class Reduction_B(nn.Module): 170 | """ 171 | Builds Reduction B module. 
For more details check the paper 172 | """ 173 | def __init__(self): 174 | 175 | super().__init__() 176 | 177 | self.branch0 = nn.Sequential( 178 | BasicConv2d(896, 256, 1, 1), 179 | BasicConv2d(256, 384, 3, 2) 180 | ) 181 | 182 | self.branch1 = nn.Sequential( 183 | BasicConv2d(896, 256, 1, 1), 184 | BasicConv2d(256, 256, 3, 2) 185 | ) 186 | 187 | self.branch2 = nn.Sequential( 188 | BasicConv2d(896, 256, 1, 1), 189 | BasicConv2d(256, 256, 3, 1, 1), 190 | BasicConv2d(256, 256, 3, 2) 191 | ) 192 | 193 | self.branch3 = nn.MaxPool2d(3, 2) 194 | 195 | def forward(self, x): 196 | 197 | x0 = self.branch0(x) 198 | x1 = self.branch1(x) 199 | x2 = self.branch2(x) 200 | x3 = self.branch3(x) 201 | out = torch.cat((x0, x1, x2, x3), 1) 202 | return out -------------------------------------------------------------------------------- /pyvision/gans/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/__init__.py -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/README.md: -------------------------------------------------------------------------------- 1 | # Deep Convolutional GAN 2 | This is an implementation of the research paper "Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks" written by Alec Radford, Luke Metz, Soumith Chintala. 3 | 4 | Check out this notebook and run the DC_GAN inferences in just 3 lines. 5 | 6 | ## Dependencies 7 | - torch==1.8.0 8 | - torchvision==0.9.0 9 | - numpy==1.20.3 10 | - matplotlib==3.3.4 11 | - IPython==7.23.1 12 | - gdown==3.13.0 13 | 14 | ## Dataset 15 | The original paper had used three datasets for training the DCGAN namely - *Large-scale Scene Understanding (LSUN) (Yu et al., 2015), Imagenet-1k and a newly assembled Faces dataset*. However due to computational and other limitations, we have used Large-scale CelebFaces Attributes (CelebA) Dataset. 16 | 17 | ### Guidelines to download, setup and use the dataset 18 | The CelebA dataset may be downloaded here as a file named *img_align_celeba.zip*. 19 | 20 | **Please write the following commands on your terminal to extract the file in the proper directory** 21 | ``` 22 | $ mkdir celeba 23 | $ unzip -d 24 | ``` 25 | The resulting directory structure should be: 26 | ``` 27 | /path/to/celeba 28 | -> img_align_celeba 29 | -> 188242.jpg 30 | -> 173822.jpg 31 | -> 284702.jpg 32 | -> 537394.jpg 33 | ... 34 | ``` 35 |
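The layout above is the standard one expected by `torchvision.datasets.ImageFolder` (a dataset root containing at least one sub-directory of images), which is why the structure must be preserved. Purely as an illustration (this is not the module's own data-loading code, and the path is a placeholder), such a loader with the hyper-parameters listed in the documentation (image size 64, batch size 128) could look like:

```python
import torch
import torchvision.datasets as dset
import torchvision.transforms as transforms

dataset = dset.ImageFolder(
    root="/path/to/celeba",        # the folder that contains img_align_celeba/
    transform=transforms.Compose([
        transforms.Resize(64),     # image_size from the documentation
        transforms.CenterCrop(64),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]),
)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=128, shuffle=True)
```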
36 | 37 | **Note**: You may use any other dataset of your choice. However, please ensure that the directory structure remains the same for the code to be compatible with it. 38 | 39 | ## Quick Start 40 | - Incase you want to use some other dataset to train the DCGAN (with 1 GPU), please initialize the DCGAN module with your desired dataset path and train as: 41 | 42 | ```python 43 | from pyvision.gans.deep_convolutional_gan.dcgan import DCGAN 44 | 45 | dc_gan = DCGAN(data = ) 46 | img_list, G_losses, D_losses = dc_gan.train() 47 | ``` 48 | 49 | - Incase you have either no GPU (0) or more than 1 GPU on your machine, consider changing the ngpu parameter while initializing the DCGAN module with your desired dataset path and train as: 50 | 51 | 52 | ```python 53 | from pyvision.gans.deep_convolutional_gan.dcgan import DCGAN 54 | 55 | dc_gan = DCGAN(data = , ngpu = ) 56 | img_list, G_losses, D_losses = dc_gan.train() 57 | ``` 58 | 59 | **Note**: Is is advisable to use a GPU for training because training the DCGAN is computationally very expensive. 60 | 61 | - To get the inferences directly with our pre-trained model please initialize the DeepConvGAN with the desired path to the model and get the inferences as: 62 | 63 | ```python 64 | 65 | from pyvision.gans.deep_convolutional_gan import DeepConvGAN 66 | 67 | DeepConvGAN.inference(DeepConvGAN, set_weight_dir='dcgan-model.pth' , set_gen_dir='') 68 | ``` 69 | 70 | ## Tests 71 | To run tests from PyVision root, run, 72 | 73 | $ python tests/gans/deep_convolutional_gan/gan_test.py 74 | 75 | ## Results from implementation 76 | - Plot to see how D and G’s losses changed during training 77 | 78 | 79 | 80 | - Batches of fake data from G 81 | 82 |     83 | 84 | Check out the documentation here. 85 | 86 | ### Citation 87 | ``` 88 | @inproceedings{liu2015faceattributes, 89 | title = {Deep Learning Face Attributes in the Wild}, 90 | author = {Liu, Ziwei and Luo, Ping and Wang, Xiaogang and Tang, Xiaoou}, 91 | booktitle = {Proceedings of International Conference on Computer Vision (ICCV)}, 92 | month = {December}, 93 | year = {2015} 94 | } 95 | ``` 96 | 97 | ## Contributed by: 98 | - Indira Dutta 99 | - Srijarko Roy 100 | -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import DeepConvGAN 2 | -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/config/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/config/weights_download.json: -------------------------------------------------------------------------------- 1 | { 2 | "dcgan-model.pth": "1EMm3rdVZvNoT2y4VtULFNzIwkIQslWQT" 3 | } 4 | -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/dcgan/__init__.py: -------------------------------------------------------------------------------- 1 | from .dcgan import DCGAN, Generator, Discriminator -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/docs/documentation.md: -------------------------------------------------------------------------------- 1 | ## Model Components 2 | The DCGAN Architecture has the following 
components: 3 | 4 | - The Generator uses fractional-strided convolutions followed by batch normalisation and ReLU activation for all layers except for the last that uses tanh activation. 5 | - The Discriminator uses strided convolutions followed by batch normalisation and LeakyReLU activation for all layers except for a single sigmoid output. 6 | 7 | 8 | ## Parameters 9 | 10 | Parameter |      Value      | 11 | :------------: | :---: | 12 | batch_size | 128 | 13 | image_size | 64 | 14 | nc | 3 | 15 | nz | 100 | 16 | ngf | 64 | 17 | ndf | 64 | 18 | num_epochs | 5 | 19 | lr | 0.0002 | 20 | beta1 | 0.5 | 21 | ngpu | 1 | 22 | 23 | ## Result Documentation 24 | After running *DCGAN* on the CelebA Dataset for 5 epochs on GPU (computationally very expensive) we got the following output images along with the Generator and Discriminator losses. 25 | 26 | ## Batch of images from the Generator after 5 epochs 27 | 28 | 29 | ## Losses after each epoch 30 | No. of Epochs | Generator Loss | Discriminator Loss | 31 | :------------: | :------------: | :------------: | 32 | 1 | 0.7894 | 1.0838 | 33 | 2 | 0.7277 | 1.0489 | 34 | 3 | 0.7796 | 0.9256 | 35 | 4 | 0.6330 | 1.1345 | 36 | 5 | 0.7519 | 1.0138 | 37 | 38 | ## Plot for Generator Loss and Discriminator Loss w.r.t number of iterations 39 | 40 | 41 | -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.parallel 4 | import torch.backends.cudnn as cudnn 5 | import torch.optim as optim 6 | import torch.utils.data 7 | 8 | import torchvision.datasets as dset 9 | import torchvision.transforms as transforms 10 | import torchvision.utils as vutils 11 | 12 | import numpy as np 13 | import os 14 | import json 15 | import gdown 16 | 17 | import matplotlib.pyplot as plt 18 | import matplotlib.animation as animation 19 | from IPython.display import HTML 20 | 21 | from .dcgan import Generator 22 | 23 | __PREFIX__ = os.path.dirname(os.path.realpath(__file__)) 24 | 25 | class DeepConvGAN(object): 26 | 27 | def __init__(self, nc = 3, nz = 100, ngf = 64, ngpu = 1): 28 | 29 | ''' 30 | The constructor has the Parameters which are going to be used to generate the images 31 | 32 | Parameters: 33 | 34 | - nc(default: 3): number of color channels in an image, we have used 3 channels(RGB). 35 | 36 | - nz(default: 100): length of the latent vector that is initially passed into the Generator, according to the paper it is 100. 37 | 38 | - ngf(default: 64): denotes the depth of the feature maps passed through the Generator, according to the paper it is 64. 39 | 40 | - ndf(default: 64): denotes the depth of the feature maps passed through the Discriminator, according to the paper it is 64. 41 | 42 | - ngpu(default: 1): number of GPUs available for training. If no GPU is available, the model will train on CPU. Here, we have only 1 GPU available. 
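        Example (illustrative; comparable to the call shown in the package README):

            gan = DeepConvGAN()   # raises ValueError if ngpu > 0 but CUDA is unavailable
            gan.inference(set_weight_dir="dcgan-model.pth", set_gen_dir="result_img")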
43 | ''' 44 | 45 | if ngpu > 0 and not torch.cuda.is_available(): 46 | raise ValueError('ngpu > 0 but cuda not available') 47 | 48 | self.nc = nc 49 | self.nz = nz 50 | self.ngf = ngf 51 | self.ngpu = ngpu 52 | self.device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu") 53 | 54 | def inference(self, set_weight_dir = 'dcgan-model.pth', set_gen_dir = 'result_img'): 55 | 56 | set_weight_dir = __PREFIX__ + "/weights/" + set_weight_dir 57 | 58 | ''' saving generated images in a directory ''' 59 | def save_image(set_gen_dir): 60 | if os.path.exists(set_gen_dir): 61 | print("Found directory for saving generated images") 62 | return 1 63 | else: 64 | print("Directory for saving images not found, making a directory named 'result_img'") 65 | os.mkdir(set_gen_dir) 66 | return 1 67 | 68 | ''' checking if weights are present ''' 69 | def check_weights(set_weight_dir): 70 | if os.path.exists(set_weight_dir): 71 | print("Found weights") 72 | return 1 73 | else: 74 | print("Downloading weights") 75 | download_weights() 76 | 77 | ''' downloading weights if not present ''' 78 | def download_weights(): 79 | with open(__PREFIX__+"/config/weights_download.json") as fp: 80 | json_file = json.load(fp) 81 | if not os.path.exists(__PREFIX__+"/weights/"): 82 | os.mkdir(__PREFIX__+"/weights/") 83 | url = 'https://drive.google.com/uc?id={}'.format(json_file['dcgan-model.pth']) 84 | gdown.download(url, __PREFIX__+"/weights/dcgan-model.pth", quiet=False) 85 | set_weight_dir = "dcgan-model.pth" 86 | print("Download finished") 87 | 88 | ''' checking if weights are present ''' 89 | check_weights(set_weight_dir) 90 | 91 | '''saving the generated images ''' 92 | save_image(set_gen_dir) 93 | 94 | '''calling the DCGAN for inference ''' 95 | model_GAN = Generator(1, 100, 64, 3) 96 | 97 | ''' uploading the model ''' 98 | checkpoint = torch.load(set_weight_dir) 99 | model_GAN.load_state_dict(checkpoint['generator_state_dict']) 100 | model_GAN.eval() 101 | 102 | ''' saving the generated images''' 103 | def save_new_img(): 104 | 105 | b_size = 512 106 | noise = torch.randn(b_size, 100, 1, 1) 107 | out = model_GAN(noise).detach().cpu() 108 | print("The generated images are saved in the given directory") 109 | 110 | ''' saving the generated images in a list ''' 111 | img_list = [] 112 | for i in range(b_size): 113 | img_list.append(out[i,:,:,:]) 114 | 115 | ''' saving the generated images in jpg format ''' 116 | for i in range(len(img_list)): 117 | generated_image = '{}/generated_image_{}.jpg'.format(set_gen_dir,i) 118 | vutils.save_image(img_list[i], generated_image, padding = 0) 119 | 120 | save_new_img() 121 | -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/losses.png -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/result.png -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/result2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/result2.png -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_0.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_1.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_10.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_11.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_12.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_13.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_14.jpg -------------------------------------------------------------------------------- 
/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_15.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_15.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_2.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_3.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_4.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_5.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_6.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_7.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_8.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_9.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_9.jpg -------------------------------------------------------------------------------- /pyvision/gans/wasserstein_gan/README.md: -------------------------------------------------------------------------------- 1 | # Wasserstein GAN 2 | 3 | This is a PyTorch 1.5.0 implementation of WGAN. 4 | 5 | Check out the paper [here](https://arxiv.org/pdf/1701.07875.pdf). 6 | 7 | **Requirements:** 8 | 9 | 1. Python 3.6+ 10 | 2. Numpy 1.18.5 11 | 3. PyTorch 1.5+ 12 | 4. Gdown 3.11.0 13 | 5. Matplotlib 3.2.1 14 | 6. CUDA - 10.1 15 | 16 | **Dataset** 17 | 18 | CelebA was used for the training of this model, which can be downloaded at [this httpURL](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html). 19 | 20 | 1. If you wish to use your own dataset, the structure should be "datasets/sub_dir/img.png". The dataset will download as a file named img_align_celeba.zip. 21 | 22 | 2. Once downloaded, create a directory named **celeba/** and extract the zip file into that directory. 23 | 24 | 3. The resulting directory structure should be: 25 | ``` 26 | /path/to/celeba 27 | -> img_align_celeba 28 | -> 188242.jpg 29 | -> 173822.jpg 30 | -> 284702.jpg 31 | -> 537394.jpg 32 | ``` 33 | 34 | This is an important step because we will be using the ImageFolder dataset class, which requires there to be subdirectories in the dataset�s root folder. 35 | 36 | **Usage** 37 | 38 | ```python 39 | from model import WassGAN 40 | 41 | # To train the GAN with default parameters 42 | WassGAN(run_type="train") 43 | 44 | # To run inference using the GAN 45 | WassGAN() 46 | ``` 47 | 48 | **Train** 49 | 50 | To train on your own dataset: 51 | 52 | 1. Specify dataset path in wgan.py " dataroot = 'path' " line 44. 53 | 2. You can change other parameters such as batch_size, etc but we suggest to use the ones already provided. 54 | 3. Please change the number of workers defined in wgan.py " workers = " line 49. 55 | 4. More parameters regarding training length, learning rate, etc can be changed in train.py, starting line 178. 56 | 5. Number of epochs can be changed by altering the n_epoch in model.py, line 74. 57 | 58 | ## Inference 59 | 60 | ##### Weights from a pretrained model on CelebA will be downloaded automatically if not specified elsewise. 61 | 62 | In order to run inference on your own trained model: 63 | 64 | 1. Change set_ckpt_dir in model.py 65 | 2. 
You can change the number of images generated by changing the "len" parameter in model.py, line 57 66 | 67 | ## Training details 68 | 69 | * Number of epochs: 135 70 | * Learning rate: 0.00001 71 | * Clamp size: 0.01 72 | * Batch size: 64 73 | * Gpu Used: Nvidia 1660ti 6GB 74 | * Training time: 9 Hrs 75 | 76 | ### Current output 77 | 78 | ![Image](current_output_imgs/test36.png) 79 | -------------------------------------------------------------------------------- /pyvision/gans/wasserstein_gan/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import WassGAN -------------------------------------------------------------------------------- /pyvision/gans/wasserstein_gan/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/wasserstein_gan/config/__init__.py -------------------------------------------------------------------------------- /pyvision/gans/wasserstein_gan/config/weights_download.json: -------------------------------------------------------------------------------- 1 | { 2 | "WGAN-disc.pt": "17yn3z1lYY2HevRrfAOOzEFVqNX68lld8", 3 | "WGAN-gen.pt": "17SP_KIS1iL_kdk0B45UWRe6SBqiXIszt", 4 | "WGAN-stats.pkl": "1E8zIl4tDwRmntGy8x2yvB5h9hLXZpiNN" 5 | } 6 | -------------------------------------------------------------------------------- /pyvision/gans/wasserstein_gan/current_output_imgs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/wasserstein_gan/current_output_imgs/__init__.py -------------------------------------------------------------------------------- /pyvision/gans/wasserstein_gan/current_output_imgs/test36.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/wasserstein_gan/current_output_imgs/test36.png -------------------------------------------------------------------------------- /pyvision/gans/wasserstein_gan/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import torch.nn.functional as F 5 | import torchvision.utils 6 | import numpy as np 7 | import argparse 8 | import os 9 | import subprocess as sp 10 | from .wgan import * 11 | import json 12 | import gdown 13 | from .train import * 14 | 15 | __PREFIX__ = os.path.dirname(os.path.realpath(__file__)) 16 | 17 | class WassGAN: 18 | 19 | 20 | def __init__(self, run_type = "inference"): 21 | print("run_type = ",run_type) 22 | if run_type == "inference": 23 | #self.inference() 24 | pass 25 | 26 | 27 | elif run_type == "train": 28 | #self.train(train_params, ckpt_params, gan_params, n_epoch, data_loader) 29 | pass 30 | 31 | 32 | def train(self, train_params, ckpt_params, gan_params, n_epoch, data_loader): 33 | 34 | raise NotImplementedError("training mode not supported") 35 | 36 | model = CelebA(train_params, ckpt_params, gan_params) 37 | data_loader = wgan.load_dataset() 38 | 39 | torch.manual_seed(100) 40 | n_epoch = 135 # Number of epochs to train for 41 | model.train(n_epoch, data_loader) 42 | 43 | def inference(self, set_ckpt_dir="WGAN-gen.pt", set_gen_dir="gen", device="cpu"): 44 | 45 | 
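# Flow of inference(), summarized for clarity: resolve the checkpoint path under
# <package>/weights/, validate the requested device against torch.cuda.is_available(),
# ensure the output directory (set_gen_dir) exists, download the generator weights via
# gdown (IDs listed in config/weights_download.json) if they are missing, load the WGAN
# generator from the checkpoint, and save 20 generated frames as PNGs into set_gen_dir.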
set_ckpt_dir = __PREFIX__ + "/weights/" + set_ckpt_dir 46 | 47 | if device is not "cpu": 48 | 49 | if not torch.cuda.is_available(): 50 | raise ValueError("cuda not available but got device=", device) 51 | device = "cuda" 52 | 53 | 54 | def gen(set_gen_dir): 55 | #set_gen_dir = "gen" # path to save img directory 56 | if os.path.exists(set_gen_dir): 57 | print("Found gen directory") 58 | return 1 59 | else: 60 | print("Directory for saving images not found, making one") 61 | os.mkdir(set_gen_dir) 62 | set_gen_dir = "gen" 63 | return 1 64 | 65 | def check_weights(): 66 | if os.path.exists(set_ckpt_dir): 67 | print("Found weights") 68 | return 1 69 | else: 70 | print("Downloading weigths") 71 | download_weights() 72 | 73 | def download_weights(): 74 | with open(__PREFIX__+"/config/weights_download.json") as fp: 75 | json_file = json.load(fp) 76 | if not os.path.exists(__PREFIX__+"/weights/"): 77 | os.mkdir(__PREFIX__+"/weights/") 78 | url = 'https://drive.google.com/uc?id={}'.format(json_file['WGAN-gen.pt']) 79 | gdown.download(url, __PREFIX__+"/weights/WGAN-gen.pt", quiet=False) 80 | set_ckpt_dir = "WGAN-gen.pt" 81 | print("Download finished") 82 | 83 | #device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 84 | check_weights() 85 | gen(set_gen_dir) 86 | gan = WGAN(device=device) 87 | gan.eval() 88 | gan = gan.to(device) 89 | gan.load_model(filename=set_ckpt_dir) 90 | 91 | def save_new_img(): 92 | len = 20 # number of images to be generated 93 | for i in range(len): 94 | vec = gan.create_latent_var(1, random.randint(1, 200)) # batch, seed value 95 | img = gan.generate_img(vec) 96 | img = unnormalize(img) 97 | fname_in = '{}/frame{}.png'.format(set_gen_dir, i) 98 | torchvision.utils.save_image(img, fname_in, padding=0) 99 | print("All images are saved in gen") 100 | 101 | save_new_img() 102 | -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/images/content1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/images/content1.jpg -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/images/content2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/images/content2.jpg -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/images/content3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/images/content3.jpg -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/images/content4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/images/content4.jpg -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/images/content5.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/images/content5.jpg -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/images/content6.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/images/content6.jpeg -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/images/style1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/images/style1.jpg -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/images/style6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/images/style6.jpg -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/images/style7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/images/style7.jpg -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/output/content1+style6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/output/content1+style6.png -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/output/content2+style1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/output/content2+style1.png -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/output/content3+style6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/output/content3+style6.png -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/output/content4+style1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/output/content4+style1.png -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/output/content4+style7.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/output/content4+style7.png -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/output/content5+style1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/output/content5+style1.png -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/output/content6+style7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/output/content6+style7.png -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/README.md: -------------------------------------------------------------------------------- 1 | # Neural Style Transfer 2 | 3 | An implementation of the paper _A Neural Algorithm of Artistic Style_ 4 | The paper can be read [here](https://arxiv.org/pdf/1508.06576.pdf). 5 | 6 | The idea is to extract the _content_ from one image, the 'content image', and the _style_ or _texture_ from another image, the 'style image', to get a single output which has a combination of the two. 7 | 8 | To check out a notebook demonstrating how you can use the Neural Style Transfer Module in 3 lines, check [here](https://github.com/pranjaldatta/PyVision/blob/master/demo/misc/NeuralStyleTransfer/nst_demo.ipynb). 9 | 10 | If the link above does not work check [here](https://nbviewer.jupyter.org/github/pranjaldatta/PyVision/blob/master/demo/misc/NeuralStyleTransfer/nst_demo.ipynb). 11 | 12 | ## A Few details about the implementation 13 | 14 | - By default, due to computational limitations, both style and content images are resized to 512x512 if using a GPU or 128x128 if on a CPU. If the *retain_dims* is set to True, the output is **UPSAMPLED** to the original content image dimensions but this upsampling especially for 128x128 images reduces quality. 15 | 16 | - This behavior can be disabled by setting the param *downsample* to False. This ensures that the style image is resized to the size of the content image and style transfer is run with original content image dimensions. 17 | 18 | **Note**: Using Neural Style Transfer is computationally expensive so it is recommended to use GPU for optimal timing. 
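A minimal sketch of the parameters discussed above (assuming *retain_dims* is accepted by the constructor alongside *save*, *downsample* and *use_gpu*):

```python
from pyvision.misc.NeuralStyleTransfer import NeuralStyle

# retain_dims=True upsamples the stylized output back to the content image's original size
nst = NeuralStyle(save="output.jpg", retain_dims=True, use_gpu=True)

output, time_taken = nst.run_style_transfer("path to style img", "path to content img")
```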
19 | 20 | ## Quick Start 21 | 22 | - To run style transfer with default settings on a CPU or GPU, 23 | 24 | ```python 25 | from pyvision.misc.NeuralStyleTransfer import NeuralStyle 26 | 27 | style_img, content_img = 'path to style img', 'path to content img' # paths or loaded images 28 | 29 | nst = NeuralStyle(save="output.jpg") 30 | 31 | output, time_taken = nst.run_style_transfer(style_img, content_img) 32 | ``` 33 | 34 | - To disable downsampling and run style transfer on the original content image dimensions, 36 | ```python 37 | from pyvision.misc.NeuralStyleTransfer import NeuralStyle 38 | 39 | style_img, content_img = 'path to style img', 'path to content img' # paths or loaded images 40 | 41 | nst = NeuralStyle(save="output.jpg", downsample=False, use_gpu=True) 42 | 43 | output, time_taken = nst.run_style_transfer(style_img, content_img) 44 | ``` 45 | 46 | ## Examples 47 | 48 | For more examples, check [Examples](https://github.com/pranjaldatta/PyVision/tree/nst/pyvision/misc/NeuralStyleTransfer/Examples). 49 | 50 | Content Image | Style Image | Result | 51 | :-------------: | :---------: | :-----: | 52 | | | | 53 | | | | 54 | | | | 55 | 56 | -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/__init__.py: -------------------------------------------------------------------------------- 1 | from .neural_style import NeuralStyle -------------------------------------------------------------------------------- /pyvision/misc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/__init__.py -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/README.md: -------------------------------------------------------------------------------- 1 | # Multi Task Cascaded Convolutional Neural Network in PyTorch (MTCNN) 2 | 3 | State of the art face and facial-landmark detection architecture. 4 | 5 | ## Paper 6 | 7 | Read the paper [here](https://arxiv.org/pdf/1604.02878.pdf). 8 | 9 | ## Contributed By 10 | 11 | - [Sashrika Surya](https://github.com/sashrika15) 12 | 13 | - [Pranjal Datta](https://github.com/pranjaldatta) 14 | 15 | ## Tests 16 | 17 | **All tests passing.** 18 | 19 | To check, from PyVision root, run: 20 | 21 | ``` 22 | python tests/misc/mtcnn/mtcnn_test.py 23 | ``` 24 | 25 | ## Usage 26 | 27 | This usage guide assumes that the PyVision repository has already been cloned. If not, follow the instructions given in the PyVision repository root and clone the repository. 
Then follow the steps listed below: 28 | 29 | ``` 30 | from pyvision.misc.mtcnn import MTCNN 31 | from PIL import Image 32 | from pyvision.misc.mtcnn.utils.visualize import show_boxes 33 | 34 | path = "path to image" 35 | 36 | img = Image.open(path) 37 | 38 | mtcnn = MTCNN() 39 | boxes = mtcnn.detect(img) # returns bounding boxes 40 | 41 | img = show_boxes(img, boxes) 42 | img.show() 43 | ``` 44 | 45 | For more detailed usage, check out [mtcnn_test.py](https://github.com/pranjaldatta/PyVision/blob/master/tests/misc/mtcnn/mtcnn_test.py) 46 | -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import * 2 | from .utils import * 3 | -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/config/weights_download.json: -------------------------------------------------------------------------------- 1 | { 2 | "pnet": "1Fw-Jrei12NqYhEkwqtS1jP3WvClr8a0P", 3 | "onet": "11--NPbveLKQ9-f-UH3Kl8CzgfAYQoPch", 4 | "rnet": "1BqF021ltiNmBIDFMHrr_zv7x4zBtSnmH" 5 | 6 | } -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/detector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | import torch 4 | from .nets import PNet, RNet, ONet 5 | from .stage_one import first_stage 6 | from .stage_two import get_image_boxes 7 | from .utils.visualize import show_boxes 8 | from .utils.utils import nms, convert_to_square, calibrate_boxes 9 | 10 | 11 | 12 | def detector(image, min_face_size = 20.0, conf_thresh=[0.7, 0.7, 0.8], nms_thresh=[0.7, .7, .7]): 13 | """ 14 | method that accepts an image and returns bounding boxes around faces 15 | 16 | Parameters: 17 | -> image (PIL.Image): Image in PIL format 18 | -> min_face_size (float): minimum size of face to look for 19 | -> conf_thresh (list): list of confidence thresholds for the various stages 20 | of the pipeline. 
(Size = 3) 21 | -> nms_thresh (list): list of overlap thresholds for nms (sizze = 3) 22 | """ 23 | 24 | try: 25 | if not isinstance(image, Image.Image): 26 | raise TypeError 27 | if len(conf_thresh) != 3 or len(nms_thresh) != 3: 28 | raise AssertionError 29 | except AssertionError: 30 | print("ERROR: conf_thresh or nms_thresh of len :{},{} while expected size: 3".format(len(conf_thresh), len(nms_thresh))) 31 | exit() 32 | except TypeError: 33 | print("ERROR: Image type found:{}, expected: PIL.Image".format(type(image))) 34 | exit() 35 | 36 | pnet = PNet() 37 | rnet = RNet() 38 | onet = ONet() 39 | 40 | w, h = image.size 41 | min_length = min(h, w) 42 | min_detection_size = 12 43 | scale_factor = 0.709 #not sure why its .709 44 | scales = [] 45 | m = min_detection_size/min_face_size 46 | min_length *= m 47 | factor_count = 0 48 | 49 | while min_length > min_detection_size: 50 | scales += [m * np.power(scale_factor,factor_count)] 51 | min_length *= scale_factor 52 | factor_count += 1 53 | 54 | ################## Stage 1 ############################# 55 | 56 | bounding_boxes = [] 57 | 58 | for s in scales: 59 | boxes = first_stage(image, s, pnet, nms_thresh[0]) 60 | bounding_boxes.append(boxes) 61 | #bounding_boxes has shape [n_scales, n_boxes, 9] 62 | 63 | #remove those scales for which bounding boxes were none 64 | bounding_boxes = [i for i in bounding_boxes if i is not None] 65 | 66 | #Add all the boxes for each scale 67 | if len(bounding_boxes)==0: 68 | return bounding_boxes 69 | 70 | bounding_boxes = np.vstack(bounding_boxes) # returns array of shape [n_boxes, 9] 71 | 72 | 73 | #------------------------- Stage 2 ------------------------------------- 74 | 75 | img_box = get_image_boxes(bounding_boxes,image,size=24) 76 | img_box = torch.tensor(img_box, dtype=torch.float32, requires_grad=False) 77 | 78 | probs, boxes = rnet(img_box) 79 | 80 | probs = probs.data.numpy() #Shape [boxes, 2] 81 | boxes = boxes.data.numpy() #Shape [boxes, 4] 82 | 83 | ind = np.where(probs[:, 1] >= conf_thresh[1])[0] 84 | 85 | bounding_boxes = bounding_boxes[ind] 86 | bounding_boxes[:, 4] = probs[ind, 1].reshape((-1,)) 87 | boxes = boxes[ind] 88 | 89 | keep = nms(bounding_boxes, nms_thresh[1], mode="union") 90 | bounding_boxes = bounding_boxes[keep] 91 | boxes = boxes[keep] 92 | 93 | bounding_boxes = calibrate_boxes(bounding_boxes, boxes) 94 | bounding_boxes = convert_to_square(bounding_boxes) 95 | bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) 96 | 97 | #--------------------STAGE 3------------------------------------------------- 98 | 99 | img_box = get_image_boxes(bounding_boxes, image, size=48) 100 | 101 | if len(img_box) == 0: 102 | return [], [] 103 | 104 | img_box = torch.tensor(img_box, dtype=torch.float32, requires_grad=False) 105 | probs, boxes, landmarks = onet(img_box) 106 | 107 | probs = probs.data.numpy() 108 | boxes = boxes.data.numpy() 109 | landmarks = landmarks.data.numpy() 110 | 111 | 112 | keep = np.where(probs[:,1] > conf_thresh[2])[0] 113 | 114 | bounding_boxes = bounding_boxes[keep] 115 | bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,)) 116 | boxes = boxes[keep] 117 | landmarks = landmarks[keep] 118 | 119 | bounding_boxes = calibrate_boxes(bounding_boxes, boxes) 120 | 121 | 122 | keep = nms(bounding_boxes, overlap_thresh=nms_thresh[2], mode="min") 123 | bounding_boxes = bounding_boxes[keep] 124 | bounding_boxes = convert_to_square(bounding_boxes) 125 | 126 | 127 | return bounding_boxes 128 | 129 | -------------------------------------------------------------------------------- 
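A minimal sketch of calling the detector() helper above directly (the MTCNN wrapper in model.py is the usual entry point; the import paths below assume the package layout of this repository):

```python
from PIL import Image

from pyvision.misc.mtcnn.detector import detector
from pyvision.misc.mtcnn.utils.visualize import show_boxes

img = Image.open("path to image")          # detector() expects a PIL.Image
boxes = detector(img, min_face_size=20.0)  # rows of [x1, y1, x2, y2, confidence, ...]
show_boxes(img, boxes).show()              # draw red rectangles around detected faces
```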
/pyvision/misc/mtcnn/nets.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | from collections import OrderedDict 6 | import os 7 | 8 | WEIGHTS_PATH = os.path.dirname(os.path.realpath(__file__))+"/weights/" 9 | 10 | 11 | class FlattenTensorCustom(nn.Module): 12 | 13 | def __init__(self): 14 | 15 | super(FlattenTensorCustom, self).__init__() 16 | 17 | def forward(self, x): 18 | """ 19 | Input: 20 | 21 | A Tensor x of shape [batch_no, c, h, w] 22 | 23 | Output: 24 | 25 | A Tensor x of shape [batch_no, c*h*w] 26 | """ 27 | 28 | x = x.transpose(3,2).contiguous() #wierd fix 29 | 30 | return x.view(x.size(0), -1) 31 | 32 | 33 | class PNet(nn.Module): 34 | 35 | def __init__(self): 36 | 37 | super(PNet, self).__init__() 38 | 39 | self.features = nn.Sequential(OrderedDict([ 40 | 41 | ("conv1", nn.Conv2d(3, 10, 3, 1)), 42 | ("prelu1", nn.PReLU(10)), 43 | ("pool1", nn.MaxPool2d(2,2,ceil_mode=True)), 44 | 45 | ("conv2", nn.Conv2d(10, 16, 3, 1)), 46 | ("prelu2", nn.PReLU(16)), 47 | 48 | ("conv3", nn.Conv2d(16, 32, 3, 1)), 49 | ("prelu3", nn.PReLU(32)), 50 | 51 | ])) 52 | 53 | self.conv4_1 = nn.Conv2d(32, 2, 1, 1) 54 | self.conv4_2 = nn.Conv2d(32, 4, 1, 1) 55 | 56 | try: 57 | self.weights = np.load(WEIGHTS_PATH+"pnet.npy", allow_pickle=True)[()] 58 | for idx, wts in self.named_parameters(): 59 | wts.data = torch.FloatTensor(self.weights[idx]) 60 | except Exception as err: 61 | print("ERROR: At Pnet Weight Init: {}".format(err)) 62 | exit() 63 | 64 | 65 | def summary(self): 66 | print("PNet Summary:") 67 | print(self.features) 68 | print(self.conv4_1) 69 | print(self.conv4_2) 70 | 71 | def forward(self, x): 72 | x = self.features(x) 73 | probs = F.softmax(self.conv4_1(x), dim=1) #ERROR PRONE #holds probilities and box preds respec. 
74 | boxes = self.conv4_2(x) 75 | 76 | return probs, boxes 77 | 78 | 79 | class RNet(nn.Module): 80 | 81 | 82 | def __init__(self): 83 | 84 | super(RNet, self).__init__() 85 | 86 | self.features = nn.Sequential(OrderedDict([ 87 | ("conv1", nn.Conv2d(3, 28, 3, 1)), 88 | ("prelu1", nn.PReLU(28)), 89 | ("pool1", nn.MaxPool2d(3, 2, ceil_mode=True)), 90 | 91 | ("conv2", nn.Conv2d(28, 48, 3, 1)), 92 | ("prelu2", nn.PReLU(48)), 93 | ("pool2", nn.MaxPool2d(3, 2, ceil_mode=True)), 94 | 95 | ("conv3", nn.Conv2d(48, 64, 2, 1)), 96 | 97 | ("flatten", FlattenTensorCustom()), 98 | ("conv4", nn.Linear(576, 128)), 99 | ("prelu4", nn.PReLU(128)), 100 | ])) 101 | 102 | self.conv5_1 = nn.Linear(128, 2) #boxes 103 | self.conv5_2 = nn.Linear(128, 4) 104 | 105 | try: 106 | self.weights = np.load(WEIGHTS_PATH+"rnet.npy", allow_pickle=True)[()] 107 | for idx, wts in self.named_parameters(): 108 | wts.data = torch.FloatTensor(self.weights[idx]) 109 | except Exception as err: 110 | 111 | print("ERROR: at loading rnet weights: {}".format(err)) 112 | exit() 113 | 114 | def summary(self): 115 | print("RNet Summary:") 116 | print(self.features) 117 | print("\n") 118 | print(self.conv5_1) 119 | print(self.conv5_2) 120 | 121 | def forward(self, x): 122 | 123 | x = self.features(x) 124 | probs = F.softmax(self.conv5_1(x), dim=1) 125 | boxes = self.conv5_2(x) 126 | 127 | return probs, boxes 128 | 129 | 130 | class ONet(nn.Module): 131 | 132 | 133 | def __init__(self): 134 | 135 | super(ONet, self).__init__() 136 | 137 | self.features = nn.Sequential(OrderedDict([ 138 | ("conv1", nn.Conv2d(3, 32, 3, 1)), 139 | ("prelu1", nn.PReLU(32)), 140 | ("pool1", nn.MaxPool2d(3, 2, ceil_mode=True)), 141 | 142 | ("conv2", nn.Conv2d(32, 64, 3, 1)), 143 | ("prelu2", nn.PReLU(64)), 144 | ("pool2", nn.MaxPool2d(3, 2, ceil_mode=True)), 145 | 146 | ("conv3", nn.Conv2d(64, 64, 3)), 147 | 148 | ("prelu3", nn.PReLU(64)), 149 | ("pool3", nn.MaxPool2d(2, 2, ceil_mode=True)), 150 | 151 | ("conv4", nn.Conv2d(64,128,2)), 152 | ("prelu4", nn.PReLU(128)), 153 | ("flatten", FlattenTensorCustom()), 154 | ("conv5", nn.Linear(1152,256)), 155 | ("prelu5", nn.PReLU(256)), 156 | 157 | ])) 158 | 159 | self.conv6_1 = nn.Linear(256,2) #prob of face in bb 160 | self.conv6_2 = nn.Linear(256,4) #box 161 | self.conv6_3 = nn.Linear(256,10) #facial landmarks 162 | 163 | try: 164 | self.weights = np.load(WEIGHTS_PATH+"onet.npy", allow_pickle=True)[()] 165 | for idx, wts in self.named_parameters(): 166 | wts.data = torch.FloatTensor(self.weights[idx]) 167 | except Exception as err: 168 | print("ERROR: at loading onet weights: {}".format(err)) 169 | exit() 170 | 171 | def summary(self): 172 | print("ONet Summary:") 173 | print(self.features) 174 | print("\n") 175 | print(self.conv6_1) 176 | print(self.conv6_2) 177 | print(self.conv6_3) 178 | 179 | def forward(self, x): 180 | x = self.features(x) 181 | probs = F.softmax(self.conv6_1(x), dim=1) 182 | boxes = self.conv6_2(x) 183 | points = self.conv6_3(x) 184 | return probs, boxes, points 185 | -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/requirements.txt: -------------------------------------------------------------------------------- 1 | python=3.7.1 2 | pytorch=1.4.0=py3.7_cpu_0 3 | pillow=7.1.1=py37h718be6c_0 4 | numpy=1.18.1=py37h8960a57_1 5 | torchvision=0.5.0=py37_cpu 6 | opencv=4.1.1 -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/stage_one.py: 
-------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import torch 4 | from .utils.utils import preprocess, nms 5 | import cv2 6 | from PIL import Image 7 | 8 | 9 | 10 | 11 | def scale_boxes(probs, boxes, scale, thresh=.8): 12 | """ 13 | A method that takes in the outputs of pnet, probabilities and 14 | box cords for a scaled image and returns box cords for the 15 | original image. 16 | 17 | Params: 18 | -> probs: probilities of a face for a given bbox; shape: [a,b] 19 | -> boxes: box coords for a given scaled image; shape" [1, 4, a, b] 20 | -> scale: a float denoting the scale factor of the image 21 | -> thresh: minimum confidence required for a facce to qualify 22 | 23 | Returns: 24 | -> returns a float numpy array of shape [num_boxes, 9] #9 because bbox + confidence + offset (4+1+4) 25 | """ 26 | stride = 2 27 | cell_size = 12 28 | inds = np.where(probs > thresh) 29 | if inds[0].size == 0: 30 | return np.array([]) 31 | 32 | tx1, ty1, tx2, ty2 = [boxes[0, i, inds[0], inds[1]] for i in range(4)] 33 | offsets = np.array([tx1, ty1, tx2, ty2]) 34 | 35 | confidence = probs[inds[0], inds[1]] 36 | 37 | bboxes = np.vstack([ 38 | np.round((stride*inds[1] + 1.0)/scale), 39 | np.round((stride*inds[0] + 1.0)/scale), 40 | np.round((stride*inds[1] + 1.0 + cell_size)/scale), 41 | np.round((stride*inds[0] + 1.0 + cell_size)/scale), 42 | confidence, 43 | offsets 44 | ]) 45 | 46 | return bboxes.T 47 | 48 | 49 | def first_stage(img, scale, pnet, nms_thresh): 50 | """ 51 | A method that accepts a PIL Image, 52 | runs it through pnet and does nms. 53 | 54 | Params: 55 | -> img: PIL image 56 | -> scale: a float that determines the scaling factor 57 | -> pnet: an instance of the pnet 58 | -> thresh: threshold below which facial probs are unacceptable 59 | 60 | Returns: 61 | -> numpy array of type float of shape [num_boxes, 9] 62 | which contain box cords for a givens scale, confidence, 63 | and offsets to actual size 64 | """ 65 | 66 | orig_w, orig_h = img.size 67 | scaled_w, scaled_h = math.ceil(scale*orig_w), math.ceil(scale*orig_h) 68 | 69 | img = img.resize((scaled_w, scaled_h), Image.BILINEAR) 70 | img = preprocess(img) 71 | 72 | probs, boxes = pnet(img) 73 | 74 | 75 | probs = probs.data.numpy()[0,1,:,:] 76 | boxes = boxes.data.numpy() 77 | 78 | bounding_boxes = scale_boxes(probs, boxes, scale) 79 | if len(bounding_boxes) == 0: 80 | return None 81 | 82 | selected_ids = nms(bounding_boxes[:,0:5], nms_thresh) #indices to be kept 83 | return bounding_boxes[selected_ids] 84 | -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/stage_two.py: -------------------------------------------------------------------------------- 1 | from .utils.utils import preprocess 2 | import numpy as np 3 | from PIL import Image 4 | from .utils.visualize import show_boxes 5 | 6 | 7 | def get_image_boxes(bounding_boxes, img, size=24): 8 | 9 | """ 10 | Cut out boxes from the image for rnet input 11 | """ 12 | 13 | num_boxes = len(bounding_boxes) 14 | w, h = img.size 15 | [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(bounding_boxes, w, h) 16 | img_boxes = np.zeros((num_boxes, 3, size, size), 'float32') 17 | 18 | for i in range(num_boxes): 19 | img_box = np.zeros((tmph[i], tmpw[i], 3), 'uint8') 20 | 21 | img_array = np.asarray(img, 'uint8') 22 | 23 | #Copies the values from img_array to empty img_box 24 | #x,ex,y,ey are the actual coords in the image 25 | try: 26 | img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), 
:] =\ 27 | img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :] 28 | except ValueError as ve: 29 | print("Value error at index {}".format(i)) 30 | 31 | img_box = Image.fromarray(img_box) 32 | img_box = img_box.resize((size, size), Image.BILINEAR) 33 | img_box = np.asarray(img_box, 'float32') 34 | 35 | img_boxes[i, :, :, :] = preprocess(img_box) 36 | 37 | return img_boxes 38 | 39 | 40 | def pad(bboxes, width, height): 41 | """ 42 | Output: 43 | dy, dx, edy, edx: Coordinates of cut boxes 44 | y, x, ey, ex: Coordinates of box in image 45 | h, w: Heights and widths of boxes. 46 | """ 47 | 48 | #No idea why 1 is added and subtracted from w and h 49 | #e stands for end. So its (x,ex) 50 | 51 | x, y, ex, ey = [bboxes[:, i] for i in range(4)] 52 | w, h = ex - x + 1.0, ey - y + 1.0 53 | num_boxes = bboxes.shape[0] 54 | dx, dy = np.zeros((num_boxes,)), np.zeros((num_boxes,)) 55 | edx, edy = w.copy() - 1.0, h.copy() - 1.0 56 | 57 | #For top left corner 58 | ind = np.where(x < 0.0)[0] 59 | dx[ind] = 0.0 - x[ind] 60 | x[ind] = 0.0 61 | 62 | ind = np.where(y < 0.0)[0] 63 | dy[ind] = 0.0 - y[ind] 64 | y[ind] = 0.0 65 | 66 | #For bottom right corner 67 | ind = np.where(ex > width - 1.0 )[0] 68 | edx[ind] = w[ind] + width - 2.0 - ex[ind] 69 | ex[ind] = width - 1.0 70 | 71 | ind = np.where(ey > height - 1.0)[0] 72 | edy[ind] = h[ind] + height - 2.0 - ey[ind] 73 | ey[ind] = height - 1.0 74 | 75 | return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h] 76 | return_list = [r.astype('int32') for r in return_list] 77 | 78 | return return_list 79 | 80 | -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/mtcnn/utils/__init__.py -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/utils/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | import torch 4 | 5 | def nms(boxes, overlap_thresh=.5, mode='union'): 6 | """ 7 | An utility function that performs nms over the bounding box 8 | 9 | Params: 10 | -> boxes: the bounding box proposals 11 | -> overlap_thresh: maximum permissible overlap ratio 12 | -> mode: default - union (IoU) 13 | 14 | Output: 15 | -> bounding box list with overlapping boxes removed 16 | """ 17 | 18 | if len(boxes) == 0: 19 | return [] 20 | 21 | x1, y1, x2, y2, confidence = [boxes[:, i] for i in range(5)] 22 | 23 | areas = (x2 - x1 + 1.0)*(y2 - y1 + 1.0) 24 | selected = [] 25 | ids_sorted = np.argsort(confidence) 26 | 27 | while len(ids_sorted) > 0: 28 | """ 29 | we loop through the sorted ids. 30 | 1. select the last id 31 | 2. compare the chosen bbox IoU with all the others 32 | 3. del the ones above the threshold. 33 | 4. 
return selected ids 34 | """ 35 | 36 | last_idx = len(ids_sorted) - 1 37 | idx = ids_sorted[last_idx] 38 | selected.append(idx) 39 | 40 | 41 | xi1 = np.maximum(x1[idx], x1[ids_sorted[:last_idx]]) 42 | yi1 = np.maximum(y1[idx], y1[ids_sorted[:last_idx]]) 43 | 44 | xi2 = np.minimum(x2[idx], x2[ids_sorted[:last_idx]]) 45 | yi2 = np.minimum(y2[idx], y2[ids_sorted[:last_idx]]) 46 | 47 | inter_h = np.maximum(0.0, (yi2 - yi1 + 1.0)) 48 | inter_w = np.maximum(0.0, (xi2 - xi1 + 1.0)) 49 | inter_area = inter_h*inter_w 50 | 51 | if mode == "union": 52 | overlap = inter_area/(areas[idx] + areas[ids_sorted[:last_idx]] - inter_area) 53 | elif mode == "min": 54 | overlap = inter_area/np.minimum(areas[idx], areas[ids_sorted[:last_idx]]) 55 | 56 | to_del = np.concatenate([[last_idx], np.where(overlap > overlap_thresh)[0]]) 57 | ids_sorted = np.delete(ids_sorted, to_del) 58 | 59 | #print("nms complete. returning {}/{} boxes".format(len(selected), len(boxes))) 60 | return selected 61 | 62 | 63 | 64 | def preprocess(img): 65 | """ 66 | A utiity function that takes a numpy image array or PIL 67 | Image and returns a tensor 68 | 69 | Input: 70 | -> img: input image in array or PIL format 71 | Output: 72 | -> tensor 73 | """ 74 | if isinstance(img, Image.Image): 75 | img = np.asarray(img, 'float') 76 | img = torch.tensor(img, dtype=torch.float32, requires_grad=False) 77 | img = img.permute(2,0,1) 78 | img = torch.unsqueeze(img, 0) 79 | img = (img - 127.5)*0.0078125 #normalize 80 | return img 81 | 82 | def convert_to_square(bbox): 83 | """ 84 | Convert bounding boxes to square shape 85 | 86 | """ 87 | 88 | square = np.zeros((bbox.shape)) 89 | 90 | x1, y1, x2, y2 = [bbox[:, i] for i in range(4)] 91 | h = y2 - y1 + 1.0 92 | w = x2 - x1 + 1.0 93 | max_side = np.maximum(h, w) 94 | 95 | square[:,0] = x1 + w*0.5 - max_side*0.5 96 | square[:,1] = y1 + h*0.5 - max_side*0.5 97 | square[:, 2] = square[:, 0] + max_side - 1.0 98 | square[:, 3] = square[:, 1] + max_side - 1.0 99 | 100 | return square 101 | 102 | def calibrate_boxes(boxes, offsets): 103 | ''' 104 | offset the original bounding boxes by an amount as predicted by the 105 | rnet. 
106 | 107 | Arguments: 108 | -> boxes: original bounding box list (shape: [n, 9]) 109 | -> offsets: output of the rnet (shape [n, 4]) 110 | 111 | Returns: 112 | -> numpy array of shape [n, 5] 113 | ''' 114 | 115 | x1, y1, x2, y2 = [boxes[:,i] for i in range(4)] 116 | 117 | width = (x2 - x1 + 1.0) 118 | height = (y2 - y1 + 1.0) 119 | 120 | height = np.reshape(height, (-1, 1)) 121 | width = np.reshape(width, (-1, 1)) 122 | 123 | tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)] 124 | t = [x1, y1, x2, y2, tx1, ty1, tx2, ty2] 125 | t = list(map(lambda x: np.reshape(x,(-1, 1)), t)) 126 | x1, y1, x2, y2, tx1, ty1, tx2, ty2 = t[:] 127 | 128 | """ 129 | it was supposed to be x1t = x1+tx1*width but that was providing negative indices so swapped 130 | tx1 and tx2 131 | """ 132 | x1t = x1 + tx2*width 133 | y1t = y1 + ty1*height 134 | x2t = x2 + tx1*width 135 | y2t = y2 + ty2*height 136 | 137 | t = [x1t, y1t, x2t, y2t] 138 | 139 | t = list(map(lambda x: np.reshape(x, (-1,)), t)) 140 | for i in range(4): 141 | boxes[:,i] = t[i] 142 | return boxes 143 | -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/utils/visualize.py: -------------------------------------------------------------------------------- 1 | from PIL import ImageDraw 2 | 3 | def show_boxes(img, bounding_boxes): 4 | 5 | im = img.copy() 6 | draw = ImageDraw.Draw(im) 7 | 8 | 9 | for i in bounding_boxes: 10 | draw.rectangle([ 11 | (i[0],i[1]), 12 | (i[2],i[3]) 13 | ], outline = 'red') 14 | 15 | return im 16 | 17 | def _show_boxes(img, boxes): 18 | im = img.copy() 19 | draw = ImageDraw.Draw(im) 20 | boxes = boxes[0] 21 | for i in boxes: 22 | draw.rectangle([ 23 | (i[0],i[1]), 24 | (i[2],i[3]) 25 | ], outline = 'red') 26 | return im 27 | -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/README.md: -------------------------------------------------------------------------------- 1 | # Noise2Noise: Learning Image Restoration without Clean Data 2 | 3 | Noise2Noise is an image-denoising model which is trained on noisy data only. 4 | This implementation is based on the ICML 2018 [paper](https://arxiv.org/abs/1803.04189) by Jaakko Lehtinen et al. 5 | 6 | ## Some Specific Details 7 | 8 | ### For denoising Gaussian noise 9 | 10 | For Gaussian denoising, the model was trained with a *mean* of **10** and a *standard deviation* in the range [20, 50] (sampled randomly from an uniform distribution). 11 | 12 | ### For Text Removal 13 | 14 | During the training of text removal model, random number of text units were added. 15 | 16 | *For more details*, check out [dataset.py](https://github.com/pranjaldatta/PyVision/blob/master/pyvision/misc/noise2noise/dataset.py). 17 | 18 | ## Summary 19 | 20 | - This model works for additive gaussian noise and text removal only. It does not include poisson noise and Monte Carlo Rendering discussed in the paper. 21 | - U-Net architecture is followed throughout the model. The original paper used a “RED30” network (Mao et al., 2016) for additive gaussian noise. 22 | - The weights were made available by Joey Litalien's implementation [here](https://github.com/joeylitalien/noise2noise-pytorch). 23 | - For additive gaussian noise, sigma or the standard deviation is an important hyperparameter. If the **noise level is greater than thrice of sigma, the denoiser is unable to present a clear image**. 24 | - The text overlay function works within a random integer range to add a random string to the image. 
The denoiser works better for small sized strings which cover less pixels. 25 | 26 | ### Test 27 | 28 | To run test from PyVision root: 29 | 30 | ```python 31 | python tests/misc/noise2noise/n2n_test.py 32 | ``` 33 | 34 | ### Usage 35 | 36 | - The model setup is done through Noise2Noise class via pyvision.misc.noise2noise.model 37 | - The model is initialised with the noise type. For 'test' mode, a data_path is required which contains the path to test images. For 'inference' mode, a PIL image or the path to the image is required as input. The show parameter can be set to 'True' to display the images after denoising. 38 | - The available noise types are: gaussian, text 39 | 40 | ```python 41 | from pyvision.misc.noise2noise.model import Noise2Noise 42 | from PIL import Image 43 | 44 | n2n = Noise2Noise(noise="gaussian") 45 | 46 | img_path = "Path to Image" 47 | img = Image.open(img_path) 48 | 49 | n2n.inference(img, show=False, save="Denoised.png") 50 | 51 | ``` 52 | 53 | ### Example 54 | Gaussian Noise: 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 |
Source Image | Denoised Image
70 | 71 | Text Overlay 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 |
Source Image | Denoised Image
87 | 88 | -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import * -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/assets/gauss_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/noise2noise/assets/gauss_1.png -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/assets/gauss_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/noise2noise/assets/gauss_3.png -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/assets/gdenoised_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/noise2noise/assets/gdenoised_1.png -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/assets/gdenoised_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/noise2noise/assets/gdenoised_3.png -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/assets/tdenoised_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/noise2noise/assets/tdenoised_1.png -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/assets/tdenoised_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/noise2noise/assets/tdenoised_3.png -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/assets/text_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/noise2noise/assets/text_1.png -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/assets/text_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/noise2noise/assets/text_3.png -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/noise2noise/config/__init__.py -------------------------------------------------------------------------------- 
/pyvision/misc/noise2noise/config/weights_download.json: -------------------------------------------------------------------------------- 1 | { 2 | "n2n-gaussian.pt" : "1n_yfTcF4Oz9RqTfHL2ARBQykN5r92yD1" , 3 | "n2n-text.pt" : "1TdjEE4NjZb7m1zNoThGN13HKU5TTzLkJ" 4 | } 5 | -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import os 4 | import random 5 | from torch.utils.data import Dataset, DataLoader 6 | import torchvision.transforms.functional as tvf 7 | from PIL import Image, ImageDraw, ImageFont 8 | from string import ascii_letters 9 | 10 | 11 | class NoisyDataset(Dataset): 12 | ''' 13 | Loads dataset. 14 | NoisyDataset inherits from an abstract class representing Dataset 15 | ''' 16 | 17 | def __init__(self, data_dir, noise, crop_size): 18 | ''' 19 | Initialise dataset 20 | ''' 21 | self.data_dir = data_dir 22 | self.imgs = [] 23 | self.crop_size = 320 24 | self.noise = noise 25 | 26 | for file in os.listdir(data_dir): 27 | if file.endswith(".jpg"): 28 | self.imgs.append( os.path.join(data_dir,file)) 29 | 30 | def gaussian_noise(self,img): 31 | ''' 32 | Add Gaussian noise in dataset 33 | Input: img of type PIL.Image 34 | Output: Noisy image of type PIL.Image 35 | ''' 36 | w,h = img.size 37 | c = len(img.getbands()) 38 | 39 | sigma = np.random.uniform(20,50) 40 | gauss = np.random.normal(10,sigma,(h,w,c)) 41 | noisy = np.array(img) + gauss 42 | 43 | #Values less than 0 become 0 and more than 255 become 255 44 | noisy = np.clip(noisy, 0, 255).astype(np.uint8) 45 | img = Image.fromarray(noisy) 46 | 47 | return img 48 | 49 | 50 | def add_text(self,img): 51 | ''' 52 | Add random string of text to images 53 | Input: img of type PIL.Image 54 | Output: Noisy image of type PIL.Image 55 | ''' 56 | w,h = img.size 57 | c = len(img.getbands()) 58 | im = img.copy() 59 | draw = ImageDraw.Draw(im) 60 | for i in range(random.randint(5,15)): 61 | font_type = ImageFont.truetype(font='Arial.ttf',size=np.random.randint(10,20)) 62 | len_text = np.random.randint(4,20) 63 | text = ''.join(random.choice(ascii_letters) for i in range(len_text)) 64 | x = np.random.randint(0,w) 65 | y = np.random.randint(0,h) 66 | col = tuple(np.random.randint(0,255,c)) 67 | draw.text((x,y),text,fill=col,font=font_type) 68 | 69 | return im 70 | 71 | 72 | def crop_image(self,img): 73 | ''' 74 | Crops the image to a square of size (crop_size, crop_size) 75 | Input: img of type PIL.Image 76 | Output: Cropped image of type PIL.Image 77 | ''' 78 | 79 | w,h = img.size 80 | m = min(w,h) 81 | img = tvf.crop(img, 0,0,m,m) 82 | img = tvf.resize(img, (self.crop_size, self.crop_size)) 83 | 84 | return img 85 | 86 | 87 | def __len__(self): 88 | ''' 89 | Returns length of dataset 90 | ''' 91 | return len(self.imgs) 92 | 93 | 94 | def __getitem__(self,index): 95 | ''' 96 | Compiles dataset 97 | ''' 98 | 99 | img = Image.open(self.imgs[index]).convert('RGB') 100 | resized_img = self.crop_image(img) 101 | 102 | if self.noise == 'text': 103 | source = tvf.to_tensor(self.add_text(resized_img)) 104 | target = tvf.to_tensor(self.add_text(resized_img)) 105 | else: 106 | source = tvf.to_tensor(self.gaussian_noise(resized_img)) 107 | target = tvf.to_tensor(self.gaussian_noise(resized_img)) 108 | 109 | return source,target 110 | 111 | 112 | -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/unet.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Unet(nn.Module): 6 | ''' 7 | Unet architecture for n2n. 8 | No batch norm, dropout 9 | ''' 10 | 11 | def __init__(self, in_channels=3, out_channels=3): 12 | """Initializes U-Net.""" 13 | 14 | super(Unet, self).__init__() 15 | 16 | self._block1 = nn.Sequential( 17 | nn.Conv2d(in_channels, 48, 3, stride=1, padding=1), 18 | nn.ReLU(inplace=True), 19 | nn.Conv2d(48, 48, 3, padding=1), 20 | nn.ReLU(inplace=True), 21 | nn.MaxPool2d(2)) 22 | 23 | self._block2 = nn.Sequential( 24 | nn.Conv2d(48, 48, 3, stride=1, padding=1), 25 | nn.ReLU(inplace=True), 26 | nn.MaxPool2d(2)) 27 | 28 | self._block3 = nn.Sequential( 29 | nn.Conv2d(48, 48, 3, stride=1, padding=1), 30 | nn.ReLU(inplace=True), 31 | nn.ConvTranspose2d(48, 48, 3, stride=2, padding=1, output_padding=1)) 32 | 33 | self._block4 = nn.Sequential( 34 | nn.Conv2d(96, 96, 3, stride=1, padding=1), 35 | nn.ReLU(inplace=True), 36 | nn.Conv2d(96, 96, 3, stride=1, padding=1), 37 | nn.ReLU(inplace=True), 38 | nn.ConvTranspose2d(96, 96, 3, stride=2, padding=1, output_padding=1)) 39 | 40 | self._block5 = nn.Sequential( 41 | nn.Conv2d(144, 96, 3, stride=1, padding=1), 42 | nn.ReLU(inplace=True), 43 | nn.Conv2d(96, 96, 3, stride=1, padding=1), 44 | nn.ReLU(inplace=True), 45 | nn.ConvTranspose2d(96, 96, 3, stride=2, padding=1, output_padding=1)) 46 | 47 | self._block6 = nn.Sequential( 48 | nn.Conv2d(96 + in_channels, 64, 3, stride=1, padding=1), 49 | nn.ReLU(inplace=True), 50 | nn.Conv2d(64, 32, 3, stride=1, padding=1), 51 | nn.ReLU(inplace=True), 52 | nn.Conv2d(32, out_channels, 3, stride=1, padding=1), 53 | nn.LeakyReLU(0.1)) 54 | 55 | 56 | def forward(self, x): 57 | 58 | #Encoder 59 | #print("X size = ", str(x.size())) 60 | pool1 = self._block1(x) 61 | #print(pool1.size()) 62 | pool2 = self._block2(pool1) 63 | #print(pool2.size()) 64 | pool3 = self._block2(pool2) 65 | #print(pool3.size()) 66 | pool4 = self._block2(pool3) 67 | #print(pool4.size()) 68 | pool5 = self._block2(pool4) 69 | #print(pool5.size()) 70 | 71 | #Decoder 72 | upsample5 = self._block3(pool5) 73 | #print(upsample5.size()) 74 | concat5 = torch.cat((upsample5, pool4), dim=1) 75 | #print(concat5.size()) 76 | upsample4 = self._block4(concat5) 77 | #print(upsample4.size()) 78 | concat4 = torch.cat((upsample4, pool3), dim=1) 79 | #print(concat4.size()) 80 | upsample3 = self._block5(concat4) 81 | #print(upsample3.size()) 82 | concat3 = torch.cat((upsample3, pool2), dim=1) 83 | #print(concat3.size()) 84 | upsample2 = self._block5(concat3) 85 | #print(upsample2.size()) 86 | concat2 = torch.cat((upsample2, pool1), dim=1) 87 | #print(concat2.size()) 88 | upsample1 = self._block5(concat2) 89 | #print(upsample1.size()) 90 | concat1 = torch.cat((upsample1, x), dim=1) 91 | #print(concat1.size()) 92 | output = self._block6(concat1) 93 | #print(output.size()) 94 | return output 95 | 96 | def summary(self): 97 | print('Unet summary: ') 98 | print(self._block1) 99 | print(self._block2) 100 | print(self._block3) 101 | print(self._block4) 102 | print(self._block5) 103 | print(self._block6) 104 | 105 | -------------------------------------------------------------------------------- /pyvision/segmentation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/__init__.py 
-------------------------------------------------------------------------------- /pyvision/segmentation/fcn/README.md: -------------------------------------------------------------------------------- 1 | # Fully Convolutional Networks for Semantic Segmentation (FCN) 2 | 3 | FCN uses a fully convolutional network to segment images into classes of objects. You can read the paper [here](https://arxiv.org/pdf/1605.06211v1.pdf). Check [summary](#Summary) for implementation details or [Usage](#Usage) for Usage details. 4 | 5 | ## Summary 6 | 7 | The implementation makes use of the pretrained models made available by [PyTorch](https://github.com/pytorch). The models were trained on a subset of the COCO dataset containing only those classes that are present in the VOC2012 dataset. 8 | 9 | | Model | Backbone | Dataset | Mean IoU/Global Pixelwise Accuracy| 10 | |----|----|---|-----| 11 | fcn-resnet-50 | resnet-50 | coco2017-train | 60.5 / 91.4 | 12 | |fcn-resnet-101 | resnet-101 | coco2017-train | 63.7 / 91.9 | 13 | 14 | ## Usage 15 | 16 | **Brief** 17 | 18 | The model setup is done via the FCN class exposed via *pyvision.segmentation.fcn*. All models and related config parameters can be configured from the class constructor. 19 | 20 | Inference is run through the *inference()* method. 21 | 22 | **Quick Start** 23 | 24 | * To use the default *fcn-resnet50-coco* model, 25 | 26 | ```python 27 | 28 | from pyvision.segmentation import fcn 29 | 30 | fcn_model = fcn.FCN(device="cpu", show=False) 31 | 32 | # item = path to an image or a cv2 image or a PIL Image 33 | preds, seg_map, blend_map = fcn_model.inference(item, save="preds") 34 | 35 | ``` 36 | 37 | * To use the *fcn-resnet101-coco* model, 38 | 39 | ```python 40 | 41 | from pyvision.segmentation import fcn 42 | 43 | fcn_model = fcn.FCN(model="fcn-resnet101-coco", device="cpu", show=False) 44 | 45 | # item = path to an image or a cv2 image or a PIL Image 46 | preds, seg_map, blend_map = fcn_model.inference(item, save="preds") 47 | 48 | ``` 49 | 50 | * To list available models run, 51 | 52 | ```python 53 | 54 | from pyvision.segmentation import fcn 55 | 56 | print(fcn.available_models()) 57 | 58 | ``` 59 | 60 | * To run tests, from repo root, run, 61 | 62 | ```shell 63 | 64 | $ python tests/segmentation/fcn/fcn50.py 65 | $ python tests/segmentation/fcn/fcn101.py 66 | 67 | ``` 68 | 69 | ## Examples 70 | 71 | **Dataset: COCO2017-train (VOC2012 classes)** 72 | 73 | | Model | Original Image | Segmentation Map | Blend Image | 74 | |---|-----|----|----| 75 | |**fcn-resnet50-coco**|| | | 76 | |**fcn-resnet50-coco**|| | | 77 | |**fcn-resnet101-coco**|| | | 78 | |**fcn-resnet101-coco**|| | | 79 | 80 | ## Contributed By 81 | 82 | [Pranjal Datta](https://github.com/pranjaldatta) 83 | -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import FCN, available_models -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/config/__init__.py -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/config/weights_download.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "fcn-resnet50-coco": "1wQ8davh3KzDspnbuZ6e4OnrVggjaYVo8", 3 | "fcn-resnet101-coco": "1AcxXcQRW8dPdDtwQcQRxBp5vAW-yeFgi" 4 | } -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/data/__init__.py -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/data/voc2012_classes.txt: -------------------------------------------------------------------------------- 1 | background 2 | aeroplane 3 | bicycle 4 | bird 5 | boat 6 | bottle 7 | bus 8 | car 9 | cat 10 | chair 11 | cow 12 | diningtable 13 | dog 14 | horse 15 | motorbike 16 | person 17 | pottedplant 18 | sheep 19 | sofa 20 | train 21 | tvmonitor -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/data/voc2012_colors.txt: -------------------------------------------------------------------------------- 1 | 0 0 0 2 | 128 0 0 3 | 0 128 0 4 | 128 128 0 5 | 0 0 128 6 | 128 0 128 7 | 0 128 128 8 | 128 128 128 9 | 64 0 0 10 | 192 0 0 11 | 64 128 0 12 | 192 128 0 13 | 64 0 128 14 | 192 0 128 15 | 64 128 128 16 | 192 128 128 17 | 0 64 0 18 | 128 64 0 19 | 0 192 0 20 | 128 192 0 21 | 0 64 128 -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/16.jpg -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/16_101_blend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/16_101_blend.png -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/16_101_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/16_101_map.png -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/16_50_blend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/16_50_blend.png -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/16_50_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/16_50_map.png -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/pascal_voc.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/pascal_voc.jpg -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/pascal_voc_101_blend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/pascal_voc_101_blend.png -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/pascal_voc_101_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/pascal_voc_101_map.png -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/pascal_voc_50_blend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/pascal_voc_50_blend.png -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/pascal_voc_50_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/pascal_voc_50_map.png -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/models/__init__.py -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/models/fcn_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torchvision.models._utils import IntermediateLayerGetter 5 | 6 | from .backbone import resnet101, resnet50 7 | 8 | from collections import OrderedDict 9 | 10 | __backbones__ = { 11 | "resnet50" : resnet50, 12 | "resnet101" : resnet101 13 | } 14 | 15 | def _build_fcn(name, num_classes, aux, pretrained=False): 16 | 17 | backbone = __backbones__[name]( 18 | pretrained=pretrained, 19 | replace_stride_with_dilation=[False, True, True] 20 | ) 21 | 22 | final_layers = {'layer4': 'out'} 23 | if aux: 24 | final_layers['layer3'] = "aux" 25 | backbone = IntermediateLayerGetter(backbone, return_layers=final_layers) 26 | 27 | aux_classifier = None 28 | if aux: 29 | inplanes = 1024 30 | aux_classifier = FCNHead(inplanes, num_classes) 31 | 32 | inplanes = 2048 33 | classifier = FCNHead(inplanes, num_classes) 34 | #base_model = FCNModel() 35 | 36 | fcn_model = FCNModel(backbone, classifier, aux_classifier) 37 | 38 | return fcn_model 39 | 40 | 41 | class FCNModel(nn.Module): 42 | 43 | def __init__(self, backbone, classifier, aux_classifier=None): 44 | 45 | super(FCNModel, self).__init__() 46 | 47 | self.backbone = backbone 48 | self.classifier = classifier 49 | self.aux_classifier = aux_classifier 50 | 51 | def forward(self, x): 52 | 53 | 
input_shape = x.shape[-2:] 54 | features = self.backbone(x) 55 | 56 | result = OrderedDict() 57 | x = features["out"] 58 | x = self.classifier(x) 59 | x = F.interpolate(x, size=input_shape, mode='bilinear', align_corners=True) 60 | result["out"] = x 61 | 62 | if self.aux_classifier is not None: 63 | x = features["aux"] 64 | x = self.aux_classifier(x) 65 | x = F.interpolate(x, size=input_shape, mode='bilinear', align_corners=True) 66 | result["aux"] = x 67 | 68 | return result 69 | 70 | 71 | class FCNHead(nn.Sequential): 72 | 73 | def __init__(self, inchannels, channels): 74 | 75 | intermediate_channels = inchannels // 4 76 | layers = [ 77 | nn.Conv2d(inchannels, intermediate_channels, 3, padding=1, bias=False), 78 | nn.BatchNorm2d(intermediate_channels), 79 | nn.ReLU(), 80 | nn.Dropout(0.1), 81 | nn.Conv2d(intermediate_channels, channels, 1) 82 | ] 83 | 84 | super(FCNHead, self).__init__(*layers) 85 | 86 | 87 | -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/util/__init__.py -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/util/utils.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import numpy as np 3 | 4 | def make_color_seg_map(seg_map_np, palette): 5 | color_img = Image.fromarray(seg_map_np.astype(np.uint8)).convert('P') 6 | color_img.putpalette(palette) 7 | return color_img -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import PSPNet, available_models -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/config/__init__.py -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/config/data_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "voc2012": { 3 | "classes": "21" 4 | }, 5 | "ade20k" : { 6 | "classes": "150" 7 | }, 8 | "cityscapes" : { 9 | "classes" : "19" 10 | } 11 | 12 | } -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/config/weights_download.json: -------------------------------------------------------------------------------- 1 | { 2 | "pspnet-resnet50-voc2012": "1T8NiMaAVNRiS_i4rHVK6oe59oK0pvGGR", 3 | "pspnet-resnet101-voc2012": "1Qca7YOipac981SvGQGGw6kuphDiQzl3Z", 4 | "pspnet-resnet50-ade20k": "1BTH9_hIulIndSOcYj1F_lCyMrEGfsaYW", 5 | "pspnet-resnet50-cityscapes": "1EwX6UxMGeiNi29XL9IS0WUa4KZ7lkbI2" 6 | 7 | } -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/data/__init__.py 
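A note on the palette utilities above: the `*_colors.txt` files that follow store one `R G B` triple per class, and `make_color_seg_map()` (defined identically in `fcn/util/utils.py` and `pspnet/util/utils.py`) applies such a palette to a raw class-index map via PIL's `putpalette`. The short sketch below shows how the two fit together; the `load_palette()` helper and the random "prediction" matrix are illustrative assumptions, not part of the library.

```python
# Sketch: colorize a raw class-index segmentation map with make_color_seg_map().
# load_palette() and the random "prediction" below are illustrative only.
import numpy as np
from pyvision.segmentation.fcn.util.utils import make_color_seg_map

def load_palette(colors_txt):
    # Each line holds "R G B"; PIL's putpalette expects one flat list of ints.
    palette = []
    with open(colors_txt) as fp:
        for line in fp:
            palette.extend(int(v) for v in line.split())
    return palette

palette = load_palette("pyvision/segmentation/fcn/data/voc2012_colors.txt")
seg_map = np.random.randint(0, 21, size=(120, 160))   # dummy 21-class index map
color_img = make_color_seg_map(seg_map, palette)       # 'P'-mode PIL image
color_img.save("dummy_seg_map.png")
```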
-------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/data/ade20k_classes.txt: -------------------------------------------------------------------------------- 1 | wall 2 | building 3 | sky 4 | floor 5 | tree 6 | ceiling 7 | road 8 | bed 9 | windowpane 10 | grass 11 | cabinet 12 | sidewalk 13 | person 14 | earth 15 | door 16 | table 17 | mountain 18 | plant 19 | curtain 20 | chair 21 | car 22 | water 23 | painting 24 | sofa 25 | shelf 26 | house 27 | sea 28 | mirror 29 | rug 30 | field 31 | armchair 32 | seat 33 | fence 34 | desk 35 | rock 36 | wardrobe 37 | lamp 38 | bathtub 39 | railing 40 | cushion 41 | base 42 | box 43 | column 44 | signboard 45 | chest of drawers 46 | counter 47 | sand 48 | sink 49 | skyscraper 50 | fireplace 51 | refrigerator 52 | grandstand 53 | path 54 | stairs 55 | runway 56 | case 57 | pool table 58 | pillow 59 | screen door 60 | stairway 61 | river 62 | bridge 63 | bookcase 64 | blind 65 | coffee table 66 | toilet 67 | flower 68 | book 69 | hill 70 | bench 71 | countertop 72 | stove 73 | palm 74 | kitchen island 75 | computer 76 | swivel chair 77 | boat 78 | bar 79 | arcade machine 80 | hovel 81 | bus 82 | towel 83 | light 84 | truck 85 | tower 86 | chandelier 87 | awning 88 | streetlight 89 | booth 90 | television receiver 91 | airplane 92 | dirt track 93 | apparel 94 | pole 95 | land 96 | bannister 97 | escalator 98 | ottoman 99 | bottle 100 | buffet 101 | poster 102 | stage 103 | van 104 | ship 105 | fountain 106 | conveyer belt 107 | canopy 108 | washer 109 | plaything 110 | swimming pool 111 | stool 112 | barrel 113 | basket 114 | waterfall 115 | tent 116 | bag 117 | minibike 118 | cradle 119 | oven 120 | ball 121 | food 122 | step 123 | tank 124 | trade name 125 | microwave 126 | pot 127 | animal 128 | bicycle 129 | lake 130 | dishwasher 131 | screen 132 | blanket 133 | sculpture 134 | hood 135 | sconce 136 | vase 137 | traffic light 138 | tray 139 | ashcan 140 | fan 141 | pier 142 | crt screen 143 | plate 144 | monitor 145 | bulletin board 146 | shower 147 | radiator 148 | glass 149 | clock 150 | flag -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/data/ade20k_colors.txt: -------------------------------------------------------------------------------- 1 | 120 120 120 2 | 180 120 120 3 | 6 230 230 4 | 80 50 50 5 | 4 200 3 6 | 120 120 80 7 | 140 140 140 8 | 204 5 255 9 | 230 230 230 10 | 4 250 7 11 | 224 5 255 12 | 235 255 7 13 | 150 5 61 14 | 120 120 70 15 | 8 255 51 16 | 255 6 82 17 | 143 255 140 18 | 204 255 4 19 | 255 51 7 20 | 204 70 3 21 | 0 102 200 22 | 61 230 250 23 | 255 6 51 24 | 11 102 255 25 | 255 7 71 26 | 255 9 224 27 | 9 7 230 28 | 220 220 220 29 | 255 9 92 30 | 112 9 255 31 | 8 255 214 32 | 7 255 224 33 | 255 184 6 34 | 10 255 71 35 | 255 41 10 36 | 7 255 255 37 | 224 255 8 38 | 102 8 255 39 | 255 61 6 40 | 255 194 7 41 | 255 122 8 42 | 0 255 20 43 | 255 8 41 44 | 255 5 153 45 | 6 51 255 46 | 235 12 255 47 | 160 150 20 48 | 0 163 255 49 | 140 140 140 50 | 250 10 15 51 | 20 255 0 52 | 31 255 0 53 | 255 31 0 54 | 255 224 0 55 | 153 255 0 56 | 0 0 255 57 | 255 71 0 58 | 0 235 255 59 | 0 173 255 60 | 31 0 255 61 | 11 200 200 62 | 255 82 0 63 | 0 255 245 64 | 0 61 255 65 | 0 255 112 66 | 0 255 133 67 | 255 0 0 68 | 255 163 0 69 | 255 102 0 70 | 194 255 0 71 | 0 143 255 72 | 51 255 0 73 | 0 82 255 74 | 0 255 41 75 | 0 255 173 76 | 10 0 255 77 | 173 255 0 78 | 0 255 153 79 | 255 92 0 80 | 255 0 255 81 | 255 0 245 82 | 255 0 102 83 
| 255 173 0 84 | 255 0 20 85 | 255 184 184 86 | 0 31 255 87 | 0 255 61 88 | 0 71 255 89 | 255 0 204 90 | 0 255 194 91 | 0 255 82 92 | 0 10 255 93 | 0 112 255 94 | 51 0 255 95 | 0 194 255 96 | 0 122 255 97 | 0 255 163 98 | 255 153 0 99 | 0 255 10 100 | 255 112 0 101 | 143 255 0 102 | 82 0 255 103 | 163 255 0 104 | 255 235 0 105 | 8 184 170 106 | 133 0 255 107 | 0 255 92 108 | 184 0 255 109 | 255 0 31 110 | 0 184 255 111 | 0 214 255 112 | 255 0 112 113 | 92 255 0 114 | 0 224 255 115 | 112 224 255 116 | 70 184 160 117 | 163 0 255 118 | 153 0 255 119 | 71 255 0 120 | 255 0 163 121 | 255 204 0 122 | 255 0 143 123 | 0 255 235 124 | 133 255 0 125 | 255 0 235 126 | 245 0 255 127 | 255 0 122 128 | 255 245 0 129 | 10 190 212 130 | 214 255 0 131 | 0 204 255 132 | 20 0 255 133 | 255 255 0 134 | 0 153 255 135 | 0 41 255 136 | 0 255 204 137 | 41 0 255 138 | 41 255 0 139 | 173 0 255 140 | 0 245 255 141 | 71 0 255 142 | 122 0 255 143 | 0 255 184 144 | 0 92 255 145 | 184 255 0 146 | 0 133 255 147 | 255 214 0 148 | 25 194 194 149 | 102 255 0 150 | 92 0 255 -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/data/cityscapes_classes.txt: -------------------------------------------------------------------------------- 1 | road 2 | sidewalk 3 | building 4 | wall 5 | fence 6 | pole 7 | traffic light 8 | traffic sign 9 | vegetation 10 | terrain 11 | sky 12 | person 13 | rider 14 | car 15 | truck 16 | bus 17 | train 18 | motorcycle 19 | bicycle -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/data/cityscapes_colors.txt: -------------------------------------------------------------------------------- 1 | 128 64 128 2 | 244 35 232 3 | 70 70 70 4 | 102 102 156 5 | 190 153 153 6 | 153 153 153 7 | 250 170 30 8 | 220 220 0 9 | 107 142 35 10 | 152 251 152 11 | 70 130 180 12 | 220 20 60 13 | 255 0 0 14 | 0 0 142 15 | 0 0 70 16 | 0 60 100 17 | 0 80 100 18 | 0 0 230 19 | 119 11 32 -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/data/voc2012_classes.txt: -------------------------------------------------------------------------------- 1 | background 2 | aeroplane 3 | bicycle 4 | bird 5 | boat 6 | bottle 7 | bus 8 | car 9 | cat 10 | chair 11 | cow 12 | diningtable 13 | dog 14 | horse 15 | motorbike 16 | person 17 | pottedplant 18 | sheep 19 | sofa 20 | train 21 | tvmonitor -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/data/voc2012_colors.txt: -------------------------------------------------------------------------------- 1 | 0 0 0 2 | 128 0 0 3 | 0 128 0 4 | 128 128 0 5 | 0 0 128 6 | 128 0 128 7 | 0 128 128 8 | 128 128 128 9 | 64 0 0 10 | 192 0 0 11 | 64 128 0 12 | 192 128 0 13 | 64 0 128 14 | 192 0 128 15 | 64 128 128 16 | 192 128 128 17 | 0 64 0 18 | 128 64 0 19 | 0 192 0 20 | 128 192 0 21 | 0 64 128 -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/16.jpg -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/16_blend.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/16_blend.png -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/16_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/16_map.png -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/ade20k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/ade20k.jpg -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/ade20k_blend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/ade20k_blend.png -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/ade20k_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/ade20k_map.png -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/cityscape.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/cityscape.png -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/cityscapes_blend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/cityscapes_blend.png -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/cityscapes_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/cityscapes_map.png -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/pascal_voc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/pascal_voc.jpg -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/pascal_voc_blend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/pascal_voc_blend.png -------------------------------------------------------------------------------- 
/pyvision/segmentation/pspnet/examples/pascal_voc_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/pascal_voc_map.png -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/models/__init__.py -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/models/pspnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .backbone import * 6 | 7 | __extractors__ = { 8 | "resnet18" : resnet18, 9 | "resnet34" : resnet34, 10 | "resnet50" : resnet50, 11 | "resnet101" : resnet101, 12 | "resnet152" : resnet152 13 | } 14 | 15 | class PPM(nn.Module): 16 | 17 | """The Pyramid Pooling Module""" 18 | 19 | def __init__(self, input_dims, reduction_dims, scales): 20 | 21 | super(PPM, self).__init__() 22 | 23 | self.features = [] 24 | for scale in scales: 25 | self.features.append(nn.Sequential( 26 | nn.AdaptiveAvgPool2d(scale), 27 | nn.Conv2d(input_dims, reduction_dims, 1, bias=False), 28 | nn.BatchNorm2d(reduction_dims), 29 | nn.ReLU(inplace=True) 30 | )) 31 | self.features = nn.ModuleList(self.features) 32 | 33 | def forward(self, x): 34 | x_size = x.size() 35 | result = [x] 36 | for feature in self.features: 37 | result.append( 38 | F.interpolate(feature(x), size=x_size[2:], mode="bilinear", align_corners=True) 39 | ) 40 | 41 | result = torch.cat(result, 1) 42 | 43 | return result 44 | 45 | class PSPNet_model(nn.Module): 46 | 47 | """ The main PSPNet Module""" 48 | 49 | def __init__(self, extractor="resnet50", scales=[1,2,3,6], 50 | dropout=0.1, num_classes=21, zoom_factor=8, 51 | criterion=nn.CrossEntropyLoss(ignore_index=255), pretrained=True): 52 | 53 | super(PSPNet_model, self).__init__() 54 | 55 | if len(scales)%4 != 0: 56 | raise ValueError("len of scales should be 4 but got ", len(scales)) 57 | if num_classes <= 1: 58 | raise ValueError("num_classes should be > 1 but found ", num_classes) 59 | if zoom_factor not in [1, 2, 4, 8]: 60 | raise ValueError("zoom_factor should be in [1, 2, 4, 8] but got ", zoom_factor) 61 | 62 | self.extractor = extractor 63 | self.scales = scales 64 | self.dropout = dropout 65 | self.num_classes = num_classes 66 | self.zoom_factor = zoom_factor 67 | self.criterion = criterion 68 | self.pretrained = pretrained 69 | 70 | backbone = __extractors__[self.extractor](False) 71 | 72 | # build the layers 73 | self.layer0 = nn.Sequential( 74 | backbone.conv1, 75 | backbone.bn1, 76 | backbone.relu, 77 | backbone.conv2, 78 | backbone.bn2, 79 | backbone.relu, 80 | backbone.conv3, 81 | backbone.bn3, 82 | backbone.relu, 83 | backbone.maxpool, 84 | ) 85 | self.layer1 = backbone.layer1 86 | self.layer2 = backbone.layer2 87 | self.layer3 = backbone.layer3 88 | self.layer4 = backbone.layer4 89 | 90 | for n, m in self.layer3.named_modules(): 91 | if "conv2" in n: 92 | m.dilation, m.padding, m.stride = (2, 2), (2, 2), (1, 1) 93 | elif "downsample.0" in n: 94 | m.stride = (1, 1) 95 | for n, m in self.layer4.named_modules(): 96 | if "conv2" in n: 97 | m.dilation, 
m.padding, m.stride = (4, 4), (4, 4), (1, 1) 98 | elif "downsample.0" in n: 99 | m.stride = (1, 1) 100 | 101 | feature_dims = 2048 102 | 103 | self.ppm = PPM(feature_dims, int(feature_dims/len(scales)), scales) 104 | feature_dims *= 2 105 | 106 | self.cls = nn.Sequential( 107 | nn.Conv2d(feature_dims, 512, kernel_size=3, padding=1, bias=False), 108 | nn.BatchNorm2d(512), 109 | nn.ReLU(inplace=True), 110 | nn.Dropout2d(p=dropout), 111 | nn.Conv2d(512, num_classes, kernel_size=1) 112 | ) 113 | 114 | if not self.pretrained: 115 | self.aux = nn.Sequential( 116 | nn.Conv2d(1024, 256, kernel_size=3, padding=1, bias=False), 117 | nn.BatchNorm2d(256), 118 | nn.ReLU(inplace=True), 119 | nn.Dropout2d(p=dropout), 120 | nn.Conv2d(256, self.num_classes, kernel_size=1) 121 | ) 122 | 123 | 124 | def forward(self, x, y=None): 125 | 126 | x_size = x.shape 127 | 128 | assert (x_size[2] - 1) % 8 == 0 and (x_size[3] - 1) % 8 == 0 129 | 130 | h = int((x_size[2] - 1) / 8 * self.zoom_factor + 1) 131 | w = int((x_size[3] - 1) / 8 * self.zoom_factor + 1) 132 | 133 | x = self.layer0(x) 134 | x = self.layer1(x) 135 | x = self.layer2(x) 136 | x_aux = self.layer3(x) # for aux loss during training 137 | x = self.layer4(x_aux) 138 | 139 | x = self.ppm(x) 140 | 141 | x = self.cls(x) 142 | 143 | if self.zoom_factor != 1: 144 | x = F.interpolate(x, size=(h, w), mode='bilinear', align_corners=True) 145 | 146 | if not self.pretrained: 147 | aux = self.aux(x_aux) 148 | if self.zoom_factor != 1: 149 | aux = F.interpolate(aux, size=(h, w), mode="bilinear", align_corners=True) 150 | 151 | main_loss = self.criterion(x, y) 152 | aux_loss = self.criterion(aux, y) 153 | 154 | return x.max(1)[1], main_loss, aux_loss 155 | 156 | else: 157 | 158 | return x 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/readme.md: -------------------------------------------------------------------------------- 1 | # Pyramid Scene Parsing Network (PSPNet) 2 | 3 | PSPNet is a semantic segmentation architecture that achieves strong accuracy on standard segmentation benchmarks. It uses a *Pyramid Pooling Module* to aggregate global image context information and make better per-pixel predictions. 4 | 5 | Read the paper [here](https://arxiv.org/pdf/1612.01105.pdf). 6 | Check the [summary](#summary) for implementation details or [Usage](#Usage) for usage details. 7 | 8 | Check out this [notebook](https://github.com/pranjaldatta/PyVision/blob/master/demo/segmentation/pspnet/pspnet_demo.ipynb) to see how easily you can use PSPNet in 3-4 lines! 9 | 10 | If the above link does not work, please look [here](https://nbviewer.jupyter.org/github/pranjaldatta/PyVision/blob/master/demo/segmentation/pspnet/pspnet_demo.ipynb). 11 | 12 | ## Summary 13 | 14 | This implementation makes use of pretrained models provided by the authors [here](https://github.com/hszhao/semseg). Currently, PyVision PSPNet supports the models listed below.
15 | 16 | | Model | Backbone | Dataset | mIoU/mAcc/aAcc (Single Scale)| mIoU/mAcc/aAcc (Multi Scale) | 17 | |----|---|----|----|----| 18 | | pspnet-resnet50-voc2012 | Resnet50 | VOC2012 | 0.7705/0.8513/0.9489 | 0.7802/0.8580/0.9513 | 19 | | pspnet-resnet101-voc2012| Resnet101 | VOC2012 | 0.7907/0.8636/0.9534 | 0.7963/0.8677/0.9550 | 20 | | pspnet-resnet50-ade20k | Resnet50 | ADE20k | 0.4189/0.5227/0.8039 | 0.4284/0.5266/0.8106 | 21 | | pspnet-resnet50-cityscapes | Resnet50 | Cityscapes | 0.7730/0.8431/0.9597 | 0.7838/0.8486/0.9617 | 22 | 23 | ### Note regarding Implementation 24 | 25 | **Downsampling**: The network, as trained by the authors, operates on input images rescaled to 473x473. Without a GPU, this configuration is computationally expensive, so the implementation automatically downsamples the input image to a much smaller resolution of 225x225 **if** no GPU is available; **if** a GPU is available, no downsampling is applied. If needed, the user can override this default behavior by passing a boolean (True or False) to the **downsample** parameter in the *PSPNet* constructor (i.e. downsample=True enables downsampling and downsample=False disables it). 26 | 27 | **Class Names**: Often the user may only want the pixel-wise prediction matrix (wherein every value in the matrix denotes the *index* of the class to which the corresponding pixel belongs). In that case, the user also needs the list of *class names* on which the indices are based. To get that, simply do, 28 | 29 | ```python 30 | classlist = model.class_names() 31 | ``` 32 | 33 | ## Usage 34 | 35 | For more details, go through the docstrings/source code. 36 | 37 | **Brief** 38 | 39 | The model setup is done via the PSPNet class exposed via *pyvision.segmentation.pspnet*. All model-related configuration parameters can be set from the class constructor. 40 | 41 | Inference is run through the *inference()* method. 42 | 43 | **Quick Start** 44 | 45 | - To use the default *pspnet-resnet50-voc2012* model, 46 | 47 | ```python 48 | from pyvision.segmentation import pspnet 49 | 50 | # the model constructor 51 | # setting device=gpu and downsample=False. 52 | # Normally, explicitly setting downsample=False 53 | # isn't needed as it is automatically handled.
54 | # This is just for demo purposes 55 | m = pspnet.PSPNet(device="gpu", downsample=False) 56 | 57 | preds, color_img, blend_img = m.inference("|| | 94 | ||| | 95 | 96 | **Dataset: ADE20k** 97 | 98 | |Original Image|Segmentation Map| Blend Image| 99 | -----|-----|-----| 100 | ||| | 101 | 102 | **Dataset: Cityscapes** 103 | 104 | |Original Image|Segmentation Map| Blend Image| 105 | -----|-----|-----| 106 | ||| | 107 | -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/util/__init__.py -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/util/utils.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import numpy as np 3 | 4 | def make_color_seg_map(seg_map_np, palette): 5 | color_img = Image.fromarray(seg_map_np.astype(np.uint8)).convert('P') 6 | color_img.putpalette(palette) 7 | return color_img -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | gdown 3 | numpy 4 | opencv-python==4.1.1 5 | matplotlib 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | def read_readme(): 4 | with open("README.md") as fp: 5 | long_desc = fp.read() 6 | return long_desc 7 | 8 | setup( 9 | name = "pyvision", 10 | version = "1.0.0", 11 | author = "Pranjal Datta", 12 | description = ("Ready-to-use implementations of some of the most common " 13 | "computer vision algorithms."), 14 | license = "MIT", 15 | long_description = read_readme(), 16 | url = "https://github.com/pranjaldatta/PyVision", 17 | 18 | packages = find_packages(), 19 | include_package_data = True, 20 | ) -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH='.' 2 | python tests/detection/yolov3/yolo_test.py 3 | -------------------------------------------------------------------------------- /tests/detection/detr/cars_test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/detection/detr/cars_test.jpg -------------------------------------------------------------------------------- /tests/detection/detr/detr_test.py: -------------------------------------------------------------------------------- 1 | from pyvision.detection import detr 2 | import time 3 | 4 | 5 | imgs = ["tests/detection/detr/cars_test.jpg", "tests/detection/detr/zebra_test.jpg"] 6 | 7 | print(detr.available_models()) # show available models 8 | 9 | # testing on defualt detr-resnet50 10 | detr_object = detr.DETR(show=False) # make show True to see detections 11 | print("Testing with detr-resnet50") 12 | print("-"*50) 13 | start_time = time.time() 14 | for img in imgs: 15 | _, objs = detr_object.detect(img) 16 | print("No. 
of detections: ", len(objs)) 17 | print("-"*50) 18 | 19 | print("Total detection time: ", time.time() - start_time) 20 | print("-"*50, end="\n\n") 21 | 22 | # testing on detr-resnet101 23 | detr_object = detr.DETR(model="detr-resnet101", show=False) 24 | print("Testing with detr-resnet101") 25 | print("-"*50) 26 | start_time = time.time() 27 | for img in imgs: 28 | _, objs = detr_object.detect(img) 29 | print("No. of detections: ", len(objs)) 30 | print("-"*50) 31 | 32 | print("Total detection time: ", time.time() - start_time) 33 | print("-"*50) -------------------------------------------------------------------------------- /tests/detection/detr/zebra_test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/detection/detr/zebra_test.jpg -------------------------------------------------------------------------------- /tests/detection/effdet/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/detection/effdet/2.jpg -------------------------------------------------------------------------------- /tests/detection/effdet/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/detection/effdet/3.jpg -------------------------------------------------------------------------------- /tests/detection/effdet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/detection/effdet/__init__.py -------------------------------------------------------------------------------- /tests/detection/effdet/test_effdet.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from PIL import Image 3 | from pyvision.detection import efficientdet 4 | 5 | model = efficientdet.EfficientDet("coco", thresh=0.95) 6 | 7 | img1 = cv2.imread("tests/detection/effdet/2.jpg") 8 | img2 = cv2.imread("tests/detection/effdet/3.jpg") 9 | 10 | imgs = [img1, img2] 11 | 12 | for img in imgs: 13 | img = cv2.resize(img, (416, 416)) 14 | res = model.detect(img) 15 | cv2.imshow("Frame", res[0]) 16 | if cv2.waitKey() == ord('q'): 17 | continue -------------------------------------------------------------------------------- /tests/detection/yolov3/cars_test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/detection/yolov3/cars_test.jpg -------------------------------------------------------------------------------- /tests/detection/yolov3/yolo_test.py: -------------------------------------------------------------------------------- 1 | from pyvision.detection import yolov3 2 | import time 3 | 4 | 5 | imgs = ["tests/detection/yolov3/cars_test.jpg", "tests/detection/yolov3/zebra_test.jpg"] 6 | 7 | print(yolov3.available_models()) # show available models 8 | 9 | # testing on defualt yolov3-416 10 | yolo = yolov3.YOLOv3(show=False) # make show True to see detections 11 | print("Testing with yolov3-416") 12 | print("-"*50) 13 | start_time = time.time() 14 | for img in imgs: 15 | _, objs = yolo.detect(img) 16 | 
print("No. of detections: ", len(objs)) 17 | print("-"*50) 18 | 19 | print("Total detection time: ", time.time() - start_time) 20 | print("-"*50, end="\n\n") 21 | 22 | # testing on yolov3-tiny 23 | yolo = yolov3.YOLOv3(model="yolov3-tiny", show=False) 24 | print("Testing with yolov3-tiny") 25 | print("-"*50) 26 | start_time = time.time() 27 | for img in imgs: 28 | _, objs = yolo.detect(img) 29 | print("No. of detections: ", len(objs)) 30 | print("-"*50) 31 | 32 | print("Total detection time: ", time.time() - start_time) 33 | print("-"*50) -------------------------------------------------------------------------------- /tests/detection/yolov3/zebra_test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/detection/yolov3/zebra_test.jpg -------------------------------------------------------------------------------- /tests/face_detection/facenet/imgs/BarackObama.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/face_detection/facenet/imgs/BarackObama.jpeg -------------------------------------------------------------------------------- /tests/face_detection/facenet/imgs/ManojBajpayee.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/face_detection/facenet/imgs/ManojBajpayee.jpeg -------------------------------------------------------------------------------- /tests/face_detection/facenet/imgs/MarkZuckerberg.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/face_detection/facenet/imgs/MarkZuckerberg.jpeg -------------------------------------------------------------------------------- /tests/face_detection/facenet/multiple_img.py: -------------------------------------------------------------------------------- 1 | from pyvision.face_detection.facenet import Facenet 2 | 3 | # In this example, we take all the imgs from the ./imgs folder and 4 | # generate embeddings for them. We also associate each embedding with their 5 | # filename which act as 'true labels'. Then we use these embeddings to 'classify' 6 | # whether a supplied image belongs to any one of given categories 7 | 8 | # First we instantiate the facenet object. saveLoc is the path to the 9 | # folder wherein the embeddings will be saved. 
By default it will be saved 10 | # as "embeddings.pkl" but can be changed with the "saveName" param 11 | fc = Facenet(saveLoc="save", saveName="embeddings2.pkl") 12 | 13 | embeddings = fc.generate_embeddings(img=None, path="demo/face_detection/facenet/imgs") 14 | 15 | did_match, preds, loss = fc.compare_embeddings( 16 | img="demo/face_detection/facenet/zucktest.jpeg", 17 | embedLoc="save/embeddings2.pkl", 18 | embeddings=None, 19 | label="MarkZuckerberg" 20 | ) 21 | print(did_match, preds, loss) 22 | print("For 'True' Image, we get: ", did_match) 23 | 24 | -------------------------------------------------------------------------------- /tests/face_detection/facenet/single_img.py: -------------------------------------------------------------------------------- 1 | from pyvision.face_detection.facenet import Facenet 2 | 3 | # In this example we take a single image from the ./imgs folder 4 | # Generate embeddings and store them. Then use those embeddings to 5 | # check whether a previously unseen image is classified accurately or not 6 | 7 | 8 | # First we instantiate the facenet object. saveLoc is the path to the 9 | # folder wherein the embeddings will be saved. By default it will be saved 10 | # as "embeddings.pkl" but can be changed with the "saveName" param 11 | fc = Facenet(saveLoc="save/") 12 | 13 | # generate embeds 14 | _ = fc.generate_embeddings(img=None, path="demo/face_detection/facenet/imgs/BarackObama.jpeg", label="Barack Obama") 15 | 16 | # now we compare it against a "False" image 17 | did_match, pred, loss = fc.compare_embeddings(None, img="demo/face_detection/facenet/imgs/ManojBajpayee.jpeg", label="Barack Obama", embedLoc="save/embeddings.pkl") 18 | print(did_match, pred, loss) 19 | print("Comparing against 'False' image, we get: ", did_match) -------------------------------------------------------------------------------- /tests/face_detection/facenet/zucktest.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/face_detection/facenet/zucktest.jpeg -------------------------------------------------------------------------------- /tests/gans/deep_convolutional_gan/gan_test.py: -------------------------------------------------------------------------------- 1 | from pyvision.gans.deep_convolutional_gan import DeepConvGAN 2 | 3 | ''' Initializing the DC_GAN module with the necessary paths ''' 4 | DeepConvGAN.inference(DeepConvGAN, set_weight_dir = 'dcgan-model.pth', set_gen_dir='result_img') 5 | -------------------------------------------------------------------------------- /tests/gans/wasserstein_gan/gan_test.py: -------------------------------------------------------------------------------- 1 | from pyvision.gans.wasserstein_gan import WassGAN 2 | 3 | wgan = WassGAN() 4 | wgan.inference(set_gen_dir="gen") -------------------------------------------------------------------------------- /tests/misc/NeuralStyleTransfer/nst_test.py: -------------------------------------------------------------------------------- 1 | #to run 2 | from pyvision.misc.NeuralStyleTransfer import NeuralStyle 3 | 4 | __PREFIX__ = "pyvision/misc/NeuralStyleTransfer/Examples/" 5 | #provide the paths to the two images 6 | style_img, content_img = (__PREFIX__+'images/style1.jpg', __PREFIX__+'images/content2.jpg') 7 | 8 | #if you do not wish to use gpu, pass use_gpu=False as a parameter, i.e., nst=Neural_Style(num_steps=300, use_gpu=False) 9 | nst = 
NeuralStyle(num_steps=300, retain_dims=False) 10 | 11 | #call the function to run neural style transfer 12 | output, time = nst.run_style_transfer(style_img, content_img) 13 | print("time taken: ", time) 14 | 15 | -------------------------------------------------------------------------------- /tests/misc/mtcnn/images/class2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/mtcnn/images/class2.jpg -------------------------------------------------------------------------------- /tests/misc/mtcnn/images/designated-survivor-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/mtcnn/images/designated-survivor-2.jpg -------------------------------------------------------------------------------- /tests/misc/mtcnn/images/person1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/mtcnn/images/person1.jpeg -------------------------------------------------------------------------------- /tests/misc/mtcnn/images/scenery.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/mtcnn/images/scenery.jpeg -------------------------------------------------------------------------------- /tests/misc/mtcnn/images/test5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/mtcnn/images/test5.jpg -------------------------------------------------------------------------------- /tests/misc/mtcnn/mtcnn_test.py: -------------------------------------------------------------------------------- 1 | from pyvision.misc.mtcnn import MTCNN 2 | from pyvision.misc.mtcnn.utils.visualize import show_boxes, _show_boxes 3 | from PIL import Image 4 | import cv2 5 | from glob import glob 6 | 7 | 8 | a = [glob("tests/misc/mtcnn/images/*.{}".format(s)) for s in ["jpg", "jpeg", "png"]] 9 | imgs = [i for ai in a for i in ai] 10 | 11 | mtcnn = MTCNN() 12 | for img in imgs: 13 | img = Image.open(img) 14 | b = mtcnn.detect(img) 15 | try: 16 | img = show_boxes(img, b) 17 | except: 18 | img = _show_boxes(img, b) 19 | 20 | img.show() -------------------------------------------------------------------------------- /tests/misc/mtcnn/net_test.py: -------------------------------------------------------------------------------- 1 | from mtcnn.nets import ONet, PNet, RNet, FlattenTensorCustom 2 | import torch 3 | import numpy as np 4 | from colorama import Fore 5 | 6 | 7 | 8 | pnet = PNet() 9 | pnet.summary() 10 | 11 | print("-"*50) 12 | 13 | t = FlattenTensorCustom() 14 | ar = np.random.rand(64, 3, 32, 32) 15 | tensor = torch.FloatTensor(ar) 16 | tensor = t(tensor) 17 | if list(tensor.shape) == [64, 3*32*32]: 18 | pass 19 | else: 20 | print(tensor.shape) 21 | print(Fore.RED+"ERROR: at FlattenTensorCustom Test"+Fore.RESET) 22 | exit() 23 | 24 | print("-"*50) 25 | 26 | rnet = RNet() 27 | rnet.summary() 28 | 29 | print("-"*50) 30 | 31 | onet = ONet() 32 | onet.summary() -------------------------------------------------------------------------------- 
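A small companion to `mtcnn_test.py` above: the same detect-and-visualize loop, but writing the annotated images to disk instead of opening a viewer. It assumes, as the use of `.show()` in `mtcnn_test.py` suggests, that `show_boxes()`/`_show_boxes()` return a PIL image; the output filenames are arbitrary illustrative choices.

```python
# Sketch: run MTCNN over the bundled test images and save the annotated results.
# Assumes show_boxes()/_show_boxes() return a PIL image (as implied by .show()
# in mtcnn_test.py); the filenames below are illustrative only.
from glob import glob
from PIL import Image
from pyvision.misc.mtcnn import MTCNN
from pyvision.misc.mtcnn.utils.visualize import show_boxes, _show_boxes

mtcnn = MTCNN()
for idx, path in enumerate(sorted(glob("tests/misc/mtcnn/images/*"))):
    img = Image.open(path)
    boxes = mtcnn.detect(img)
    try:
        annotated = show_boxes(img, boxes)
    except Exception:
        annotated = _show_boxes(img, boxes)   # fallback used in mtcnn_test.py
    annotated.save(f"mtcnn_detection_{idx}.png")
```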
/tests/misc/noise2noise/Output_gaussian/denoised_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_gaussian/denoised_1.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_gaussian/denoised_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_gaussian/denoised_2.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_gaussian/denoised_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_gaussian/denoised_3.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_gaussian/source_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_gaussian/source_1.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_gaussian/source_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_gaussian/source_2.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_gaussian/source_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_gaussian/source_3.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_text/denoised_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_text/denoised_1.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_text/denoised_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_text/denoised_2.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_text/denoised_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_text/denoised_3.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_text/source_1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_text/source_1.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_text/source_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_text/source_2.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_text/source_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_text/source_3.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/n2n_test.py: -------------------------------------------------------------------------------- 1 | from pyvision.misc.noise2noise import Noise2Noise 2 | import os 3 | 4 | #data_path = 5 | data_path = os.getcwd() + "/tests/misc/noise2noise/test_images" 6 | 7 | #noise types: gaussian, text 8 | n2n = Noise2Noise(noise='text',data_path=data_path,mode='test') 9 | -------------------------------------------------------------------------------- /tests/misc/noise2noise/test.py: -------------------------------------------------------------------------------- 1 | from pyvision.misc.noise2noise.model import Noise2Noise 2 | import cv2 3 | from PIL import Image 4 | import numpy as np 5 | 6 | def gaussian_noise(img): 7 | ''' 8 | Add Gaussian noise in dataset 9 | Input: img of type PIL.Image 10 | Output: Noisy mage of type PIL.Image 11 | ''' 12 | w,h = img.size 13 | c = len(img.getbands()) 14 | 15 | sigma = np.random.uniform(20,50) 16 | gauss = np.random.normal(10,25,(h,w,c)) 17 | noisy = np.array(img) + gauss 18 | 19 | #Values less than 0 become 0 and more than 255 become 255 20 | noisy = np.clip(noisy, 0, 255).astype(np.uint8) 21 | img = Image.fromarray(noisy) 22 | 23 | return img 24 | 25 | n2n = Noise2Noise(noise="gaussian") 26 | 27 | img_path = "/home/pranjal/Projects/clone/PyVision/tests/misc/noise2noise/test_images/test.jpg" 28 | img = Image.open(img_path) 29 | img = gaussian_noise(img) 30 | 31 | img.show() 32 | img.save("noised.png") 33 | n2n.inference(img, show=False, save="denoised.png") -------------------------------------------------------------------------------- /tests/misc/noise2noise/test_images/test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/test_images/test.jpg -------------------------------------------------------------------------------- /tests/misc/noise2noise/test_images/test1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/test_images/test1.jpg -------------------------------------------------------------------------------- /tests/misc/noise2noise/test_images/test2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/test_images/test2.jpg 
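`test.py` above hard-codes an absolute path to the test image. Below is a minimal, repo-root-relative sketch of the same noise-then-denoise flow; the noise parameters simply mirror `gaussian_noise()` in `test.py`, and the output filename is an arbitrary choice rather than anything the library expects.

```python
# Sketch: noise2noise inference on the bundled test image, using a path
# relative to the repo root instead of a hard-coded absolute path.
import os
import numpy as np
from PIL import Image
from pyvision.misc.noise2noise.model import Noise2Noise

img_path = os.path.join("tests", "misc", "noise2noise", "test_images", "test.jpg")
img = Image.open(img_path)

# Add synthetic Gaussian noise, mirroring gaussian_noise() in test.py above
h, w = img.size[1], img.size[0]
noise = np.random.normal(10, 25, (h, w, len(img.getbands())))
noisy = np.clip(np.array(img) + noise, 0, 255).astype(np.uint8)
noisy_img = Image.fromarray(noisy)

n2n = Noise2Noise(noise="gaussian")
n2n.inference(noisy_img, show=False, save="denoised_sketch.png")
```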
-------------------------------------------------------------------------------- /tests/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/readme.md -------------------------------------------------------------------------------- /tests/segmentation/fcn/fcn101.py: -------------------------------------------------------------------------------- 1 | from pyvision.segmentation import fcn 2 | from glob import glob 3 | 4 | fcn_model = fcn.FCN(model="fcn-resnet101-coco", device="cpu", show=False) 5 | 6 | for idx, item in enumerate(glob("pyvision/segmentation/fcn/examples/*.jpg")): 7 | print(f"#### Image #{idx+1} ####") 8 | preds, seg_map, blend_map = fcn_model.inference(item, save=item.split(".")[0]+"_101") 9 | print("Prediction matrix shape: ", preds.shape) 10 | print("Segmentation Map shape: ", seg_map.size) 11 | print("Blend Map shape: ", blend_map.size) -------------------------------------------------------------------------------- /tests/segmentation/fcn/fcn50.py: -------------------------------------------------------------------------------- 1 | from pyvision.segmentation import fcn 2 | from glob import glob 3 | 4 | fcn_model = fcn.FCN(device="cpu", show=False) 5 | 6 | for idx, item in enumerate(glob("pyvision/segmentation/fcn/examples/*.jpg")): 7 | print(f"#### Image #{idx+1} ####") 8 | preds, seg_map, blend_map = fcn_model.inference(item, save=item.split(".")[0]+"_50") 9 | print("Prediction matrix shape: ", preds.shape) 10 | print("Segmentation Map shape: ", seg_map.size) 11 | print("Blend Map shape: ", blend_map.size) -------------------------------------------------------------------------------- /tests/segmentation/pspnet/pspnet_test.py: -------------------------------------------------------------------------------- 1 | from pyvision.segmentation.pspnet import PSPNet 2 | 3 | m = PSPNet(model="pspnet-resnet50-ade20k") 4 | 5 | m.inference("pyvision/segmentation/pspnet/examples/ade20k.jpg", save="ade20k") 6 | --------------------------------------------------------------------------------
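As a companion to `pspnet_test.py` above, here is a short CPU-only sketch that exercises the Cityscapes checkpoint. The constructor arguments and the three unpacked return values follow the PSPNet readme earlier in this dump; treat `device="cpu"` and the explicit `downsample=True` as assumptions if your local version differs.

```python
# Sketch: PSPNet inference on the bundled Cityscapes example, CPU-only.
# device="cpu" and downsample=True follow the readme's description and are
# assumptions about the constructor, not a verified API contract.
from pyvision.segmentation.pspnet import PSPNet

# Per the readme, inputs are downsampled to 225x225 automatically when no GPU
# is available; passing downsample=True just makes that behavior explicit.
m = PSPNet(model="pspnet-resnet50-cityscapes", device="cpu", downsample=True)

classes = m.class_names()   # index -> class name list used by the prediction matrix
print("Number of classes:", len(classes))

preds, color_img, blend_img = m.inference(
    "pyvision/segmentation/pspnet/examples/cityscape.png", save="cityscapes_cpu"
)
```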