├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── demo ├── detection │ ├── detr │ │ └── detr_demo.ipynb │ └── yolov3 │ │ └── yolov3_demo.ipynb ├── face_detection │ └── facenet │ │ ├── imgs │ │ ├── BarackObama.jpeg │ │ ├── ManojBajpayee.jpeg │ │ └── MarkZuckerberg.jpeg │ │ ├── multiple_img.py │ │ ├── single_img.py │ │ └── zucktest.jpeg ├── gans │ └── deep_convolutional_gan │ │ ├── __init__.py │ │ └── dcgan_infernence_nb.ipynb ├── misc │ └── NeuralStyleTransfer │ │ └── nst_demo.ipynb └── segmentation │ └── pspnet │ └── pspnet_demo.ipynb ├── docs ├── contributing.md ├── developing.md └── weights.md ├── pyvision ├── __init__.py ├── detection │ ├── __init__.py │ ├── detr │ │ ├── __init__.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ └── weights_download.json │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── classes.txt │ │ │ └── coco.names │ │ ├── detr.py │ │ ├── model.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── backbone.py │ │ │ └── transformers.py │ │ ├── readme.md │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── box_utils.py │ │ │ ├── misc.py │ │ │ ├── pallete │ │ │ └── position_encoding.py │ ├── efficientdet │ │ ├── __init__.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ ├── dataset_coco.yaml │ │ │ └── weights_download.json │ │ ├── lib │ │ │ ├── __init__.py │ │ │ ├── dataset.py │ │ │ ├── losses.py │ │ │ ├── model.py │ │ │ └── utils.py │ │ ├── model.py │ │ ├── readme.md │ │ └── train.py │ ├── readme.md │ └── yolov3 │ │ ├── __init__.py │ │ ├── config │ │ ├── __init__.py │ │ ├── models_supported.txt │ │ ├── weights_download.json │ │ ├── yolov3-tiny.cfg │ │ └── yolov3.cfg │ │ ├── darknet.py │ │ ├── data │ │ ├── __init__.py │ │ └── coco.names │ │ ├── issues.md │ │ ├── model.py │ │ ├── readme.md │ │ └── utils │ │ ├── __init__.py │ │ ├── box_utils.py │ │ ├── layer_factory.py │ │ ├── pallete │ │ ├── parse_config.py │ │ ├── preprocess.py │ │ └── utils.py ├── face_detection │ ├── __init__.py │ └── facenet │ │ ├── __init__.py │ │ ├── config │ │ ├── __init__.py │ │ └── weights_download.json │ │ ├── model.py │ │ ├── models │ │ ├── InceptionResnetV1.py │ │ └── __init__.py │ │ ├── readme.md │ │ └── utils │ │ ├── __init__.py │ │ ├── extract_face.py │ │ └── layer_factory.py ├── gans │ ├── __init__.py │ ├── deep_convolutional_gan │ │ ├── README.md │ │ ├── __init__.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ └── weights_download.json │ │ ├── dcgan │ │ │ ├── __init__.py │ │ │ └── dcgan.py │ │ ├── docs │ │ │ └── documentation.md │ │ ├── model.py │ │ └── results │ │ │ ├── losses.png │ │ │ ├── result.png │ │ │ ├── result2.png │ │ │ └── results_img │ │ │ ├── __init__.py │ │ │ ├── generated_image_0.jpg │ │ │ ├── generated_image_1.jpg │ │ │ ├── generated_image_10.jpg │ │ │ ├── generated_image_11.jpg │ │ │ ├── generated_image_12.jpg │ │ │ ├── generated_image_13.jpg │ │ │ ├── generated_image_14.jpg │ │ │ ├── generated_image_15.jpg │ │ │ ├── generated_image_2.jpg │ │ │ ├── generated_image_3.jpg │ │ │ ├── generated_image_4.jpg │ │ │ ├── generated_image_5.jpg │ │ │ ├── generated_image_6.jpg │ │ │ ├── generated_image_7.jpg │ │ │ ├── generated_image_8.jpg │ │ │ └── generated_image_9.jpg │ └── wasserstein_gan │ │ ├── README.md │ │ ├── __init__.py │ │ ├── config │ │ ├── __init__.py │ │ └── weights_download.json │ │ ├── current_output_imgs │ │ ├── __init__.py │ │ └── test36.png │ │ ├── model.py │ │ ├── train.py │ │ └── wgan.py ├── misc │ ├── NeuralStyleTransfer │ │ ├── Examples │ │ │ ├── images │ │ │ │ ├── content1.jpg │ │ │ │ ├── content2.jpg │ │ │ │ ├── content3.jpg │ │ │ │ ├── content4.jpg │ │ │ │ ├── 
content5.jpg │ │ │ │ ├── content6.jpeg │ │ │ │ ├── style1.jpg │ │ │ │ ├── style6.jpg │ │ │ │ └── style7.jpg │ │ │ └── output │ │ │ │ ├── content1+style6.png │ │ │ │ ├── content2+style1.png │ │ │ │ ├── content3+style6.png │ │ │ │ ├── content4+style1.png │ │ │ │ ├── content4+style7.png │ │ │ │ ├── content5+style1.png │ │ │ │ └── content6+style7.png │ │ ├── README.md │ │ ├── __init__.py │ │ └── neural_style.py │ ├── __init__.py │ ├── mtcnn │ │ ├── README.md │ │ ├── __init__.py │ │ ├── config │ │ │ └── weights_download.json │ │ ├── detector.py │ │ ├── model.py │ │ ├── nets.py │ │ ├── requirements.txt │ │ ├── stage_one.py │ │ ├── stage_two.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── utils.py │ │ │ └── visualize.py │ └── noise2noise │ │ ├── README.md │ │ ├── __init__.py │ │ ├── assets │ │ ├── gauss_1.png │ │ ├── gauss_3.png │ │ ├── gdenoised_1.png │ │ ├── gdenoised_3.png │ │ ├── tdenoised_1.png │ │ ├── tdenoised_3.png │ │ ├── text_1.png │ │ └── text_3.png │ │ ├── config │ │ ├── __init__.py │ │ └── weights_download.json │ │ ├── dataset.py │ │ ├── model.py │ │ └── unet.py └── segmentation │ ├── __init__.py │ ├── fcn │ ├── README.md │ ├── __init__.py │ ├── config │ │ ├── __init__.py │ │ └── weights_download.json │ ├── data │ │ ├── __init__.py │ │ ├── voc2012_classes.txt │ │ └── voc2012_colors.txt │ ├── examples │ │ ├── 16.jpg │ │ ├── 16_101_blend.png │ │ ├── 16_101_map.png │ │ ├── 16_50_blend.png │ │ ├── 16_50_map.png │ │ ├── pascal_voc.jpg │ │ ├── pascal_voc_101_blend.png │ │ ├── pascal_voc_101_map.png │ │ ├── pascal_voc_50_blend.png │ │ └── pascal_voc_50_map.png │ ├── model.py │ ├── models │ │ ├── __init__.py │ │ ├── backbone.py │ │ └── fcn_net.py │ └── util │ │ ├── __init__.py │ │ └── utils.py │ └── pspnet │ ├── __init__.py │ ├── config │ ├── __init__.py │ ├── data_config.json │ └── weights_download.json │ ├── data │ ├── __init__.py │ ├── ade20k_classes.txt │ ├── ade20k_colors.txt │ ├── cityscapes_classes.txt │ ├── cityscapes_colors.txt │ ├── voc2012_classes.txt │ └── voc2012_colors.txt │ ├── examples │ ├── 16.jpg │ ├── 16_blend.png │ ├── 16_map.png │ ├── ade20k.jpg │ ├── ade20k_blend.png │ ├── ade20k_map.png │ ├── cityscape.png │ ├── cityscapes_blend.png │ ├── cityscapes_map.png │ ├── pascal_voc.jpg │ ├── pascal_voc_blend.png │ └── pascal_voc_map.png │ ├── model.py │ ├── models │ ├── __init__.py │ ├── backbone.py │ └── pspnet.py │ ├── readme.md │ └── util │ ├── __init__.py │ └── utils.py ├── requirements.txt ├── setup.py ├── test.sh └── tests ├── detection ├── detr │ ├── cars_test.jpg │ ├── detr_test.py │ └── zebra_test.jpg ├── effdet │ ├── 2.jpg │ ├── 3.jpg │ ├── __init__.py │ └── test_effdet.py └── yolov3 │ ├── cars_test.jpg │ ├── yolo_test.py │ └── zebra_test.jpg ├── face_detection └── facenet │ ├── imgs │ ├── BarackObama.jpeg │ ├── ManojBajpayee.jpeg │ └── MarkZuckerberg.jpeg │ ├── multiple_img.py │ ├── single_img.py │ └── zucktest.jpeg ├── gans ├── deep_convolutional_gan │ └── gan_test.py └── wasserstein_gan │ └── gan_test.py ├── misc ├── NeuralStyleTransfer │ └── nst_test.py ├── mtcnn │ ├── images │ │ ├── class2.jpg │ │ ├── designated-survivor-2.jpg │ │ ├── person1.jpeg │ │ ├── scenery.jpeg │ │ └── test5.jpg │ ├── mtcnn_test.py │ └── net_test.py └── noise2noise │ ├── Output_gaussian │ ├── denoised_1.png │ ├── denoised_2.png │ ├── denoised_3.png │ ├── source_1.png │ ├── source_2.png │ └── source_3.png │ ├── Output_text │ ├── denoised_1.png │ ├── denoised_2.png │ ├── denoised_3.png │ ├── source_1.png │ ├── source_2.png │ └── source_3.png │ ├── n2n_test.py │ ├── test.py │ └── 
test_images │ ├── test.jpg │ ├── test1.jpg │ └── test2.jpg ├── readme.md └── segmentation ├── fcn ├── fcn101.py └── fcn50.py └── pspnet └── pspnet_test.py /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | .DS_Store 3 | __pycache__/ 4 | pyvision/detection/yolov3/weights/ 5 | pyvision/misc/mtcnn/weights/ 6 | pyvision/face_detection/facenet/weights/ 7 | pyvision/detection/detr/weights/ 8 | pyvision/segmentation/pspnet/weights/ 9 | pyvision/misc/noise2noise/weights/ 10 | pyvision/segmentation/fcn/weights/ 11 | test.py -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, Pranjal Datta 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include pyvision/detection/yolov3/config/*.cfg 2 | include pyvision/detection/yolov3/config/*.json 3 | include pyvision/detection/yolov3/config/*.txt 4 | include pyvision/detection/yolov3/data/*.names 5 | include pyvision/detection/yolov3/utils/pallete 6 | 7 | include pyvision/detection/detr/utils/pallete 8 | include pyvision/detection/detr/config/*.json 9 | include pyvision/detection/detr/data/*.txt 10 | 11 | include pyvision/detection/efficientdet/config/*.json 12 | include pyvision/detection/efficientdet/config/*.yaml 13 | 14 | include pyvision/segmentation/pspnet/config/*.json 15 | include pyvision/segmentation/pspnet/data/*.txt 16 | 17 | include pyvision/segmentation/fcn/config/*.json 18 | include pyvision/segmentation/fcn/data/*.txt 19 | 20 | include pyvision/misc/noise2noise/config/*.json 21 | 22 | include pyvision/misc/mtcnn/config/*.json 23 | 24 | include pyvision/face_detection/facenet/config/*.json 25 | 26 | include pyvision/gans/wasserstein_gan/config/*.json 27 | 28 | recursive-include pyvision *.md 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # PyVision 3 | 4 | Ready-to-use implementations of some of the most common computer vision algorithms. 5 | 6 | In PyTorch only! 7 | 8 | ## Currently available architectures 9 | 10 | - **Multi Task Cascaded Convolutional Neural Network (MTCNN)** : A SOTA face and facial-landmark detection architecture. Check out [this](https://github.com/pranjaldatta/PyVision/tree/master/pyvision/misc/mtcnn) out for more details. 11 | 12 | - **YOLOv3:** The SOTA object detection algorithm. For more details, read the [docs](https://github.com/pranjaldatta/PyVision/tree/master/pyvision/detection/yolov3). 13 | 14 | - **FaceNet: A Unified Embedding for Face Recognition and Clustering**: One of the most popular architectures used for facial recognition. For more details, check [here](https://github.com/pranjaldatta/PyVision/tree/master/pyvision/face_detection/facenet). 15 | 16 | - **DEtection TRansformer (DETR)**: An end-to-end object detection architecture using transformers. For more details, check [here](https://github.com/pranjaldatta/PyVision/tree/master/pyvision/detection/detr). 17 | 18 | - **Neural Style Transfer (NST)**: Transfer *style* from one Image into another. For more details, check [here](https://github.com/pranjaldatta/PyVision/tree/master/pyvision/misc/NeuralStyleTransfer) 19 | 20 | - **Pyramid Scene Parsing Network (PSPNet)**: Instance Segmentation architecture that makes use of *Pyramid Pooling Module* for better results. For more details, check [here](https://github.com/pranjaldatta/PyVision/tree/master/pyvision/segmentation/pspnet). 21 | 22 | For full list of architectures that has been ported or are **in the process** of being ported, check [here](https://github.com/pranjaldatta/PyVision/blob/master/docs/developing.md). 23 | 24 | ## Installation 25 | 26 | 1. Run the code in your terminal to clone the master branch which contains the working code 27 | 28 | ``` 29 | $ git clone https://github.com/pranjaldatta/PyVision.git --single-branch --branch master 30 | ``` 31 | 32 | 2. Then, go to the repository root by pasting the command given below into your terminal 33 | 34 | ``` 35 | $ cd PyVision 36 | ``` 37 | 38 | 3. 
Run the following command in the terminal to install PyVision into the current virtual or conda environment 39 | 40 | ``` 41 | $ pip install . 42 | ``` 43 | 44 | 4. You are good to go! 45 | 46 | ## Contributing 47 | 48 | For contribution guidelines, please look [here](https://github.com/pranjaldatta/PyVision/tree/master/docs/contributing.md). Contributions are always welcome! 49 | 50 | ## ToDo 51 | 52 | - [ ] Populate with more architectures (obviously) 53 | 54 | - [x] ~~Come up with an efficient way to make the repository minimal i.e. assets (like weights) will only be downloaded on an as-you-need basis.~~ All weights are hosted on the SRM-MIC Google Drive and downloaded using gdown. 55 | 56 | - [x] ~~Come up with an efficient way to ensure that heavy architecture-specific dependencies are installed only when required.~~ All heavy assets are installed only when the model is being used. 57 | 58 | ## Note 59 | 60 | Currently, PyVision works only in a pre-configured conda environment with all dependencies installed. 61 | 62 | **P.S.** Star the repo if you liked the work! 63 | -------------------------------------------------------------------------------- /demo/face_detection/facenet/imgs/BarackObama.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/demo/face_detection/facenet/imgs/BarackObama.jpeg -------------------------------------------------------------------------------- /demo/face_detection/facenet/imgs/ManojBajpayee.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/demo/face_detection/facenet/imgs/ManojBajpayee.jpeg -------------------------------------------------------------------------------- /demo/face_detection/facenet/imgs/MarkZuckerberg.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/demo/face_detection/facenet/imgs/MarkZuckerberg.jpeg -------------------------------------------------------------------------------- /demo/face_detection/facenet/multiple_img.py: -------------------------------------------------------------------------------- 1 | from pyvision.face_detection.facenet import Facenet 2 | 3 | # In this example, we take all the imgs from the ./imgs folder and 4 | # generate embeddings for them. We also associate each embedding with its 5 | # filename, which acts as its 'true label'. Then we use these embeddings to 'classify' 6 | # whether a supplied image belongs to any one of the given categories 7 | 8 | # First we instantiate the facenet object. saveLoc is the path to the 9 | # folder wherein the embeddings will be saved.
By default it will be saved 10 | # as "embeddings.pkl" but can be changed with the "saveName" param 11 | fc = Facenet(saveLoc="save", saveName="embeddings2.pkl") 12 | 13 | embeddings = fc.generate_embeddings(img=None, path="demo/face_detection/facenet/imgs") 14 | 15 | did_match, preds, loss = fc.compare_embeddings( 16 | img="demo/face_detection/facenet/zucktest.jpeg", 17 | embedLoc="save/embeddings2.pkl", 18 | embeddings=None, 19 | label="MarkZuckerberg" 20 | ) 21 | print(did_match, preds, loss) 22 | print("For 'True' Image, we get: ", did_match) 23 | 24 | -------------------------------------------------------------------------------- /demo/face_detection/facenet/single_img.py: -------------------------------------------------------------------------------- 1 | from pyvision.face_detection.facenet import Facenet 2 | 3 | # In this example we take a single image from the ./imgs folder 4 | # Generate embeddings and store them. Then use those embeddings to 5 | # check whether a previously unseen image is classified accurately or not 6 | 7 | 8 | # First we instantiate the facenet object. saveLoc is the path to the 9 | # folder wherein the embeddings will be saved. By default it will be saved 10 | # as "embeddings.pkl" but can be changed with the "saveName" param 11 | fc = Facenet(saveLoc="save/") 12 | 13 | # generate embeds 14 | _ = fc.generate_embeddings(img=None, path="demo/face_detection/facenet/imgs/BarackObama.jpeg", label="Barack Obama") 15 | 16 | # now we compare it against a "False" image 17 | did_match, pred, loss = fc.compare_embeddings(None, img="demo/face_detection/facenet/imgs/ManojBajpayee.jpeg", label="Barack Obama", embedLoc="save/embeddings.pkl") 18 | print(did_match, pred, loss) 19 | print("Comparing against 'False' image, we get: ", did_match) -------------------------------------------------------------------------------- /demo/face_detection/facenet/zucktest.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/demo/face_detection/facenet/zucktest.jpeg -------------------------------------------------------------------------------- /demo/gans/deep_convolutional_gan/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | # Contribution Guidelines 2 | 3 | PyVision is meant to be a collection of all major (popular or otherwise) computer vision architectures made available in an easy-to-use, extensible format so that the net number of lines required to use the architecture, whether for training or inference purposes are reduced to **three/four** lines. 4 | 5 | The objective of such an effort is two fold: 6 | 7 | - To develop a library for our own use, that simplifies computer vision architecture use so that developers can focus on the project they are working on and not bother about the nuances and headaches of complex implementations. 8 | 9 | - To learn the nuances and deal with the headaches of complex architecture implementations and hopefully, become better engineers! 10 | 11 | 12 | ## Why Contribute? 13 | 14 | - Learn the details of seemingly complex architectures. 15 | 16 | - Learn the nuances of implementation. 17 | 18 | - Help make computer vision easier and more approachable! 19 | 20 | ## How to Contribute? 
21 | 22 | The following steps roughly outline the contribution workflow: 23 | 24 | 1. Decide on an architecture you want to implement! Once decided, **open an issue** at [issues](https://github.com/pranjaldatta/PyVision/issues). Be sure to classify the architecture under a given category. *For example*, YOLOv3 falls under the category of *detection*. If unsure, ask in the issue. 25 | 26 | 2. Once you are sure no one else is working on the given architecture, clone the master repository. 27 | 28 | 3. Once in the local repository root, create a branch named after you and the architecture you are working on. *An example branch name:* **pranjal-yolov3**. 29 | To create a new branch, run the following command in the local repo root from your terminal, 30 | 31 | ``` 32 | $ git checkout -b <your-name>-<architecture> 33 | ``` 34 | 35 | 4. Code! 36 | 37 | 5. **Important**: The most critical issue here is regarding the model **weights**. The weights of a given model **do not come** pre-loaded with the repository. This is done because: 38 | - It reduces the repository size (obviously). 39 | - GitHub doesn't allow hosting files larger than 100 MB. 40 | - Making model weights available **lazily** is more efficient, as people download **only** those weights that they are using. 41 | 42 | So, what's the solution? For the detailed process check [this](https://github.com/pranjaldatta/PyVision/blob/master/docs/weights.md). 43 | 44 | **TL;DR**: 45 | - Provide the maintainer with links to the downloadable weights in the issue. The maintainer will download the weights and upload them to SRM-MIC's Google Drive. 46 | 47 | - The maintainer will provide the **file id** to the contributor. 48 | 49 | - Download the weights in a **lazy** manner, only when the **model is being initialized**, using **gdown**. 50 | 51 | - Check YOLOv3's download_weights() method for reference. 52 | 53 | 6. Add tests! The tests should be self-contained, and the folder structure in the [tests folder](https://github.com/pranjaldatta/PyVision/blob/master/tests) should mirror the repo root. 54 | 55 | 7. **Very Important**: Add docs! Add docstrings to classes and functions. **How to use** instructions along with example code are a must. Try to cover everything in documentation, whether as markup or in source code. 56 | 57 | 8. Once you are done, push the branch **referencing** the issue! Resolve any problems/inconsistencies brought to your notice and wait for the merge! 58 | -------------------------------------------------------------------------------- /docs/developing.md: -------------------------------------------------------------------------------- 1 | # Architectures Being Ported to PyVision 2 | 3 | - [x] **Multi Task Cascaded Convolutional Neural Network in PyTorch (MTCNN)**: Link to [paper](https://arxiv.org/pdf/1604.02878.pdf). Contributed by [Sashrika Surya](https://github.com/sashrika15) and [Pranjal Datta](https://github.com/pranjaldatta). Can be accessed [here](https://github.com/pranjaldatta/PyVision/tree/master/pyvision/misc/mtcnn). 4 | 5 | - [x] **You Only Look Once v3 (YOLOv3)**: Link to [paper](https://arxiv.org/pdf/1804.02767.pdf). Contributed by [Pranjal Datta](https://github.com/pranjaldatta). Can be accessed [here](https://github.com/pranjaldatta/PyVision/tree/master/pyvision/detection/yolov3). 6 | 7 | - [x] **Neural Style Transfer**: Link to [paper](https://arxiv.org/pdf/1508.06576.pdf). Being contributed by [Anushka Choudhary](https://github.com/Anushka0805). 8 | 9 | - [x] **Wasserstein GAN**: Link to [paper](https://arxiv.org/pdf/1701.07875.pdf).
Being contributed by [Paras Rawat](https://github.com/TrizteX). 10 | 11 | - [x] **Noise2Noise**: Link to [paper](https://arxiv.org/pdf/1803.04189.pdf). Being contributed by [Sashrika Surya](https://github.com/sashrika15). 12 | 13 | - [x] **FaceNet**: Link to [paper](https://arxiv.org/pdf/1503.03832.pdf). Being contributed by [Pranjal Datta](https://github.com/pranjaldatta) 14 | 15 | - [x] **DEtection TRansformer (DETR)**: Link to [paper](https://scontent.fccu3-1.fna.fbcdn.net/v/t39.8562-6/101177000_245125840263462_1160672288488554496_n.pdf?_nc_cat=104&_nc_sid=ae5e01&_nc_ohc=sU420_xbxT8AX9LfbKI&_nc_ht=scontent.fccu3-1.fna&oh=455f6284084dfccdf0b9b39a878d290f&oe=5F0EB147). Being contributed by [Pranjal Datta](https://github.com/pranjaldatta) 16 | 17 | - [x] **Pyramid Scene Parsing Network (PSPNet)**: Link to [paper](https://arxiv.org/pdf/1612.01105.pdf). Being contributed by [Pranjal Datta](https://github.com/pranjaldatta). 18 | 19 | - [x] **Fully Convolutional Network for Segmentation (FCNNet)**: Link to [paper](https://arxiv.org/pdf/1605.06211v1.pdf). Being contributed by [Pranjal Datta](https://github.com/pranjaldatta). 20 | 21 | - [x] **Deep Convolutional GAN (DCGAN)**: Link to [paper](https://arxiv.org/abs/1511.06434.pdf). Contributed by [Srijarko Roy](https://github.com/srijarkoroy) and [Indira Dutta](https://github.com/indiradutta). 22 | -------------------------------------------------------------------------------- /docs/weights.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/docs/weights.md -------------------------------------------------------------------------------- /pyvision/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | sys.path.append(os.path.dirname(os.path.realpath(__file__))) 4 | -------------------------------------------------------------------------------- /pyvision/detection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/__init__.py -------------------------------------------------------------------------------- /pyvision/detection/detr/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import DETR, available_models -------------------------------------------------------------------------------- /pyvision/detection/detr/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/detr/config/__init__.py -------------------------------------------------------------------------------- /pyvision/detection/detr/config/weights_download.json: -------------------------------------------------------------------------------- 1 | { 2 | "detr-resnet50" : "1yKx023hJV_CG6vqDRYSc2YV0FqiBGoXx", 3 | "detr-resnet101": "1koBQ-cIbHGwpafzGNDJCRRpf89trTpuR" 4 | } 5 | -------------------------------------------------------------------------------- /pyvision/detection/detr/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/detr/data/__init__.py 
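The `config/weights_download.json` files above map model names to Google Drive file ids; as `docs/contributing.md` explains, weights are hosted on the SRM-MIC Google Drive and fetched lazily with gdown only when a model is first initialized. Below is a minimal sketch of how such a file might be consumed — the helper name, the `.pth` extension and the `weights/` destination directory are illustrative assumptions rather than the repo's exact code (YOLOv3's `download_weights()` is the in-repo reference):

```
import json
import os

import gdown  # PyVision pulls weights from Google Drive with gdown


def fetch_weights(model_name, config_path, dest_dir="weights"):
    # config_path points at a weights_download.json mapping model names to
    # Drive file ids, e.g. {"detr-resnet50": "1yKx023hJV_CG6vqDRYSc2YV0FqiBGoXx"}.
    with open(config_path) as f:
        drive_ids = json.load(f)

    if model_name not in drive_ids:
        raise ValueError("no weights entry for '{}' in {}".format(model_name, config_path))

    os.makedirs(dest_dir, exist_ok=True)
    dest_path = os.path.join(dest_dir, model_name + ".pth")

    # Only hit the network if the weights are not already cached locally (the "lazy" part).
    if not os.path.exists(dest_path):
        url = "https://drive.google.com/uc?id=" + drive_ids[model_name]
        gdown.download(url, dest_path, quiet=False)

    return dest_path
```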
-------------------------------------------------------------------------------- /pyvision/detection/detr/data/classes.txt: -------------------------------------------------------------------------------- 1 | N/A 2 | person 3 | bicycle 4 | car 5 | motorcycle 6 | airplane 7 | bus 8 | train 9 | truck 10 | boat 11 | traffic light 12 | fire hydrant 13 | N/A 14 | stop sign 15 | parking meter 16 | bench 17 | bird 18 | cat 19 | dog 20 | horse 21 | sheep 22 | cow 23 | elephant 24 | bear 25 | zebra 26 | giraffe 27 | N/A 28 | backpack 29 | umbrella 30 | N/A 31 | N/A 32 | handbag 33 | tie 34 | suitcase 35 | frisbee 36 | skis 37 | snowboard 38 | sports ball 39 | kite 40 | baseball bat 41 | baseball glove 42 | skateboard 43 | surfboard 44 | tennis racket 45 | bottle 46 | N/A 47 | wine glass 48 | cup 49 | fork 50 | knife 51 | spoon 52 | bowl 53 | banana 54 | apple 55 | sandwich 56 | orange 57 | broccoli 58 | carrot 59 | hot dog 60 | pizza 61 | donut 62 | cake 63 | chair 64 | couch 65 | potted plant 66 | bed 67 | N/A 68 | dining table 69 | N/A 70 | N/A 71 | toilet 72 | N/A 73 | tv 74 | laptop 75 | mouse 76 | remote 77 | keyboard 78 | cell phone 79 | microwave 80 | oven 81 | toaster 82 | sink 83 | refrigerator 84 | N/A 85 | book 86 | clock 87 | vase 88 | scissors 89 | teddy bear 90 | hair drier 91 | toothbrush 92 | -------------------------------------------------------------------------------- /pyvision/detection/detr/data/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /pyvision/detection/detr/detr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .utils.misc import NestedTensor, nested_tensor_from_tensor_list 6 | from .utils.box_utils import box_wh_to_xy 7 | 8 | class MLP(nn.Module): 9 | """ 10 | A very simple multi layer perceptron also known as FFN 11 | """ 12 | def __init__(self, in_dims, hidden_dims, out_dims, num_layers): 13 | 14 | super().__init__() 15 | 16 | self.num_layers = num_layers 17 | h = [hidden_dims] * (num_layers - 1) 18 | self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([in_dims]+h, h+[out_dims])) 19 | 20 | def forward(self, x): 21 | 22 | for i, layer in enumerate(self.layers): 23 | x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) 24 | return x 25 | 26 | class DETR_model(nn.Module): 27 | """ 28 | The main detr 
module that performs the forward pass 29 | """ 30 | def __init__(self, backbone, transformer, num_classes, num_queries, aux_loss=False): 31 | """The module that builds the detr model 32 | 33 | Parameters 34 | ---------- 35 | backbone : [nn.Module] 36 | the backbone to be used by the detr model. defined in backbone.py 37 | transformer : [nn.Module] 38 | the transformer to be used by the detr model. define din transformers.py 39 | num_classes : [int] 40 | number of object classses 41 | num_queries : [int] 42 | number of object queries i.e. detection slot i.e. the maximum number 43 | of objects that can be detected in a single image. For COCO, its 100 44 | aux_loss : bool, optional 45 | if auxiliary decoding losses are to be used, by default False 46 | """ 47 | 48 | super().__init__() 49 | 50 | self.backbone = backbone 51 | self.transformer = transformer 52 | self.num_classes = num_classes 53 | self.num_queries = num_queries 54 | self.aux_loss = aux_loss 55 | 56 | hidden_dim = self.transformer.d_model 57 | self.class_embed = nn.Linear(hidden_dim, num_classes+1) 58 | self.bbox_embed = MLP(hidden_dim, hidden_dim, 4, 3) 59 | self.query_embed = nn.Embedding(num_queries, hidden_dim) 60 | self.input_proj = nn.Conv2d(backbone.num_channels, hidden_dim, 1) 61 | 62 | def forward(self, samples: NestedTensor): 63 | """ 64 | The forward method defines a single forward pass for the model. 65 | It expects a TensorList obect which consists of : 66 | - samples.tensor: batched images of shape [B, 3, H, W] 67 | - samples.mask: a binary mask of shape [B, H, W] containing 1 padded pixels 68 | 69 | It returns the following elements: 70 | - pred_logits = classification logits for all queries. 71 | Shape = [B, num_queries, (num_classes + 1)] 72 | - pred_boxes = normalized box coordinates for all object queries represented as 73 | (center_x, center_y, height, width). These values are normalized 74 | between [0, 1] relative to size of each input image. utils/postprocess 75 | retrieves unnormalized bounding boxes 76 | - aux_outputs = Optional 77 | 78 | """ 79 | if not isinstance(samples, NestedTensor): 80 | samples = nested_tensor_from_tensor_list(samples) 81 | 82 | # we run it through the backbone 83 | features, pos = self.backbone(samples) 84 | 85 | # now we get the tensors and masks for each image and make the transformer pass 86 | src, mask = features[-1].decompose() 87 | assert mask is not None 88 | hs = self.transformer(self.input_proj(src), mask, self.query_embed.weight, pos[-1])[0] 89 | 90 | output_class = self.class_embed(hs) 91 | output_coord = self.bbox_embed(hs).sigmoid() 92 | out = { 93 | "pred_logits": output_class[-1], 94 | "pred_boxes": output_coord[-1] 95 | } 96 | if self.aux_loss: 97 | raise NotImplementedError("aux_loss not implemented yet") 98 | 99 | return out 100 | 101 | class DETR_postprocess(nn.Module): 102 | """ 103 | This module converts DETR output into a simple usable format""" 104 | def __init__(self, conf=0.7): 105 | super(DETR_postprocess, self).__init__() 106 | self.conf = conf 107 | 108 | @torch.no_grad() 109 | def forward(self, outputs, target_size): 110 | """ 111 | Converts raw DETR outputs into a usable format i.e. 
it takes the raw 112 | normalized (wrt to [0, 1]) bounding boxes predictions, unnormalizes it, 113 | scales it to original image size and returns a list of dictionaries of 114 | format {score, class_label, box_coords} for all the detections in a given image 115 | """ 116 | raw_logits, raw_boxes = outputs['pred_logits'], outputs["pred_boxes"] 117 | 118 | assert len(raw_logits) == len(target_size), "raw_logits and target size len mismatch" 119 | assert target_size.shape[1] == 2, "target_size shape dim 1 not equal to 2" 120 | 121 | probs = F.softmax(raw_logits, -1)[0,:,:-1] 122 | keep = probs.max(-1).values > self.conf 123 | probs = probs[keep] 124 | probs, labels = probs[...,:-1].max(-1) 125 | 126 | # converting boxes to [x1, y1, x2, y2] format 127 | raw_boxes = raw_boxes[:,keep,:] 128 | boxes = box_wh_to_xy(raw_boxes) 129 | 130 | if boxes.device is not "cpu": 131 | boxes = boxes.cpu() 132 | 133 | # convert coords relative to [0, 1] to absolute [H, W] coords 134 | img_height, img_width = target_size.unbind(1) 135 | scale_factors = torch.stack([img_width, img_height, img_width, img_height], dim=1) 136 | boxes = boxes * scale_factors[:, :] # remove none 137 | 138 | results = [{"scores": s.item(), "labels": l.item(), "coords": c.tolist()} for s, l, c in zip(probs, labels, boxes[0])] 139 | 140 | return results 141 | 142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /pyvision/detection/detr/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/detr/models/__init__.py -------------------------------------------------------------------------------- /pyvision/detection/detr/models/backbone.py: -------------------------------------------------------------------------------- 1 | """ 2 | The backbone modules are defined here 3 | """ 4 | 5 | from typing import List, Dict 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import torchvision 11 | from torchvision.models._utils import IntermediateLayerGetter 12 | 13 | from ..utils.misc import NestedTensor 14 | 15 | class FrozenBatchNorm2d(nn.Module): 16 | """ 17 | Custom batch norm layers where the batch stats and affine parameters 18 | are fixed. 
19 | 20 | Copy-paste from torchvision.misc.ops with added eps before rqsrt, without 21 | which any other models other than resnet[18, 24, 50, 101] produce nans 22 | """ 23 | def __init__(self, size): 24 | super(FrozenBatchNorm2d, self).__init__() 25 | 26 | self.register_buffer("weight", torch.ones(size)) 27 | self.register_buffer("bias", torch.zeros(size)) 28 | self.register_buffer("running_mean", torch.zeros(size)) 29 | self.register_buffer("running_var", torch.ones(size)) 30 | 31 | def _load_from_state_dict(self, state_dict, prefix, local_metadata,strict, 32 | missing_keys, unexpected_keys, error_msgs): 33 | 34 | num_batches_tracked_key = prefix + "num_batches_tracked" 35 | if num_batches_tracked_key in state_dict: 36 | del state_dict[num_batches_tracked_key] 37 | 38 | super(FrozenBatchNorm2d, self)._load_from_state_dict( 39 | state_dict, prefix, local_metadata,strict, 40 | missing_keys, unexpected_keys, error_msgs 41 | ) 42 | 43 | def forward(self, x): 44 | 45 | w = self.weight.reshape(1, -1, 1, 1) 46 | b = self.bias.reshape(1, -1, 1, 1) 47 | rv = self.running_var.reshape(1, -1, 1, 1) 48 | rm = self.running_mean.reshape(1, -1, 1, 1) 49 | eps = 1e-5 50 | scale = w * (rv + eps).rsqrt() 51 | bias = b - rm * scale 52 | 53 | return x * scale + bias 54 | 55 | 56 | class BackboneBase(nn.Module): 57 | 58 | def __init__(self, backbone:nn.Module, train_backbone: bool, num_channels: int, 59 | return_interim_layers: bool): 60 | 61 | super().__init__() 62 | 63 | for name, param in backbone.named_parameters(): 64 | if not train_backbone or "layer_2" not in name or "layer_3" not in name or "layer_4" not in name: 65 | param.requires_grad_(False) 66 | 67 | if return_interim_layers: 68 | return_layers = {"layer1": "0", "layer2": "1", "layer3": "2", "layer4": "3"} 69 | else: 70 | return_layers = {"layer4": "0"} 71 | 72 | self.body = IntermediateLayerGetter(backbone, return_layers=return_layers) 73 | self.num_channels = num_channels 74 | 75 | def forward(self, tensor_list: NestedTensor): 76 | 77 | xs = self.body(tensor_list.tensors) 78 | out: Dict[str, NestedTensor] = {} 79 | for name, x in xs.items(): 80 | m = tensor_list.mask 81 | assert m is not None 82 | mask = F.interpolate(m[None].float(), size=x.shape[-2:]).to(torch.bool)[0] 83 | out[name] = NestedTensor(x, mask) 84 | 85 | return out 86 | 87 | class Backbone(BackboneBase): 88 | """ 89 | Resnet backbone with frozen batchnorm 90 | """ 91 | def __init__(self, name: str, train_backbone: bool, return_interim_layers: bool, 92 | dilation: bool): 93 | 94 | backbone = getattr(torchvision.models, name)( 95 | replace_stride_with_dilation=[False, False, dilation], 96 | pretrained=False, norm_layer=FrozenBatchNorm2d 97 | ) # make pretrained true if requried 98 | 99 | num_channels = 512 if name in ('resnet18', 'resnet34') else 2048 100 | super().__init__(backbone, train_backbone, num_channels=num_channels, return_interim_layers=return_interim_layers) 101 | 102 | 103 | class Joiner(nn.Sequential): 104 | 105 | def __init__(self, backbone, position_embedding): 106 | super().__init__(backbone, position_embedding) 107 | 108 | def forward(self, tensor_list: NestedTensor): 109 | xs = self[0](tensor_list) 110 | out: List[NestedTensor] = [] 111 | pos = [] 112 | for name, x in xs.items(): 113 | out.append(x) 114 | pos.append(self[1](x).to(x.tensors.dtype)) # postional encoding 115 | 116 | return out, pos 117 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /pyvision/detection/detr/readme.md: 
-------------------------------------------------------------------------------- 1 | # End-to-End Object Detection with Transformers (DEtection TRansformer) 2 | 3 | DETR successfully uses Transformers in a conventional computer vision task such as detection. It reimagines the object detection pipeline as a single end-to-end model and views object detection as a **direct set prediction** problem. 4 | 5 | Check out [usage](#Usage) to start using DETR or check [summary](#Summary) for implementation details. 6 | 7 | Do check out the [paper](https://scontent.fccu3-1.fna.fbcdn.net/v/t39.8562-6/101177000_245125840263462_1160672288488554496_n.pdf?_nc_cat=104&_nc_sid=ae5e01&_nc_ohc=sU420_xbxT8AX9LfbKI&_nc_ht=scontent.fccu3-1.fna&oh=455f6284084dfccdf0b9b39a878d290f&oe=5F0EB147) or visit the original GitHub [repository](https://github.com/facebookresearch/detr?fbclid=IwAR3Eqm_JaWigPZfi5Uk3Pdi24u_Y198n2twoTSvYnn22XmiBAN92lC3TgYA). (The visit is worth it! Not only do they outline their approach in detail, but they also demonstrate through a [colab notebook](https://colab.research.google.com/github/facebookresearch/detr/blob/colab/notebooks/detr_demo.ipynb) how easy it is to make your own DETR in approx. 50 PyTorch lines!) 8 | 9 | 10 | Check out this standalone [notebook](https://github.com/pranjaldatta/PyVision/blob/master/demo/detection/detr/detr_demo.ipynb) to see how easily you can use DETR in 3-4 lines! 11 | If the notebook link doesn't work, please look [here](https://nbviewer.jupyter.org/github/pranjaldatta/PyVision/blob/master/demo/detection/detr/detr_demo.ipynb) as a workaround. 12 | 13 | ## Summary 14 | 15 | Currently, PyVision DETR supports the models listed below. The pretrained models were provided by the authors. More details can be accessed [here](https://github.com/facebookresearch/detr?fbclid=IwAR3Eqm_JaWigPZfi5Uk3Pdi24u_Y198n2twoTSvYnn22XmiBAN92lC3TgYA). 16 | 17 | *Note:* Panoptic models are being added. 18 | 19 | | Model| Train Dataset| Test Dataset | box AP | Available | 20 |--------|------------|------|---|----| 21 | | DETR-Resnet50 (default) | COCO2017-val5k | COCO2017-val5k | 42.0 | Yes | 22 | | DETR-Resnet101 | COCO2017-val5k | COCO2017-val5k | 43.5 | Yes | 23 | 24 | ## Usage 25 | 26 | For detailed documentation and parameters, refer to the docstrings/source code. 27 | 28 | **Brief Usage Summary:** 29 | 30 | The model setup is done via the DETR class exposed in *pyvision.detection.detr*. All model-related configuration, ranging from model type to confidence thresholds, can be set through the class constructor. 31 | 32 | Detection is done through the *detect()* method of the DETR class. Again, it offers some parameters for customisation that can override the general class configuration. Refer to source code docstrings for more details.
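The *detect()* call returns its detections as a list of `{"scores": ..., "labels": ..., "coords": ...}` dictionaries (the Quick Start snippets below show how the model itself is set up). Going by the post-processing code in `detr.py`, `"coords"` holds absolute `[x1, y1, x2, y2]` pixel coordinates and `"labels"` is an integer class index. The helper below is a hypothetical sketch of one way to consume that output — the function name, the extra score filter and the OpenCV drawing are illustrative and not part of the package:

```
import cv2

def draw_detections(img_path, results, out_path="detections.png", min_score=0.7):
    # "results" is the documented list of {"scores", "labels", "coords"} dicts
    # returned by detect(); "coords" are absolute [x1, y1, x2, y2] pixel values.
    img = cv2.imread(img_path)
    for det in results:
        if det["scores"] < min_score:  # optional extra filtering on top of the model's own threshold
            continue
        x1, y1, x2, y2 = [int(c) for c in det["coords"]]
        # det["labels"] is an integer class index; map it through
        # pyvision/detection/detr/data/classes.txt if a readable name is needed.
        tag = "{}: {:.2f}".format(det["labels"], det["scores"])
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(img, tag, (x1, max(y1 - 5, 0)), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0), 1)
    cv2.imwrite(out_path, img)
```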
33 | 34 | **Quick Start:** 35 | 36 | - To use the default *DETR-Resnet50* model, 37 | 38 | ``` 39 | from pyvision.detection import detr 40 | 41 | detr_obj = detr.DETR() 42 | 43 | # time_taken is the total time taken to perform the detection 44 | # result is the list of detections in a dict format {"scores": ..., "labels": ..., "coords": ...} 45 | 46 | time_taken, result = detr_obj.detect() 47 | ``` 48 | 49 | - To use *DTER-Resnet101* model: 50 | 51 | ``` 52 | from pyvision.detection import detr 53 | 54 | detr_obj = detr.DETR(model="detr-resnet101") 55 | 56 | # time_taken is the total time taken to perform the detection 57 | # result is the list of detections in a dict format {"scores": ..., "labels": ..., "coords": ...} 58 | 59 | time_taken, result = detr_obj.detect() 60 | ``` 61 | 62 | - To list supported models, 63 | 64 | ``` 65 | from pyvision.detection import detr 66 | 67 | print(detr.available_models()) 68 | ``` 69 | 70 | - To run **tests**, from repo root, run the following command from terminal 71 | 72 | ``` 73 | $ python tests/detection/detr/detr_test.py 74 | ``` 75 | 76 | ## Contributor 77 | 78 | - [Pranjal Datta](https://github.com/pranjaldatta) 79 | -------------------------------------------------------------------------------- /pyvision/detection/detr/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/detr/utils/__init__.py -------------------------------------------------------------------------------- /pyvision/detection/detr/utils/box_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision.ops.boxes import box_area 3 | import random 4 | import cv2 5 | 6 | clip = lambda x, x_min, x_max : x if x_min <= x <= x_max else (x_min if x < x_min else x_max) 7 | 8 | def box_wh_to_xy(x): 9 | """ 10 | Converts co-ordinates from (x, y, w, h) to 11 | (x1, y1, x2, y2) format 12 | """ 13 | x, y, w, h = x.unbind(-1) 14 | 15 | x1 = x - 0.5 * w 16 | y1 = y - 0.5 * h 17 | x2 = x + 0.5 * w 18 | y2 = y + 0.5 * h 19 | 20 | return torch.stack([x1, y1, x2, y2], dim=-1) 21 | 22 | def box_xy_to_wh(x): 23 | """ 24 | Converts co-ordinates from (x1, y1, x2, y2) to 25 | (x, y, w, h) 26 | """ 27 | x1, y1, x2, y2 = x.unbind(-1) 28 | 29 | x = (x2 + x1)/2 30 | y = (y2 + y1)/2 31 | w = (x2 - x1) 32 | h = (y2 - y1) 33 | 34 | return torch.stack([x, y, w, h], dim=-1) 35 | 36 | def iou(box1, box2): 37 | """ 38 | Returns the iou between two boxes 39 | """ 40 | area1 = box_area(box1) 41 | area2 = box_area(box2) 42 | 43 | top_left = torch.max(box1[:, None, :2], box2[:, :2]) # remove None! very Irritating 44 | bottom_right = torch.min(box1[:, None, 2:], box2[:, 2:]) # remove None! 
Very Irritating 45 | 46 | wh = (bottom_right - top_left).clamp(min=0) 47 | inter = wh[:, :, 0] * wh[:, :, 1] 48 | 49 | union = area1 + area2 - inter #check this 50 | 51 | iou = inter / union 52 | 53 | return iou, union 54 | 55 | def draw_box(orig_img, box, _cls, _cls_idx, colors, annotate): 56 | 57 | #img_w, img_h = orig_img.shape[0], orig_img.shape[1] 58 | #box[0:2]= [clip(x, 0.0, img_w) for x in box[0:2]] 59 | #box[1:4] = [clip(x, 0.0, img_h) for x in box[1:4]] 60 | 61 | coords1 = (int(box[0]), int(box[1])) 62 | coords2 = (int(box[2]), int(box[3])) 63 | 64 | 65 | label = "{0}".format(_cls) 66 | color = colors[_cls_idx] 67 | cv2.rectangle(orig_img, coords1, coords2, color, 2) 68 | if annotate: 69 | text_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0] 70 | coords2 = coords1[0] + text_size[0] + 3, coords1[1] + text_size[1] + 4 71 | cv2.rectangle(orig_img, coords1, coords2, color, -1) 72 | cv2.putText(orig_img, label, (coords1[0], coords1[1]+text_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 1, [255,255,255], 1) 73 | 74 | return orig_img 75 | 76 | 77 | def clamp(results, w_lim, h_lim): 78 | 79 | for idx in range(len(results)): 80 | box = results[idx]["coords"] 81 | box[0:2]= [clip(x, 0.0, w_lim) for x in box[0:2]] 82 | box[1:4] = [clip(x, 0.0, h_lim) for x in box[1:4]] 83 | results[idx]["coords"] = box 84 | return results 85 | 86 | -------------------------------------------------------------------------------- /pyvision/detection/detr/utils/misc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from typing import List, Optional 6 | 7 | class NestedTensor(object): 8 | 9 | def __init__(self, tensors, mask: Optional[torch.Tensor]): 10 | 11 | self.tensors = tensors 12 | self.mask = mask 13 | 14 | def to(self, device): 15 | 16 | cast_tensor = self.tensors.to(device) 17 | mask = self.mask 18 | if mask is not None: 19 | assert mask is not None 20 | cast_mask = mask.to(device) 21 | else: 22 | cast_mask = None 23 | 24 | return NestedTensor(cast_tensor, cast_mask) 25 | 26 | def decompose(self): 27 | return self.tensors, self.mask 28 | 29 | def __repr__(self): 30 | return str(self.tensors) 31 | 32 | 33 | def _max_by_axis(inp_list): 34 | """List[List[int]] -> List[int]""" 35 | maxes = inp_list[0] 36 | for sublist in inp_list[1:]: 37 | for index, item in enumerate(sublist): 38 | maxes[index] = max(maxes[index], item) 39 | return maxes 40 | 41 | 42 | 43 | def nested_tensor_from_tensor_list(tensor_list: List[torch.Tensor]): 44 | """ 45 | Converts a list of tensor-images[3, H, W] into nested tensor object for 46 | model input 47 | """ 48 | if tensor_list[0].ndim == 3: 49 | 50 | max_size = _max_by_axis([list(img.shape) for img in tensor_list]) 51 | batch_size = [len(tensor_list)] + max_size 52 | b, c, h, w = batch_size 53 | dtype = tensor_list[0].dtype 54 | device = tensor_list[0].device 55 | tensor = torch.zeros(batch_size, dtype=dtype, device=device) 56 | mask = torch.ones((b, h, w), dtype=torch.bool, device=device) 57 | 58 | for img, pad_img, m in zip(tensor_list, tensor, mask): 59 | pad_img[:img.shape[0], :img.shape[1], :img.shape[2]].copy_(img) 60 | m[:img.shape[1], :img.shape[2]] = False 61 | else: 62 | raise ValueError("Images can have ndim == 3 but found ", tensor_list[0].ndim) 63 | 64 | return NestedTensor(tensor, mask) 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /pyvision/detection/detr/utils/pallete: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/detr/utils/pallete -------------------------------------------------------------------------------- /pyvision/detection/detr/utils/position_encoding.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | 5 | from .misc import NestedTensor 6 | 7 | class PositionEmbeddingSine(nn.Module): 8 | """ 9 | This is a more standard version of the position embedding as defined 10 | in the attention is all you need paper, but is generalized to work on 11 | images 12 | """ 13 | def __init__(self, num_pos_feats=64, temp=10000, norm=False, scale=None): 14 | super().__init__() 15 | 16 | self.num_pos_feats = num_pos_feats 17 | self.temp = temp 18 | self.norm = norm 19 | 20 | 21 | if scale is not None and norm is False: 22 | raise ValueError("normalize should be true if scale is passed") 23 | if scale is None: 24 | scale = 2 * math.pi 25 | 26 | self.scale = scale 27 | 28 | def forward(self, tensor_list: NestedTensor): 29 | 30 | x = tensor_list.tensors 31 | mask = tensor_list.mask 32 | 33 | assert mask is not None 34 | 35 | not_mask = ~mask 36 | y_embed = not_mask.cumsum(1, dtype=torch.float32) 37 | x_embed = not_mask.cumsum(2, dtype=torch.float32) 38 | if self.norm: 39 | eps = 1e-6 40 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale 41 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale 42 | 43 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) 44 | dim_t = self.temp ** (2 * (dim_t // 2) / self.num_pos_feats) 45 | 46 | x_pos = x_embed[:, :, :, None] / dim_t 47 | y_pos = y_embed[:, :, :, None] / dim_t 48 | x_pos = torch.stack((x_pos[:,:,:,0::2].sin(), x_pos[:,:,:,1::2].cos()), dim=4).flatten(3) 49 | y_pos = torch.stack((y_pos[:,:,:,0::2].sin(), y_pos[:,:,:,1::2].cos()), dim=4).flatten(3) 50 | 51 | pos = torch.cat((y_pos, x_pos), dim=3).permute(0, 3, 1, 2) 52 | 53 | return pos 54 | -------------------------------------------------------------------------------- /pyvision/detection/efficientdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import EffdetInferAPI as EfficientDet -------------------------------------------------------------------------------- /pyvision/detection/efficientdet/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/efficientdet/config/__init__.py -------------------------------------------------------------------------------- /pyvision/detection/efficientdet/config/dataset_coco.yaml: -------------------------------------------------------------------------------- 1 | class_list : ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", 2 | "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", 3 | "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", 4 | "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", 5 | "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", 6 | "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", 
"orange", 7 | "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", 8 | "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", 9 | "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", 10 | "teddy bear", "hair drier", "toothbrush"] 11 | 12 | model_name: "effdet_coco" 13 | 14 | colors : [(39, 129, 113), (164, 80, 133), (83, 122, 114), (99, 81, 172), (95, 56, 104), (37, 84, 86), (14, 89, 122), 15 | (80, 7, 65), (10, 102, 25), (90, 185, 109), (106, 110, 132), (169, 158, 85), (188, 185, 26), (103, 1, 17), 16 | (82, 144, 81), (92, 7, 184), (49, 81, 155), (179, 177, 69), (93, 187, 158), (13, 39, 73), (12, 50, 60), 17 | (16, 179, 33), (112, 69, 165), (15, 139, 63), (33, 191, 159), (182, 173, 32), (34, 113, 133), (90, 135, 34), 18 | (53, 34, 86), (141, 35, 190), (6, 171, 8), (118, 76, 112), (89, 60, 55), (15, 54, 88), (112, 75, 181), 19 | (42, 147, 38), (138, 52, 63), (128, 65, 149), (106, 103, 24), (168, 33, 45), (28, 136, 135), (86, 91, 108), 20 | (52, 11, 76), (142, 6, 189), (57, 81, 168), (55, 19, 148), (182, 101, 89), (44, 65, 179), (1, 33, 26), 21 | (122, 164, 26), (70, 63, 134), (137, 106, 82), (120, 118, 52), (129, 74, 42), (182, 147, 112), (22, 157, 50), 22 | (56, 50, 20), (2, 22, 177), (156, 100, 106), (21, 35, 42), (13, 8, 121), (142, 92, 28), (45, 118, 33), 23 | (105, 118, 30), (7, 185, 124), (46, 34, 146), (105, 184, 169), (22, 18, 5), (147, 71, 73), (181, 64, 91), 24 | (31, 39, 184), (164, 179, 33), (96, 50, 18), (95, 15, 106), (113, 68, 54), (136, 116, 112), (119, 139, 130), 25 | (31, 139, 34), (66, 6, 127), (62, 39, 2), (49, 99, 180), (49, 119, 155), (153, 50, 183), (125, 38, 3), 26 | (129, 87, 143), (49, 87, 40), (128, 62, 120), (73, 85, 148), (28, 144, 118), (29, 9, 24), (175, 45, 108), 27 | (81, 175, 64), (178, 19, 157), (74, 188, 190), (18, 114, 2), (62, 128, 96), (21, 3, 150), (0, 6, 95), 28 | (2, 20, 184), (122, 37, 185)] -------------------------------------------------------------------------------- /pyvision/detection/efficientdet/config/weights_download.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "effdet_coco": "1jvcGIWyZ3jjTltiErp-OPNTA7SLWlslR" 4 | } 5 | -------------------------------------------------------------------------------- /pyvision/detection/efficientdet/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/efficientdet/lib/__init__.py -------------------------------------------------------------------------------- /pyvision/detection/efficientdet/lib/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | 5 | from torch.utils.data import Dataset, DataLoader 6 | from pycocotools.coco import COCO 7 | 8 | import cv2 9 | 10 | class CustomDataset(Dataset): 11 | 12 | def __init__(self, root_dir, img_dir="images", set_name="train2017", transform=None): 13 | 14 | self.root_dir = root_dir 15 | self.img_dir = img_dir 16 | self.set_name = set_name 17 | self.transform = transform 18 | 19 | self.coco_tool = COCO(os.path.join(self.root_dir, 'annotations', 'instances_'+self.set_name+'.json')) 20 | self.image_ids = self.coco_tool.getImgIds() 21 | 22 | self.load_classes() 23 | 24 | def load_classes(self): 25 | 26 | categories = 
self.coco_tool.loadCats(self.coco_tool.getCatIds()) 27 | categories.sort(key = lambda x: x["id"]) 28 | 29 | # load name -> label 30 | self.classes = {} 31 | self.coco_labels = {} 32 | self.coco_labels_inverse = {} 33 | for category in categories: 34 | self.coco_labels[len(self.classes)] = category['id'] 35 | self.coco_labels_inverse[category['id']] = len(self.classes) 36 | self.classes[category['name']] = len(self.classes) 37 | 38 | # load label -> name 39 | self.labels = {} 40 | for key, value in self.classes.items(): 41 | self.labels[value] = key 42 | 43 | 44 | def load_image(self, idx): 45 | 46 | img_info = self.coco_tool.loadImgs(self.image_ids[idx])[0] 47 | img_path = os.path.join( 48 | self.root_dir, self.img_dir, self.set_name, img_info['file_name'] 49 | ) 50 | img = cv2.imread(img_path) 51 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 52 | 53 | img = img.astype(np.float32) / 255.0 54 | 55 | return img 56 | 57 | def coco_label_to_label(self, coco_label): 58 | return self.coco_labels_inverse[coco_label] 59 | 60 | def label_to_coco_label(self, label): 61 | return self.coco_labels[label] 62 | 63 | def num_classes(self): 64 | return len(self.classes) 65 | 66 | def load_annotations(self, idx): 67 | 68 | anno_ids = self.coco_tool.getAnnIds( 69 | imgIds=self.image_ids[idx], iscrowd=False 70 | ) 71 | annotations = np.zeros((0, 5)) 72 | 73 | # if some images miss annotations 74 | if len(anno_ids) == 0: 75 | return annotations 76 | 77 | # parsing the annotations here 78 | coco_annotations = self.coco_tool.loadAnns(anno_ids) 79 | for idx, a in enumerate(coco_annotations): 80 | 81 | # skip the annotations that have no height/width 82 | if a['bbox'][2] < 1 or a['bbox'][3] < 1: 83 | continue 84 | 85 | annotation = np.zeros((1, 5)) 86 | annotation[0, :4] = a['bbox'] 87 | annotation[0, 4] = self.coco_label_to_label(a['category_id']) 88 | annotations = np.append(annotations, annotation, axis=0) 89 | 90 | # transform [x, y, w, h] -> [x1, y1, x2, y2] 91 | annotations[:, 2] = annotations[:, 0] + annotations[:, 2] 92 | annotations[:, 3] = annotations[:, 1] + annotations[:, 3] 93 | 94 | return annotations 95 | 96 | 97 | def __len__(self): 98 | return len(self.image_ids) 99 | 100 | 101 | def __getitem__(self, idx): 102 | 103 | img = self.load_image(idx) 104 | annot = self.load_annotations(idx) 105 | 106 | data = { 107 | "img": img, 108 | "annot": annot 109 | } 110 | 111 | if self.transform: 112 | data = self.transform(data) 113 | 114 | return data 115 | 116 | 117 | def collater(data): 118 | imgs = [s['img'] for s in data] 119 | annots = [s['annot'] for s in data] 120 | scales = [s['scale'] for s in data] 121 | 122 | imgs = torch.from_numpy(np.stack(imgs, axis=0)) 123 | 124 | max_num_annots = max(annot.shape[0] for annot in annots) 125 | 126 | if max_num_annots > 0: 127 | 128 | annot_padded = torch.ones((len(annots), max_num_annots, 5)) * -1 129 | 130 | if max_num_annots > 0: 131 | for idx, annot in enumerate(annots): 132 | if annot.shape[0] > 0: 133 | annot_padded[idx, :annot.shape[0], :] = annot 134 | else: 135 | annot_padded = torch.ones((len(annots), 1, 5)) * -1 136 | 137 | imgs = imgs.permute(0, 3, 1, 2) 138 | 139 | return {'img': imgs, 'annot': annot_padded, 'scale': scales} 140 | 141 | 142 | class Resizer(object): 143 | """Convert ndarrays in sample to Tensors.""" 144 | 145 | def __call__(self, sample, common_size=512): 146 | image, annots = sample['img'], sample['annot'] 147 | height, width, _ = image.shape 148 | if height > width: 149 | scale = common_size / height 150 | resized_height = 
common_size 151 | resized_width = int(width * scale) 152 | else: 153 | scale = common_size / width 154 | resized_height = int(height * scale) 155 | resized_width = common_size 156 | 157 | image = cv2.resize(image, (resized_width, resized_height)) 158 | 159 | new_image = np.zeros((common_size, common_size, 3)) 160 | new_image[0:resized_height, 0:resized_width] = image 161 | 162 | annots[:, :4] *= scale 163 | 164 | return {'img': torch.from_numpy(new_image), 'annot': torch.from_numpy(annots), 'scale': scale} 165 | 166 | 167 | class Augmenter(object): 168 | """Convert ndarrays in sample to Tensors.""" 169 | 170 | def __call__(self, sample, flip_x=0.5): 171 | if np.random.rand() < flip_x: 172 | image, annots = sample['img'], sample['annot'] 173 | image = image[:, ::-1, :] 174 | 175 | rows, cols, channels = image.shape 176 | 177 | x1 = annots[:, 0].copy() 178 | x2 = annots[:, 2].copy() 179 | 180 | x_tmp = x1.copy() 181 | 182 | annots[:, 0] = cols - x2 183 | annots[:, 2] = cols - x_tmp 184 | 185 | sample = {'img': image, 'annot': annots} 186 | 187 | return sample 188 | 189 | 190 | class Normalizer(object): 191 | 192 | def __init__(self): 193 | self.mean = np.array([[[0.485, 0.456, 0.406]]]) 194 | self.std = np.array([[[0.229, 0.224, 0.225]]]) 195 | 196 | def __call__(self, sample): 197 | image, annots = sample['img'], sample['annot'] 198 | 199 | return {'img': ((image.astype(np.float32) - self.mean) / self.std), 'annot': annots} 200 | 201 | -------------------------------------------------------------------------------- /pyvision/detection/efficientdet/readme.md: -------------------------------------------------------------------------------- 1 | # EfficientDet: Scalable and Efficient Object Detection 2 | 3 | A model zoo implementation of the EfficientDet algorithm. 4 | 5 | ## Current Stat 6 | 7 | * Efficientdet-b0 trained on Dataset-v3 with a loss of 0.13 8 | 9 | ## Usage 10 | 11 | * To Train, from repo root, 12 | 13 | ```shell 14 | !python src/models/efficientdet/train.py 15 | ``` 16 | 17 | ## To Do 18 | 19 | - [ ] Training b1 - b7 models. Experimenting with focal loss values. 20 | - [ ] Train API 21 | -------------------------------------------------------------------------------- /pyvision/detection/readme.md: -------------------------------------------------------------------------------- 1 | # Detection 2 | 3 | Contains popular Object Detection architectures. 
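As an orientation, the quick-start snippet from the YOLOv3 readme further down shows the import-construct-detect pattern used by the detectors documented below (see each model's own readme for its full parameters):

```
from pyvision.detection import yolov3

yolo = yolov3.YOLOv3()

# imgs: input images (arrays) with boxes drawn, objs: list of detections
imgs, objs = yolo.detect()
```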
4 | 5 | ## Currently Supported: 6 | - YOLOv3 7 | - DEtection TRansformer (DETR) 8 | -------------------------------------------------------------------------------- /pyvision/detection/yolov3/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import * -------------------------------------------------------------------------------- /pyvision/detection/yolov3/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/yolov3/config/__init__.py -------------------------------------------------------------------------------- /pyvision/detection/yolov3/config/models_supported.txt: -------------------------------------------------------------------------------- 1 | yolov3 2 | yolov3-tiny -------------------------------------------------------------------------------- /pyvision/detection/yolov3/config/weights_download.json: -------------------------------------------------------------------------------- 1 | { 2 | "yolov3.weights":"1edDTm9BwkNylyFfv6BJmS_OzSNw5QncR", 3 | "yolov3-tiny.weights":"1U1xYO5ubw0_JiNkHIe8KwJk8alOySBaK" 4 | } -------------------------------------------------------------------------------- /pyvision/detection/yolov3/config/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | # 0 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | # 1 35 | [maxpool] 36 | size=2 37 | stride=2 38 | 39 | # 2 40 | [convolutional] 41 | batch_normalize=1 42 | filters=32 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | # 3 49 | [maxpool] 50 | size=2 51 | stride=2 52 | 53 | # 4 54 | [convolutional] 55 | batch_normalize=1 56 | filters=64 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | # 5 63 | [maxpool] 64 | size=2 65 | stride=2 66 | 67 | # 6 68 | [convolutional] 69 | batch_normalize=1 70 | filters=128 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | # 7 77 | [maxpool] 78 | size=2 79 | stride=2 80 | 81 | # 8 82 | [convolutional] 83 | batch_normalize=1 84 | filters=256 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | # 9 91 | [maxpool] 92 | size=2 93 | stride=2 94 | 95 | # 10 96 | [convolutional] 97 | batch_normalize=1 98 | filters=512 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | # 11 105 | [maxpool] 106 | size=2 107 | stride=1 108 | 109 | # 12 110 | [convolutional] 111 | batch_normalize=1 112 | filters=1024 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | ########### 119 | 120 | # 13 121 | [convolutional] 122 | batch_normalize=1 123 | filters=256 124 | size=1 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | # 14 130 | [convolutional] 131 | batch_normalize=1 132 | filters=512 133 | size=3 134 | stride=1 135 | pad=1 136 | activation=leaky 137 | 138 | # 15 139 | [convolutional] 140 | size=1 141 | stride=1 142 | pad=1 143 | filters=255 
144 | activation=linear 145 | 146 | 147 | 148 | # 16 149 | [yolo] 150 | mask = 3,4,5 151 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 152 | classes=80 153 | num=6 154 | jitter=.3 155 | ignore_thresh = .7 156 | truth_thresh = 1 157 | random=1 158 | 159 | # 17 160 | [route] 161 | layers = -4 162 | 163 | # 18 164 | [convolutional] 165 | batch_normalize=1 166 | filters=128 167 | size=1 168 | stride=1 169 | pad=1 170 | activation=leaky 171 | 172 | # 19 173 | [upsample] 174 | stride=2 175 | 176 | # 20 177 | [route] 178 | layers = -1, 8 179 | 180 | # 21 181 | [convolutional] 182 | batch_normalize=1 183 | filters=256 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | # 22 190 | [convolutional] 191 | size=1 192 | stride=1 193 | pad=1 194 | filters=255 195 | activation=linear 196 | 197 | # 23 198 | [yolo] 199 | mask = 1,2,3 200 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 201 | classes=80 202 | num=6 203 | jitter=.3 204 | ignore_thresh = .7 205 | truth_thresh = 1 206 | random=1 -------------------------------------------------------------------------------- /pyvision/detection/yolov3/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/yolov3/data/__init__.py -------------------------------------------------------------------------------- /pyvision/detection/yolov3/data/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /pyvision/detection/yolov3/issues.md: -------------------------------------------------------------------------------- 1 | - [ ] check gpu flags throughout repo -------------------------------------------------------------------------------- /pyvision/detection/yolov3/readme.md: -------------------------------------------------------------------------------- 1 | # You Only Look Once v3 (YOLOv3) 2 | 3 | YOLOv3 is a state of the art object detection algorithm. 4 | 5 | Check out [usage](#Usage) to start using YOLOv3 in your project or check [summary](#Summary) for implementation details. 6 | 7 | Do check out their [website](https://pjreddie.com/darknet/yolo/) or read the [paper](https://pjreddie.com/media/files/papers/YOLOv3.pdf). 
8 | 9 | Check out this standalone [notebook](https://github.com/pranjaldatta/PyVision/blob/master/demo/detection/yolov3/yolov3_demo.ipynb) to see how easily you can use YOLOv3 in 3-4 lines! 10 | 11 | If the above link does not work, please look [here](https://nbviewer.jupyter.org/github/pranjaldatta/PyVision/blob/master/demo/detection/yolov3/yolov3_demo.ipynb). 12 | 13 | ## Summary 14 | 15 | Currently, PyVision YOLOv3 supports the model listed below. The pretrained models were provided by the author. More details can be accessed [here](https://pjreddie.com/darknet/yolo/). 16 | 17 | | Model | Train Dataset| Test Dataset | mAP | FPS| Available | 18 | --------|------------|------|---|----|-----| 19 | | YOLOv3-416 (default) | COCO-trainval | test-dev | 55.3 | 35 | Yes | 20 | | YOLOv3-tiny | COCO-trainval | test-dev | 33.1 | 220| Yes 21 | 22 | ## Usage 23 | 24 | For detailed documentation and parameters, refer to docstrings/source code. 25 | 26 | **Brief Usage Summary:** 27 | 28 | The model setup is done via the YOLOv3 class exposed in *PyVison.detection.yolov3* . All model related configuration ranging from model type to confidence thresholds can be set throught the class constructor. 29 | 30 | Detection is done through the *detect()* method in the YOLOv3 class. Again, it offers some parameters for customisation that can override the general class configuration. Refer to source code docstrings for more details. 31 | 32 | **Quick Start:** 33 | 34 | - To use the default *YOLOv3-416* model, 35 | 36 | ``` 37 | from pyvision.detection import yolov3 38 | 39 | yolo = yolov3.YOLOv3() 40 | 41 | # img is the images in array format with boxes drawn 42 | # objs is the list of detections and box coordinates 43 | imgs, objs = yolo.detect() 44 | ``` 45 | 46 | - To use *YOLOv3-tiny* model: 47 | 48 | ``` 49 | from pyvision.detection import yolov3 50 | 51 | yolo = yolov3.YOLOv3(model="yolov3-tiny") 52 | 53 | # img is the images in array format with boxes drawn 54 | # objs is the list of detections and box coordinates 55 | imgs, objs = yolo.detect() 56 | ``` 57 | 58 | - To list supported models, 59 | 60 | ``` 61 | from pyvision.detection import yolov3 62 | 63 | print(yolov3.available_models()) 64 | ``` 65 | 66 | - To run **tests**, from repo root, run the following command from terminal 67 | 68 | ``` 69 | $ python tests/detection/yolov3/yolo_test.py 70 | ``` 71 | 72 | ## Contributor 73 | 74 | - [Pranjal Datta](https://github.com/pranjaldatta) -------------------------------------------------------------------------------- /pyvision/detection/yolov3/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/yolov3/utils/__init__.py -------------------------------------------------------------------------------- /pyvision/detection/yolov3/utils/box_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | import numpy as np 6 | import cv2 7 | import matplotlib.pyplot as plt 8 | import pickle as pkl 9 | import random 10 | 11 | def iou(box1, box2, device): 12 | """ 13 | calculates iou between two boxes box1 and box2 14 | """ 15 | b1x1, b1y1, b1x2, b1y2 = box1[:,0], box1[:,1], box1[:,2], box1[:,3] 16 | b2x1, b2y1, b2x2, b2y2 = box2[:,0], box2[:,1], box2[:,2], box2[:,3] 17 | 18 | inter_x1 = torch.max(b1x1, b2x1) 19 | inter_y1 = torch.max(b1y1, 
b2y1) 20 | inter_x2 = torch.min(b1x2, b2x2) 21 | inter_y2 = torch.min(b1y2, b2y2) 22 | 23 | inter_shape = inter_x1.shape 24 | 25 | if torch.cuda.is_available() and device is not "cpu": 26 | inter_area = torch.max(inter_x2-inter_x1+1.0, torch.zeros(inter_shape).cuda())*torch.max(inter_y2-inter_y1+1.0, torch.zeros(inter_shape).cuda()) 27 | else: 28 | inter_area = torch.max(inter_x2-inter_x1+1.0, torch.zeros(inter_shape))*torch.max(inter_y2-inter_y1+1.0, torch.zeros(inter_shape)) 29 | 30 | box1_area = (b1x2 - b1x1 + 1.0) * (b1y2 - b1y1 + 1.0) 31 | box2_area = (b2x2 - b2x1 + 1.0) * (b2y2 - b2y1 + 1.0) 32 | 33 | iou = inter_area / (box1_area + box2_area - inter_area) 34 | 35 | return iou 36 | 37 | 38 | def draw_box(pred, orig_img, cls, colors): 39 | """ 40 | draw the predicted bounding boxes on a given image. 41 | designed for single images. 42 | For multi batch support, supply singular image iteratively 43 | """ 44 | 45 | coords1 = tuple(pred[1:3].int()) 46 | coords2 = tuple(pred[3:5].int()) 47 | label = "{0}".format(cls) 48 | color = random.choice(colors) 49 | cv2.rectangle(orig_img, coords1, coords2, color, 2) 50 | text_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0] 51 | coords2 = coords1[0] + text_size[0] + 3, coords1[1] + text_size[1] + 4 52 | cv2.rectangle(orig_img, coords1, coords2, color, -1) 53 | cv2.putText(orig_img, label, (coords1[0], coords1[1]+text_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 1, [255,255,255], 1) 54 | return orig_img 55 | -------------------------------------------------------------------------------- /pyvision/detection/yolov3/utils/pallete: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/detection/yolov3/utils/pallete -------------------------------------------------------------------------------- /pyvision/detection/yolov3/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | def parse_config(path): 2 | """ 3 | This method parses a config file and constructs a list of blocks. 4 | 5 | Each block is a singular unit in the architecture as explained in 6 | the paper. Blocks are represented as a dictionary in the list. 7 | 8 | Input: 9 | - path: path to the config file. 10 | 11 | Returns: 12 | - a list containing a dictionary of individual block information. 
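    Example (illustrative; run from the repo root against the bundled config,
    noting that all parsed values are strings):

        >>> blocks = parse_config("pyvision/detection/yolov3/config/yolov3-tiny.cfg")
        >>> blocks[0]["type"], blocks[0]["batch"]
        ('net', '1')
        >>> blocks[1]["type"], blocks[1]["filters"]
        ('convolutional', '16')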
13 | """ 14 | cfg_file = open(path, "r") 15 | 16 | lines = cfg_file.read().split("\n") 17 | lines = [line for line in lines if len(line) > 0] 18 | lines = [line for line in lines if line[0] != '#'] 19 | lines = [line.strip() for line in lines] 20 | 21 | block = {} 22 | blocks_list = [] 23 | 24 | for line in lines: 25 | if line[0] == "[": 26 | if len(block) != 0: 27 | blocks_list.append(block) 28 | block = {} 29 | block["type"] = line[1:-1].rstrip() 30 | else: 31 | idx, value = line.split("=") 32 | block[idx.rstrip()] = value.lstrip() 33 | blocks_list.append(block) 34 | 35 | return blocks_list 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /pyvision/detection/yolov3/utils/preprocess.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | import numpy as np 6 | import cv2 7 | import matplotlib.pyplot as plt 8 | from PIL import Image 9 | 10 | 11 | def letterbox_img(img, dims): 12 | """ 13 | resize image keeping aspect ratio intact using padding 14 | """ 15 | img_w, img_h = img.shape[1], img.shape[0] 16 | w, h = dims 17 | new_width = int(img_w * min(w/img_w, h/img_h)) 18 | new_height = int(img_h * min(w/img_w, h/img_h)) 19 | img_resized = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) 20 | 21 | canvas = np.full((dims[1], dims[0], 3), 128) 22 | 23 | canvas[(h-new_height)//2:(h-new_height)//2 + new_height, (w-new_width)//2:(w-new_width)//2 + new_width, :] = img_resized 24 | 25 | return canvas 26 | 27 | def prepare_img_cv2(img, dims): 28 | """ 29 | prepare image for forward pass. 30 | 31 | returns a Tensor 32 | """ 33 | # type check 34 | if not isinstance(img, np.ndarray): 35 | raise TypeError("expected . got <{}>".format(type(img))) 36 | 37 | img_dims = (img.shape[1], img.shape[0]) 38 | _img = (letterbox_img(img, (dims, dims))) 39 | _img_new = _img[:,:,::-1].transpose((2,0,1)).copy() 40 | _img_new = torch.from_numpy(_img_new).float().div(255.0).unsqueeze(0) 41 | 42 | return _img_new, img, img_dims 43 | 44 | 45 | def prepare_img_pil(img, dims): 46 | """ 47 | prepares a PIL image for forward pass 48 | 49 | returns a Tensor 50 | """ 51 | # type check 52 | if not isinstance(img, Image.Image): 53 | raise TypeError("expected . 
got <{}>".format(type(img))) 54 | 55 | original_img = img 56 | img = img.convert("RGB") 57 | img_dims = img.size 58 | img = img.resize(dims) 59 | img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes())) 60 | img = img.view(*dims, 3).transpose(0,1).transpose(0,2).contiguous() 61 | img = img.view(1, 3, *dims) 62 | img = img.float().div(255.0) 63 | return (img, original_img, img_dims) 64 | 65 | 66 | -------------------------------------------------------------------------------- /pyvision/detection/yolov3/utils/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | import numpy as np 6 | import cv2 7 | import matplotlib.pyplot as plt 8 | 9 | from .box_utils import iou 10 | 11 | 12 | def load_classes(path): 13 | with open(path) as class_file: 14 | class_names = class_file.read().split("\n")[:-1] 15 | return class_names 16 | 17 | def predict_transforms(preds, input_dims, anchors, n_classes, device='cpu'): 18 | 19 | batch_size = preds.size(0) 20 | stride = input_dims // preds.size(2) 21 | grid_size = input_dims // stride 22 | box_attrs = 5 + n_classes 23 | n_anchors = len(anchors) 24 | 25 | anchors = [(a[0]/stride, a[1]/stride) for a in anchors] 26 | 27 | preds = preds.view(batch_size, box_attrs*n_anchors, grid_size*grid_size) 28 | preds = preds.transpose(1,2).contiguous() 29 | preds = preds.view(batch_size, grid_size*grid_size*n_anchors, box_attrs) 30 | 31 | preds[:,:,0] = torch.sigmoid(preds[:,:,0]) 32 | preds[:,:,1] = torch.sigmoid(preds[:,:,1]) 33 | preds[:,:,4] = torch.sigmoid(preds[:,:,4]) 34 | 35 | grid_len = np.arange(grid_size) 36 | a, b = np.meshgrid(grid_len, grid_len) 37 | 38 | x_offset = torch.FloatTensor(a).view(-1,1) 39 | y_offset = torch.FloatTensor(b).view(-1,1) 40 | 41 | 42 | x_offset = x_offset.to(device) 43 | y_offset = y_offset.to(device) 44 | 45 | x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1, n_anchors) 46 | x_y_offset = x_y_offset.view(-1, 2).unsqueeze(0) 47 | 48 | preds[:,:,:2] += x_y_offset 49 | 50 | anchors = torch.FloatTensor(anchors) 51 | 52 | anchors = anchors.to(device) 53 | 54 | anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0) 55 | preds[:,:,2:4] = torch.exp(preds[:,:,2:4])*anchors 56 | 57 | preds[:,:,5:5+n_classes] = torch.sigmoid(preds[:,:,5:5+n_classes]) 58 | 59 | preds[:,:,:4] *= stride 60 | 61 | 62 | return preds 63 | 64 | 65 | def _unique(t): 66 | 67 | t_numpy = t.cpu().numpy() 68 | t_np_unique = np.unique(t_numpy) 69 | t_unique = torch.from_numpy(t_np_unique) 70 | 71 | unique_tensor = torch.zeros(t_unique.shape) # error prone 72 | unique_tensor.copy_(t_unique) 73 | return unique_tensor 74 | 75 | 76 | 77 | 78 | 79 | 80 | def postprocess(preds, device, confidence, n_classes, nms=True, nms_conf=0.5): 81 | """ 82 | We perform confidence thresholding and nms suppression in this 83 | method 84 | """ 85 | 86 | # confidence thresholding 87 | 88 | conf_mask = (preds[:,:,4] > confidence).float().unsqueeze(2) 89 | preds = preds*conf_mask 90 | 91 | 92 | # checks for non zero indices. If no non zero index remains 93 | # shape of ind_nz will be (x, 0). 
In that case we return 0 94 | try: 95 | ind_nz = torch.nonzero(preds[:,:,4]).transpose(0,1).contiguous() 96 | if ind_nz.size(1) == 0: 97 | raise Exception 98 | except: 99 | return 0 100 | 101 | 102 | # translate the coords from (center_x, center_y, height, width) 103 | # to (top_left_x, top_left_y, bottom_right_x, bottom_right_y) 104 | 105 | box_corners = torch.zeros_like(preds) #error prone 106 | box_corners[:,:,0] = (preds[:,:,0] - preds[:,:,2]/2) 107 | box_corners[:,:,1] = (preds[:,:,1] - preds[:,:,3]/2) 108 | box_corners[:,:,2] = (preds[:,:,0] + preds[:,:,2]/2) 109 | box_corners[:,:,3] = (preds[:,:,1] + preds[:,:,3]/2) 110 | preds[:,:,:4] = box_corners[:,:,:4] 111 | 112 | batch_size = preds.size(0) 113 | 114 | output = torch.zeros(1, preds.size(2) + 1) 115 | write = False 116 | 117 | for index in range(batch_size): 118 | 119 | image_preds = preds[index] 120 | 121 | max_conf, max_conf_score = torch.max(image_preds[:,5:5+n_classes], 1) 122 | max_conf = max_conf.float().unsqueeze(1) 123 | max_conf_score = max_conf_score.float().unsqueeze(1) 124 | _seq = (image_preds[:,:5], max_conf, max_conf_score) 125 | image_preds = torch.cat(_seq, 1) 126 | 127 | non_zero_indices = (torch.nonzero(image_preds[:,4])) 128 | 129 | _image_preds = image_preds[non_zero_indices.squeeze(), :].view(-1,7) 130 | 131 | try: 132 | img_classes = _unique(_image_preds[:,-1]) 133 | img_classes = img_classes.to(device) 134 | except: 135 | continue 136 | 137 | # now we do nms classwise 138 | for _class in img_classes: 139 | 140 | cls_mask = _image_preds*(_image_preds[:,-1] == _class).float().unsqueeze(1) 141 | cls_mask_index = torch.nonzero(cls_mask[:, -2]).squeeze() 142 | 143 | image_pred_class = _image_preds[cls_mask_index].view(-1, 7) 144 | 145 | # sort the detections such that the entry with maximum objectness 146 | # score is at the top 147 | conf_sort_index = torch.sort(image_pred_class[:,4], descending=True)[1] 148 | image_pred_class = image_pred_class[conf_sort_index] 149 | num_dets = image_pred_class.size(0) 150 | 151 | if nms: 152 | 153 | # we run nms for each detection 154 | for i in range(num_dets): 155 | 156 | try: 157 | ious = iou(image_pred_class[i].unsqueeze(0), image_pred_class[i+1:], device) 158 | except ValueError: 159 | #print("ValueError: at iou calculation") 160 | break 161 | except IndexError: 162 | #print("IndexError: at iou calculation") 163 | break 164 | 165 | # zero out all the entries whose iou value exceed the threshold 166 | iou_mask = (ious < nms_conf).float().unsqueeze(1) 167 | image_pred_class[i+1:] *= iou_mask 168 | 169 | # Remove the zero entries 170 | non_zero_idx = torch.nonzero(image_pred_class[:,4]).squeeze() 171 | image_pred_class = image_pred_class[non_zero_idx].view(-1, 7) 172 | 173 | 174 | batch_inds = torch.zeros(image_pred_class.size(0), 1).fill_(index) 175 | batch_inds = batch_inds.to(device) 176 | _to_cat = (batch_inds, image_pred_class) 177 | 178 | if not write: 179 | output = torch.cat(_to_cat, 1) 180 | write = True 181 | else: 182 | _outs = torch.cat(_to_cat, 1) 183 | output = torch.cat((output, _outs)) 184 | 185 | 186 | return output 187 | -------------------------------------------------------------------------------- /pyvision/face_detection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/face_detection/__init__.py -------------------------------------------------------------------------------- 
/pyvision/face_detection/facenet/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import Facenet -------------------------------------------------------------------------------- /pyvision/face_detection/facenet/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/face_detection/facenet/config/__init__.py -------------------------------------------------------------------------------- /pyvision/face_detection/facenet/config/weights_download.json: -------------------------------------------------------------------------------- 1 | { 2 | "facenet-vggface2.pt" : "1m_CVVqbGNL-2LSFaVf1mHm_VhoSvVLHe", 3 | "facenet-casia-webface.pt" : "1JkzXiTWvsMgIz-ViOPicF8_PUrJFYfEM" 4 | } -------------------------------------------------------------------------------- /pyvision/face_detection/facenet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from ....misc.mtcnn import MTCNN -------------------------------------------------------------------------------- /pyvision/face_detection/facenet/readme.md: -------------------------------------------------------------------------------- 1 | # FaceNet: A Unified Embedding for Face Recognition and Clustering 2 | 3 | FaceNet is one of the most popular face recognition architectures. 4 | 5 | ## Paper 6 | The paper can be read [here](https://arxiv.org/pdf/1503.03832.pdf). 7 | 8 | ## Summary 9 | 10 | - The implementation uses a **Inception-Resnet-v1** architecture to generate the embeddings. 11 | 12 | - Currently two models pretrained on *casia-webface* and *vggface2* are made available. These weights were originally made available by David Sandberg's implementation [here](https://github.com/davidsandberg/facenet). 13 | 14 | - For usage details check **Usage**, but to summarize, the implementation essentially exposes functions for embedding generation and embedding verification as well i.e. a basic Facial Recognition pipeline has been made available 15 | 16 | ## Quick Usage 17 | 18 | Check [demo](https://github.com/pranjaldatta/PyVision/tree/master/demo/face_detection/facenet). 19 | 20 | ## Usage 21 | 22 | - Import facenet. 23 | 24 | ``` 25 | from pyvision.face_detection.facenet import Facenet 26 | ``` 27 | 28 | - Initialize the class. Pretrained for the moment can be casia-webface or vggface2 models. 29 | 30 | ``` 31 | fc = Facenet(pretrianed="casia-webface", saveLoc="save", saveName="det.pkl") 32 | ``` 33 | 34 | - Now we gotta generate embeddings and store the embeddings for comparison. For this we use the **generate_embeddings()** function. There are two ways images can be supplied to this function: 35 | 36 | 1. Pass a directory containing images. In that case, the individual image names will be used as image labels 37 | 38 | 2. Pass a singular image/path. In this case, a label has to be passed by the user. This gives the most flexibility and hence is recommended. 39 | 40 | Also, the *save* parameter can be used to specify a custom location for a given embedding that is different than the one specified during model init. 41 | 42 | Also it returns a list of dicts containing labels and their associated embeddings. 43 | 44 | 45 | ``` 46 | embeddings = fc.generate_embeddings(...) 47 | ``` 48 | 49 | - Now to run "recognition" on an image, we use the **verify_embeddings()** function. 
Unline the generate_embeddings() function, this function only accepts singular image or image paths i.e. no directories are allowed. 50 | A few things to note regarding the function: 51 | 52 | 1. Embeddings can either be passed directly as a parameter (a list of dicts) or a path to a stored embedding can be passed. 53 | 54 | 2. The comparison function uses *l2_norm* to calculate distances between embeddings. Other distance calculation metrics like *cosine_similarity* can be added in the future. 55 | 56 | 3. The *compare_embeddings()* function needs to be supplied with a label and the function will check whether the given embedding is *similar* to the previously known embeddings associated with the supplied label. 57 | 58 | 4. Return a tuple (True/False, prediction, min_l2_loss) 59 | 60 | ``` 61 | did_match, pred_label, l2_loss = fc.compare_embeddings(...) 62 | ``` 63 | 64 | - For more details look [tests](https://github.com/pranjaldatta/PyVision/tree/master/tests/face_detection/facenet). 65 | 66 | ## Note 67 | While implementing the pretrained models, it was found that often in many cases classifications were not accurate. So it is recommended that care is taken while using facenet. 68 | -------------------------------------------------------------------------------- /pyvision/face_detection/facenet/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/face_detection/facenet/utils/__init__.py -------------------------------------------------------------------------------- /pyvision/face_detection/facenet/utils/extract_face.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | from PIL import Image 4 | import numpy as np 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | def crop_and_tensorify(img, box, size=160, margin=0, save=None, show=True): 12 | """Extract face + margin from PIL Image given bounding box coordinates 13 | 14 | Arguments: 15 | -> img: PIL Image from which faces have to be extracted 16 | -> box: Bounding box coordinates in (top_left_x, top_left_y, bottom_right_x, bottom_right_y) 17 | -> size: size of the crop 18 | -> margin: around bounding boxes 19 | -> save: location to save the crop 20 | -> show: show the crops 21 | 22 | Returns: 23 | -> torch.Tensor: The face in a tensor format 24 | """ 25 | if not isinstance(img, Image.Image): 26 | raise TypeError("PIL Image accepted. 
Got img of type: ", type(img)) 27 | 28 | box = box[:4] 29 | 30 | margin = [ 31 | margin * (box[2] - box[0]) / (size - margin), 32 | margin * (box[3] - box[1]) / (size - margin) 33 | ] 34 | 35 | box = [ 36 | int(max(box[0] - margin[0]/2 , 0)), 37 | int(max(box[1] - margin[1]/2 , 0)), 38 | int(min(box[2] + margin[0]/2 , img.size[0])), 39 | int(min(box[3] + margin[1]/2 , img.size[1])) 40 | ] 41 | 42 | face = img.crop(box).resize((size, size), Image.BILINEAR) 43 | 44 | if save is not None: 45 | face.save(save+"/detection.png") 46 | if show: 47 | face.show() 48 | 49 | face = torch.tensor(np.float32(face)) 50 | 51 | return face 52 | 53 | def prewhiten_func(x): 54 | mean = x.mean() 55 | std = x.std() 56 | std_adj = std.clamp(min=1.0/(float(x.numel())**5)) 57 | y = (x - mean) / std_adj 58 | return y 59 | 60 | 61 | def extract_face(mtcnn_module, img, prewhiten=True, conf_thresh=.6): 62 | """ 63 | extract_face takes in a PIL or cv2 image or a path to an image. 64 | Runs MTCNN on the image to detect the face, crop the faces, convert 65 | to tensor and return a tensor and the associated face confidences 66 | 67 | Argument: 68 | -> img: PIL or cv2 Image. Can be a path to 69 | -> conf_thresh: Minimum confidence threshold for MTCNN 70 | 71 | Returns: 72 | -> face_tensors, props = cropped faces converted into tensors and their 73 | associated confidences repectively 74 | """ 75 | 76 | if mtcnn_module is None: 77 | raise ValueError("mtcnn_module cannot be None") 78 | 79 | if isinstance(img, str): 80 | img = Image.open(img) 81 | elif isinstance(img, np.ndarray): 82 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 83 | img = Image.fromarray(img) 84 | 85 | # generating detections 86 | detections = mtcnn_module.detect(img) 87 | 88 | # crop every face, convert to tensor 89 | faces_list = [] 90 | for detection in detections: 91 | 92 | face = crop_and_tensorify(img, detection, show=False) 93 | if prewhiten: 94 | face = prewhiten_func(face) 95 | faces_list.append(face) 96 | 97 | faces_list = torch.stack(faces_list) 98 | 99 | return faces_list # return face detections probs also 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /pyvision/face_detection/facenet/utils/layer_factory.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | import os 6 | import numpy as np 7 | 8 | class BasicConv2d(nn.Module): 9 | 10 | def __init__(self, in_channels, out_channels, size, stride, padding=0): 11 | 12 | super().__init__() 13 | 14 | self.conv = nn.Conv2d(in_channels, out_channels, size, 15 | stride, padding, bias=False) 16 | # batch normalize values are defined the Sandberg Implementation 17 | self.bn = nn.BatchNorm2d(out_channels, eps=0.001, momentum=0.1, affine=True) 18 | self.relu_fn = nn.ReLU(inplace=False) 19 | 20 | def forward(self, x): 21 | x = self.conv(x) 22 | x = self.bn(x) 23 | x = self.relu_fn(x) 24 | 25 | return x 26 | 27 | class Block35_A(nn.Module): 28 | """ 29 | Builds the 32x32 block. (Referred in the paper as Inception- 30 | Resnet-A) 31 | """ 32 | def __init__(self, scale=1.0): 33 | 34 | super().__init__() 35 | 36 | self.scale = scale 37 | 38 | # now we construct the different branches. 
39 | # Refer to Inception-Resnet-A diagram in the paper 40 | self.branch0 = BasicConv2d(256, 32, 1, 1) 41 | 42 | self.branch1 = nn.Sequential( 43 | BasicConv2d(256, 32, 1, 1), 44 | BasicConv2d(32, 32, 3, 1, 1) 45 | ) 46 | 47 | self.branch2 = nn.Sequential( 48 | BasicConv2d(256, 32, 1, 1), 49 | BasicConv2d(32, 32, 3, 1, 1), 50 | BasicConv2d(32, 32, 3, 1, 1) 51 | ) 52 | 53 | self.conv2d = nn.Conv2d(96, 256, 1, 1) 54 | self.relu_fn = nn.ReLU(inplace=False) 55 | 56 | def forward(self, x): 57 | 58 | x0 = self.branch0(x) 59 | x1 = self.branch1(x) 60 | x2 = self.branch2(x) 61 | 62 | x_cat = torch.cat((x0, x1, x2), 1) 63 | 64 | out = self.conv2d(x_cat) 65 | out = out * self.scale + x 66 | out = self.relu_fn(out) 67 | 68 | return out 69 | 70 | class Block17_B(nn.Module): 71 | """ 72 | Builds the 17x17 Block. (referred to as Inception-Resnet-B) 73 | """ 74 | def __init__(self, scale=1.0): 75 | 76 | super().__init__() 77 | 78 | self.scale = scale 79 | 80 | self.branch0 = BasicConv2d(896, 128, 1, 1) 81 | 82 | self.branch1 = nn.Sequential( 83 | BasicConv2d(896, 128, 1, 1), 84 | BasicConv2d(128, 128, size=(1, 7), stride=1, padding=(0, 3)), 85 | BasicConv2d(128, 128, size=(7, 1), stride=1, padding=(3, 0)) 86 | ) 87 | 88 | self.conv2d = nn.Conv2d(256, 896, 1, 1) 89 | self.relu_fn = nn.ReLU(inplace=False) 90 | 91 | def forward(self, x): 92 | 93 | x0 = self.branch0(x) 94 | x1 = self.branch1(x) 95 | 96 | x_cat = torch.cat((x0, x1), 1) 97 | 98 | out = self.conv2d(x_cat) 99 | out = out * self.scale + x 100 | out = self.relu_fn(out) 101 | 102 | return out 103 | 104 | class Block8_C(nn.Module): 105 | """ 106 | Implements the 8x8 Block. (Referred to as Inception-Resnet-C in the paper) 107 | """ 108 | def __init__(self, scale=1.0, relu=True): 109 | 110 | super().__init__() 111 | 112 | self.scale = scale 113 | self.relu = relu 114 | 115 | self.branch0 = BasicConv2d(1792, 192, 1, 1) 116 | 117 | self.branch1 = nn.Sequential( 118 | BasicConv2d(1792, 192, 1, 1), 119 | BasicConv2d(192, 192, (1, 3), 1, (0, 1)), 120 | BasicConv2d(192, 192, (3, 1), 1, (1, 0)) 121 | ) 122 | 123 | self.conv2d = nn.Conv2d(384, 1792, 1, 1) 124 | if self.relu: 125 | self.relu_fn = nn.ReLU(inplace=False) 126 | 127 | def forward(self, x): 128 | 129 | x0 = self.branch0(x) 130 | x1 = self.branch1(x) 131 | 132 | x_cat = torch.cat((x0, x1), 1) 133 | out = self.conv2d(x_cat) 134 | out = out * self.scale + x 135 | if self.relu: 136 | out = self.relu_fn(out) 137 | 138 | return out 139 | 140 | 141 | class Reduction_A(nn.Module): 142 | """ 143 | Builds the Reduction A module. Refer to paper for details 144 | """ 145 | def __init__(self): 146 | 147 | super().__init__() 148 | 149 | self.branch0 = BasicConv2d(256, 384, 3, 2) 150 | 151 | self.branch1 = nn.Sequential( 152 | BasicConv2d(256, 192, 1, 1), 153 | BasicConv2d(192, 192, 3, 1, 1), 154 | BasicConv2d(192, 256, 3, 2) 155 | ) 156 | 157 | self.branch2 = nn.MaxPool2d(3, stride=2) 158 | 159 | def forward(self, x): 160 | 161 | x0 = self.branch0(x) 162 | x1 = self.branch1(x) 163 | x2 = self.branch2(x) 164 | 165 | out = torch.cat((x0, x1, x2), 1) 166 | 167 | return out 168 | 169 | class Reduction_B(nn.Module): 170 | """ 171 | Builds Reduction B module. 
For more details check the paper 172 | """ 173 | def __init__(self): 174 | 175 | super().__init__() 176 | 177 | self.branch0 = nn.Sequential( 178 | BasicConv2d(896, 256, 1, 1), 179 | BasicConv2d(256, 384, 3, 2) 180 | ) 181 | 182 | self.branch1 = nn.Sequential( 183 | BasicConv2d(896, 256, 1, 1), 184 | BasicConv2d(256, 256, 3, 2) 185 | ) 186 | 187 | self.branch2 = nn.Sequential( 188 | BasicConv2d(896, 256, 1, 1), 189 | BasicConv2d(256, 256, 3, 1, 1), 190 | BasicConv2d(256, 256, 3, 2) 191 | ) 192 | 193 | self.branch3 = nn.MaxPool2d(3, 2) 194 | 195 | def forward(self, x): 196 | 197 | x0 = self.branch0(x) 198 | x1 = self.branch1(x) 199 | x2 = self.branch2(x) 200 | x3 = self.branch3(x) 201 | out = torch.cat((x0, x1, x2, x3), 1) 202 | return out -------------------------------------------------------------------------------- /pyvision/gans/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/__init__.py -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/README.md: -------------------------------------------------------------------------------- 1 | # Deep Convolutional GAN 2 | This is an implementation of the research paper "Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks" written by Alec Radford, Luke Metz, Soumith Chintala. 3 | 4 | Check out this notebook and run the DC_GAN inferences in just 3 lines. 5 | 6 | ## Dependencies 7 | - torch==1.8.0 8 | - torchvision==0.9.0 9 | - numpy==1.20.3 10 | - matplotlib==3.3.4 11 | - IPython==7.23.1 12 | - gdown==3.13.0 13 | 14 | ## Dataset 15 | The original paper had used three datasets for training the DCGAN namely - *Large-scale Scene Understanding (LSUN) (Yu et al., 2015), Imagenet-1k and a newly assembled Faces dataset*. However due to computational and other limitations, we have used Large-scale CelebFaces Attributes (CelebA) Dataset. 16 | 17 | ### Guidelines to download, setup and use the dataset 18 | The CelebA dataset may be downloaded here as a file named *img_align_celeba.zip*. 19 | 20 | **Please write the following commands on your terminal to extract the file in the proper directory** 21 | ``` 22 | $ mkdir celeba 23 | $ unzip -d 24 | ``` 25 | The resulting directory structure should be: 26 | ``` 27 | /path/to/celeba 28 | -> img_align_celeba 29 | -> 188242.jpg 30 | -> 173822.jpg 31 | -> 284702.jpg 32 | -> 537394.jpg 33 | ... 34 | ``` 35 |
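The layout above is the standard one expected by `torchvision.datasets.ImageFolder` (a dataset root containing at least one sub-directory of images), which is why the structure must be preserved. Purely as an illustration (this is not the module's own data-loading code, and the path is a placeholder), such a loader with the hyper-parameters listed in the documentation (image size 64, batch size 128) could look like:

```python
import torch
import torchvision.datasets as dset
import torchvision.transforms as transforms

dataset = dset.ImageFolder(
    root="/path/to/celeba",        # the folder that contains img_align_celeba/
    transform=transforms.Compose([
        transforms.Resize(64),     # image_size from the documentation
        transforms.CenterCrop(64),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]),
)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=128, shuffle=True)
```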
36 | 37 | **Note**: You may use any other dataset of your choice. However, please ensure that the directory structure remains the same for the code to be compatible with it. 38 | 39 | ## Quick Start 40 | - Incase you want to use some other dataset to train the DCGAN (with 1 GPU), please initialize the DCGAN module with your desired dataset path and train as: 41 | 42 | ```python 43 | from pyvision.gans.deep_convolutional_gan.dcgan import DCGAN 44 | 45 | dc_gan = DCGAN(data = ) 46 | img_list, G_losses, D_losses = dc_gan.train() 47 | ``` 48 | 49 | - Incase you have either no GPU (0) or more than 1 GPU on your machine, consider changing the ngpu parameter while initializing the DCGAN module with your desired dataset path and train as: 50 | 51 | 52 | ```python 53 | from pyvision.gans.deep_convolutional_gan.dcgan import DCGAN 54 | 55 | dc_gan = DCGAN(data = , ngpu = ) 56 | img_list, G_losses, D_losses = dc_gan.train() 57 | ``` 58 | 59 | **Note**: Is is advisable to use a GPU for training because training the DCGAN is computationally very expensive. 60 | 61 | - To get the inferences directly with our pre-trained model please initialize the DeepConvGAN with the desired path to the model and get the inferences as: 62 | 63 | ```python 64 | 65 | from pyvision.gans.deep_convolutional_gan import DeepConvGAN 66 | 67 | DeepConvGAN.inference(DeepConvGAN, set_weight_dir='dcgan-model.pth' , set_gen_dir='') 68 | ``` 69 | 70 | ## Tests 71 | To run tests from PyVision root, run, 72 | 73 | $ python tests/gans/deep_convolutional_gan/gan_test.py 74 | 75 | ## Results from implementation 76 | - Plot to see how D and G’s losses changed during training 77 | 78 | 79 | 80 | - Batches of fake data from G 81 | 82 |     83 | 84 | Check out the documentation here. 85 | 86 | ### Citation 87 | ``` 88 | @inproceedings{liu2015faceattributes, 89 | title = {Deep Learning Face Attributes in the Wild}, 90 | author = {Liu, Ziwei and Luo, Ping and Wang, Xiaogang and Tang, Xiaoou}, 91 | booktitle = {Proceedings of International Conference on Computer Vision (ICCV)}, 92 | month = {December}, 93 | year = {2015} 94 | } 95 | ``` 96 | 97 | ## Contributed by: 98 | - Indira Dutta 99 | - Srijarko Roy 100 | -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import DeepConvGAN 2 | -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/config/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/config/weights_download.json: -------------------------------------------------------------------------------- 1 | { 2 | "dcgan-model.pth": "1EMm3rdVZvNoT2y4VtULFNzIwkIQslWQT" 3 | } 4 | -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/dcgan/__init__.py: -------------------------------------------------------------------------------- 1 | from .dcgan import DCGAN, Generator, Discriminator -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/docs/documentation.md: -------------------------------------------------------------------------------- 1 | ## Model Components 2 | The DCGAN Architecture has the following 
components: 3 | 4 | - The Generator uses fractional-strided convolutions followed by batch normalisation and ReLU activation for all layers except for the last that uses tanh activation. 5 | - The Discriminator uses strided convolutions followed by batch normalisation and LeakyReLU activation for all layers except for a single sigmoid output. 6 | 7 | 8 | ## Parameters 9 | 10 | Parameter |      Value      | 11 | :------------: | :---: | 12 | batch_size | 128 | 13 | image_size | 64 | 14 | nc | 3 | 15 | nz | 100 | 16 | ngf | 64 | 17 | ndf | 64 | 18 | num_epochs | 5 | 19 | lr | 0.0002 | 20 | beta1 | 0.5 | 21 | ngpu | 1 | 22 | 23 | ## Result Documentation 24 | After running *DCGAN* on the CelebA Dataset for 5 epochs on GPU (computationally very expensive) we got the following output images along with the Generator and Discriminator losses. 25 | 26 | ## Batch of images from the Generator after 5 epochs 27 | 28 | 29 | ## Losses after each epoch 30 | No. of Epochs | Generator Loss | Discriminator Loss | 31 | :------------: | :------------: | :------------: | 32 | 1 | 0.7894 | 1.0838 | 33 | 2 | 0.7277 | 1.0489 | 34 | 3 | 0.7796 | 0.9256 | 35 | 4 | 0.6330 | 1.1345 | 36 | 5 | 0.7519 | 1.0138 | 37 | 38 | ## Plot for Generator Loss and Discriminator Loss w.r.t number of iterations 39 | 40 | 41 | -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.parallel 4 | import torch.backends.cudnn as cudnn 5 | import torch.optim as optim 6 | import torch.utils.data 7 | 8 | import torchvision.datasets as dset 9 | import torchvision.transforms as transforms 10 | import torchvision.utils as vutils 11 | 12 | import numpy as np 13 | import os 14 | import json 15 | import gdown 16 | 17 | import matplotlib.pyplot as plt 18 | import matplotlib.animation as animation 19 | from IPython.display import HTML 20 | 21 | from .dcgan import Generator 22 | 23 | __PREFIX__ = os.path.dirname(os.path.realpath(__file__)) 24 | 25 | class DeepConvGAN(object): 26 | 27 | def __init__(self, nc = 3, nz = 100, ngf = 64, ngpu = 1): 28 | 29 | ''' 30 | The constructor has the Parameters which are going to be used to generate the images 31 | 32 | Parameters: 33 | 34 | - nc(default: 3): number of color channels in an image, we have used 3 channels(RGB). 35 | 36 | - nz(default: 100): length of the latent vector that is initially passed into the Generator, according to the paper it is 100. 37 | 38 | - ngf(default: 64): denotes the depth of the feature maps passed through the Generator, according to the paper it is 64. 39 | 40 | - ndf(default: 64): denotes the depth of the feature maps passed through the Discriminator, according to the paper it is 64. 41 | 42 | - ngpu(default: 1): number of GPUs available for training. If no GPU is available, the model will train on CPU. Here, we have only 1 GPU available. 
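        Example (illustrative; comparable to the call shown in the package README):

            gan = DeepConvGAN()   # raises ValueError if ngpu > 0 but CUDA is unavailable
            gan.inference(set_weight_dir="dcgan-model.pth", set_gen_dir="result_img")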
43 | ''' 44 | 45 | if ngpu > 0 and not torch.cuda.is_available(): 46 | raise ValueError('ngpu > 0 but cuda not available') 47 | 48 | self.nc = nc 49 | self.nz = nz 50 | self.ngf = ngf 51 | self.ngpu = ngpu 52 | self.device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu") 53 | 54 | def inference(self, set_weight_dir = 'dcgan-model.pth', set_gen_dir = 'result_img'): 55 | 56 | set_weight_dir = __PREFIX__ + "/weights/" + set_weight_dir 57 | 58 | ''' saving generated images in a directory ''' 59 | def save_image(set_gen_dir): 60 | if os.path.exists(set_gen_dir): 61 | print("Found directory for saving generated images") 62 | return 1 63 | else: 64 | print("Directory for saving images not found, making a directory named 'result_img'") 65 | os.mkdir(set_gen_dir) 66 | return 1 67 | 68 | ''' checking if weights are present ''' 69 | def check_weights(set_weight_dir): 70 | if os.path.exists(set_weight_dir): 71 | print("Found weights") 72 | return 1 73 | else: 74 | print("Downloading weights") 75 | download_weights() 76 | 77 | ''' downloading weights if not present ''' 78 | def download_weights(): 79 | with open(__PREFIX__+"/config/weights_download.json") as fp: 80 | json_file = json.load(fp) 81 | if not os.path.exists(__PREFIX__+"/weights/"): 82 | os.mkdir(__PREFIX__+"/weights/") 83 | url = 'https://drive.google.com/uc?id={}'.format(json_file['dcgan-model.pth']) 84 | gdown.download(url, __PREFIX__+"/weights/dcgan-model.pth", quiet=False) 85 | set_weight_dir = "dcgan-model.pth" 86 | print("Download finished") 87 | 88 | ''' checking if weights are present ''' 89 | check_weights(set_weight_dir) 90 | 91 | '''saving the generated images ''' 92 | save_image(set_gen_dir) 93 | 94 | '''calling the DCGAN for inference ''' 95 | model_GAN = Generator(1, 100, 64, 3) 96 | 97 | ''' uploading the model ''' 98 | checkpoint = torch.load(set_weight_dir) 99 | model_GAN.load_state_dict(checkpoint['generator_state_dict']) 100 | model_GAN.eval() 101 | 102 | ''' saving the generated images''' 103 | def save_new_img(): 104 | 105 | b_size = 512 106 | noise = torch.randn(b_size, 100, 1, 1) 107 | out = model_GAN(noise).detach().cpu() 108 | print("The generated images are saved in the given directory") 109 | 110 | ''' saving the generated images in a list ''' 111 | img_list = [] 112 | for i in range(b_size): 113 | img_list.append(out[i,:,:,:]) 114 | 115 | ''' saving the generated images in jpg format ''' 116 | for i in range(len(img_list)): 117 | generated_image = '{}/generated_image_{}.jpg'.format(set_gen_dir,i) 118 | vutils.save_image(img_list[i], generated_image, padding = 0) 119 | 120 | save_new_img() 121 | -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/losses.png -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/result.png -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/result2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/result2.png -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_0.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_1.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_10.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_11.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_12.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_13.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_14.jpg -------------------------------------------------------------------------------- 
/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_15.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_15.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_2.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_3.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_4.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_5.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_6.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_7.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_8.jpg -------------------------------------------------------------------------------- /pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_9.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/deep_convolutional_gan/results/results_img/generated_image_9.jpg -------------------------------------------------------------------------------- /pyvision/gans/wasserstein_gan/README.md: -------------------------------------------------------------------------------- 1 | # Wasserstein GAN 2 | 3 | This is a PyTorch 1.5.0 implementation of WGAN. 4 | 5 | Check out the paper [here](https://arxiv.org/pdf/1701.07875.pdf). 6 | 7 | **Requirements:** 8 | 9 | 1. Python 3.6+ 10 | 2. Numpy 1.18.5 11 | 3. PyTorch 1.5+ 12 | 4. Gdown 3.11.0 13 | 5. Matplotlib 3.2.1 14 | 6. CUDA - 10.1 15 | 16 | **Dataset** 17 | 18 | CelebA was used for the training of this model, which can be downloaded at [this httpURL](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html). 19 | 20 | 1. If you wish to use your own dataset, the structure should be "datasets/sub_dir/img.png". The dataset will download as a file named img_align_celeba.zip. 21 | 22 | 2. Once downloaded, create a directory named **celeba/** and extract the zip file into that directory. 23 | 24 | 3. The resulting directory structure should be: 25 | ``` 26 | /path/to/celeba 27 | -> img_align_celeba 28 | -> 188242.jpg 29 | -> 173822.jpg 30 | -> 284702.jpg 31 | -> 537394.jpg 32 | ``` 33 | 34 | This is an important step because we will be using the ImageFolder dataset class, which requires there to be subdirectories in the dataset�s root folder. 35 | 36 | **Usage** 37 | 38 | ```python 39 | from model import WassGAN 40 | 41 | # To train the GAN with default parameters 42 | WassGAN(run_type="train") 43 | 44 | # To run inference using the GAN 45 | WassGAN() 46 | ``` 47 | 48 | **Train** 49 | 50 | To train on your own dataset: 51 | 52 | 1. Specify dataset path in wgan.py " dataroot = 'path' " line 44. 53 | 2. You can change other parameters such as batch_size, etc but we suggest to use the ones already provided. 54 | 3. Please change the number of workers defined in wgan.py " workers = " line 49. 55 | 4. More parameters regarding training length, learning rate, etc can be changed in train.py, starting line 178. 56 | 5. Number of epochs can be changed by altering the n_epoch in model.py, line 74. 57 | 58 | ## Inference 59 | 60 | ##### Weights from a pretrained model on CelebA will be downloaded automatically if not specified elsewise. 61 | 62 | In order to run inference on your own trained model: 63 | 64 | 1. Change set_ckpt_dir in model.py 65 | 2. 
You can change the number of images generated by changing the "len" parameter in model.py, line 57 66 | 67 | ## Training details 68 | 69 | * Number of epochs: 135 70 | * Learning rate: 0.00001 71 | * Clamp size: 0.01 72 | * Batch size: 64 73 | * Gpu Used: Nvidia 1660ti 6GB 74 | * Training time: 9 Hrs 75 | 76 | ### Current output 77 | 78 | ![Image](current_output_imgs/test36.png) 79 | -------------------------------------------------------------------------------- /pyvision/gans/wasserstein_gan/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import WassGAN -------------------------------------------------------------------------------- /pyvision/gans/wasserstein_gan/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/wasserstein_gan/config/__init__.py -------------------------------------------------------------------------------- /pyvision/gans/wasserstein_gan/config/weights_download.json: -------------------------------------------------------------------------------- 1 | { 2 | "WGAN-disc.pt": "17yn3z1lYY2HevRrfAOOzEFVqNX68lld8", 3 | "WGAN-gen.pt": "17SP_KIS1iL_kdk0B45UWRe6SBqiXIszt", 4 | "WGAN-stats.pkl": "1E8zIl4tDwRmntGy8x2yvB5h9hLXZpiNN" 5 | } 6 | -------------------------------------------------------------------------------- /pyvision/gans/wasserstein_gan/current_output_imgs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/wasserstein_gan/current_output_imgs/__init__.py -------------------------------------------------------------------------------- /pyvision/gans/wasserstein_gan/current_output_imgs/test36.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/gans/wasserstein_gan/current_output_imgs/test36.png -------------------------------------------------------------------------------- /pyvision/gans/wasserstein_gan/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import torch.nn.functional as F 5 | import torchvision.utils 6 | import numpy as np 7 | import argparse 8 | import os 9 | import subprocess as sp 10 | from .wgan import * 11 | import json 12 | import gdown 13 | from .train import * 14 | 15 | __PREFIX__ = os.path.dirname(os.path.realpath(__file__)) 16 | 17 | class WassGAN: 18 | 19 | 20 | def __init__(self, run_type = "inference"): 21 | print("run_type = ",run_type) 22 | if run_type == "inference": 23 | #self.inference() 24 | pass 25 | 26 | 27 | elif run_type == "train": 28 | #self.train(train_params, ckpt_params, gan_params, n_epoch, data_loader) 29 | pass 30 | 31 | 32 | def train(self, train_params, ckpt_params, gan_params, n_epoch, data_loader): 33 | 34 | raise NotImplementedError("training mode not supported") 35 | 36 | model = CelebA(train_params, ckpt_params, gan_params) 37 | data_loader = wgan.load_dataset() 38 | 39 | torch.manual_seed(100) 40 | n_epoch = 135 # Number of epochs to train for 41 | model.train(n_epoch, data_loader) 42 | 43 | def inference(self, set_ckpt_dir="WGAN-gen.pt", set_gen_dir="gen", device="cpu"): 44 | 45 | 
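# Flow of inference(), summarized for clarity: resolve the checkpoint path under
# <package>/weights/, validate the requested device against torch.cuda.is_available(),
# ensure the output directory (set_gen_dir) exists, download the generator weights via
# gdown (IDs listed in config/weights_download.json) if they are missing, load the WGAN
# generator from the checkpoint, and save 20 generated frames as PNGs into set_gen_dir.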
set_ckpt_dir = __PREFIX__ + "/weights/" + set_ckpt_dir 46 | 47 | if device is not "cpu": 48 | 49 | if not torch.cuda.is_available(): 50 | raise ValueError("cuda not available but got device=", device) 51 | device = "cuda" 52 | 53 | 54 | def gen(set_gen_dir): 55 | #set_gen_dir = "gen" # path to save img directory 56 | if os.path.exists(set_gen_dir): 57 | print("Found gen directory") 58 | return 1 59 | else: 60 | print("Directory for saving images not found, making one") 61 | os.mkdir(set_gen_dir) 62 | set_gen_dir = "gen" 63 | return 1 64 | 65 | def check_weights(): 66 | if os.path.exists(set_ckpt_dir): 67 | print("Found weights") 68 | return 1 69 | else: 70 | print("Downloading weigths") 71 | download_weights() 72 | 73 | def download_weights(): 74 | with open(__PREFIX__+"/config/weights_download.json") as fp: 75 | json_file = json.load(fp) 76 | if not os.path.exists(__PREFIX__+"/weights/"): 77 | os.mkdir(__PREFIX__+"/weights/") 78 | url = 'https://drive.google.com/uc?id={}'.format(json_file['WGAN-gen.pt']) 79 | gdown.download(url, __PREFIX__+"/weights/WGAN-gen.pt", quiet=False) 80 | set_ckpt_dir = "WGAN-gen.pt" 81 | print("Download finished") 82 | 83 | #device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 84 | check_weights() 85 | gen(set_gen_dir) 86 | gan = WGAN(device=device) 87 | gan.eval() 88 | gan = gan.to(device) 89 | gan.load_model(filename=set_ckpt_dir) 90 | 91 | def save_new_img(): 92 | len = 20 # number of images to be generated 93 | for i in range(len): 94 | vec = gan.create_latent_var(1, random.randint(1, 200)) # batch, seed value 95 | img = gan.generate_img(vec) 96 | img = unnormalize(img) 97 | fname_in = '{}/frame{}.png'.format(set_gen_dir, i) 98 | torchvision.utils.save_image(img, fname_in, padding=0) 99 | print("All images are saved in gen") 100 | 101 | save_new_img() 102 | -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/images/content1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/images/content1.jpg -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/images/content2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/images/content2.jpg -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/images/content3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/images/content3.jpg -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/images/content4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/images/content4.jpg -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/images/content5.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/images/content5.jpg -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/images/content6.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/images/content6.jpeg -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/images/style1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/images/style1.jpg -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/images/style6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/images/style6.jpg -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/images/style7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/images/style7.jpg -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/output/content1+style6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/output/content1+style6.png -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/output/content2+style1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/output/content2+style1.png -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/output/content3+style6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/output/content3+style6.png -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/output/content4+style1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/output/content4+style1.png -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/output/content4+style7.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/output/content4+style7.png -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/output/content5+style1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/output/content5+style1.png -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/Examples/output/content6+style7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/NeuralStyleTransfer/Examples/output/content6+style7.png -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/README.md: -------------------------------------------------------------------------------- 1 | # Neural Style Transfer 2 | 3 | An implementation of the paper _A Neural Algorithm of Artistic Style_ 4 | The paper can be read [here](https://arxiv.org/pdf/1508.06576.pdf). 5 | 6 | The idea is to extract the _content_ from one image, the 'content image', and the _style_ or _texture_ from another image, the 'style image', to get a single output which has a combination of the two. 7 | 8 | To check out a notebook demonstrating how you can use the Neural Style Transfer Module in 3 lines, check [here](https://github.com/pranjaldatta/PyVision/blob/master/demo/misc/NeuralStyleTransfer/nst_demo.ipynb). 9 | 10 | If the link above does not work check [here](https://nbviewer.jupyter.org/github/pranjaldatta/PyVision/blob/master/demo/misc/NeuralStyleTransfer/nst_demo.ipynb). 11 | 12 | ## A Few details about the implementation 13 | 14 | - By default, due to computational limitations, both style and content images are resized to 512x512 if using a GPU or 128x128 if on a CPU. If the *retain_dims* is set to True, the output is **UPSAMPLED** to the original content image dimensions but this upsampling especially for 128x128 images reduces quality. 15 | 16 | - This behavior can be disabled by setting the param *downsample* to False. This ensures that the style image is resized to the size of the content image and style transfer is run with original content image dimensions. 17 | 18 | **Note**: Using Neural Style Transfer is computationally expensive so it is recommended to use GPU for optimal timing. 
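A minimal sketch of the parameters discussed above (assuming *retain_dims* is accepted by the constructor alongside *save*, *downsample* and *use_gpu*):

```python
from pyvision.misc.NeuralStyleTransfer import NeuralStyle

# retain_dims=True upsamples the stylized output back to the content image's original size
nst = NeuralStyle(save="output.jpg", retain_dims=True, use_gpu=True)

output, time_taken = nst.run_style_transfer("path to style img", "path to content img")
```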
19 | 20 | ## Quick Start 21 | 22 | - To run style transfer with default settings on a CPU or GPU, 23 | 24 | ```python 25 | from pyvision.misc.NeuralStyleTransfer import NeuralStyle 26 | 27 | style_img, content_img = 'path to style img', 'path to content img' # paths or loaded images 28 | 29 | nst = NeuralStyle(save="output.jpg") 30 | 31 | output, time_taken = nst.run_style_transfer(style_img, content_img) 32 | ``` 33 | 34 | - To disable downsampling and run style transfer on the original content image dimensions, 36 | ```python 37 | from pyvision.misc.NeuralStyleTransfer import NeuralStyle 38 | 39 | style_img, content_img = 'path to style img', 'path to content img' # paths or loaded images 40 | 41 | nst = NeuralStyle(save="output.jpg", downsample=False, use_gpu=True) 42 | 43 | output, time_taken = nst.run_style_transfer(style_img, content_img) 44 | ``` 45 | 46 | ## Examples 47 | 48 | For more examples, check [Examples](https://github.com/pranjaldatta/PyVision/tree/nst/pyvision/misc/NeuralStyleTransfer/Examples). 49 | 50 | Content Image | Style Image | Result | 51 | :-------------: | :---------: | :-----: | 52 | | | | 53 | | | | 54 | | | | 55 | 56 | -------------------------------------------------------------------------------- /pyvision/misc/NeuralStyleTransfer/__init__.py: -------------------------------------------------------------------------------- 1 | from .neural_style import NeuralStyle -------------------------------------------------------------------------------- /pyvision/misc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/__init__.py -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/README.md: -------------------------------------------------------------------------------- 1 | # Multi Task Cascaded Convolutional Neural Network in PyTorch (MTCNN) 2 | 3 | State of the art face and facial-landmark detection architecture. 4 | 5 | ## Paper 6 | 7 | Read the paper [here](https://arxiv.org/pdf/1604.02878.pdf). 8 | 9 | ## Contributed By 10 | 11 | - [Sashrika Surya](https://github.com/sashrika15) 12 | 13 | - [Pranjal Datta](https://github.com/pranjaldatta) 14 | 15 | ## Tests 16 | 17 | **All tests passing.** 18 | 19 | To check, from PyVision root, run: 20 | 21 | ``` 22 | python tests/misc/mtcnn/mtcnn_test.py 23 | ``` 24 | 25 | ## Usage 26 | 27 | This usage guide assumes that the PyVision repository has already been cloned. If not, follow the instructions given in the PyVision repository root and clone the repository. 
Then follow the steps listed below: 28 | 29 | ``` 30 | from pyvision.misc.mtcnn import MTCNN 31 | from PIL import Image 32 | from pyvision.misc.mtcnn.utils.visualize import show_boxes 33 | 34 | path = "path to image" 35 | 36 | img = Image.open(path) 37 | 38 | mtcnn = MTCNN() 39 | boxes = mtcnn.detect(img) # returns bounding boxes 40 | 41 | img = show_boxes(img, boxes) 42 | img.show() 43 | ``` 44 | 45 | For more detailed usage, check out [mtcnn_test.py](https://github.com/pranjaldatta/PyVision/blob/master/tests/misc/mtcnn/mtcnn_test.py) 46 | -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import * 2 | from .utils import * 3 | -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/config/weights_download.json: -------------------------------------------------------------------------------- 1 | { 2 | "pnet": "1Fw-Jrei12NqYhEkwqtS1jP3WvClr8a0P", 3 | "onet": "11--NPbveLKQ9-f-UH3Kl8CzgfAYQoPch", 4 | "rnet": "1BqF021ltiNmBIDFMHrr_zv7x4zBtSnmH" 5 | 6 | } -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/detector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | import torch 4 | from .nets import PNet, RNet, ONet 5 | from .stage_one import first_stage 6 | from .stage_two import get_image_boxes 7 | from .utils.visualize import show_boxes 8 | from .utils.utils import nms, convert_to_square, calibrate_boxes 9 | 10 | 11 | 12 | def detector(image, min_face_size = 20.0, conf_thresh=[0.7, 0.7, 0.8], nms_thresh=[0.7, .7, .7]): 13 | """ 14 | method that accepts an image and returns bounding boxes around faces 15 | 16 | Parameters: 17 | -> image (PIL.Image): Image in PIL format 18 | -> min_face_size (float): minimum size of face to look for 19 | -> conf_thresh (list): list of confidence thresholds for the various stages 20 | of the pipeline. 
(Size = 3) 21 | -> nms_thresh (list): list of overlap thresholds for nms (sizze = 3) 22 | """ 23 | 24 | try: 25 | if not isinstance(image, Image.Image): 26 | raise TypeError 27 | if len(conf_thresh) != 3 or len(nms_thresh) != 3: 28 | raise AssertionError 29 | except AssertionError: 30 | print("ERROR: conf_thresh or nms_thresh of len :{},{} while expected size: 3".format(len(conf_thresh), len(nms_thresh))) 31 | exit() 32 | except TypeError: 33 | print("ERROR: Image type found:{}, expected: PIL.Image".format(type(image))) 34 | exit() 35 | 36 | pnet = PNet() 37 | rnet = RNet() 38 | onet = ONet() 39 | 40 | w, h = image.size 41 | min_length = min(h, w) 42 | min_detection_size = 12 43 | scale_factor = 0.709 #not sure why its .709 44 | scales = [] 45 | m = min_detection_size/min_face_size 46 | min_length *= m 47 | factor_count = 0 48 | 49 | while min_length > min_detection_size: 50 | scales += [m * np.power(scale_factor,factor_count)] 51 | min_length *= scale_factor 52 | factor_count += 1 53 | 54 | ################## Stage 1 ############################# 55 | 56 | bounding_boxes = [] 57 | 58 | for s in scales: 59 | boxes = first_stage(image, s, pnet, nms_thresh[0]) 60 | bounding_boxes.append(boxes) 61 | #bounding_boxes has shape [n_scales, n_boxes, 9] 62 | 63 | #remove those scales for which bounding boxes were none 64 | bounding_boxes = [i for i in bounding_boxes if i is not None] 65 | 66 | #Add all the boxes for each scale 67 | if len(bounding_boxes)==0: 68 | return bounding_boxes 69 | 70 | bounding_boxes = np.vstack(bounding_boxes) # returns array of shape [n_boxes, 9] 71 | 72 | 73 | #------------------------- Stage 2 ------------------------------------- 74 | 75 | img_box = get_image_boxes(bounding_boxes,image,size=24) 76 | img_box = torch.tensor(img_box, dtype=torch.float32, requires_grad=False) 77 | 78 | probs, boxes = rnet(img_box) 79 | 80 | probs = probs.data.numpy() #Shape [boxes, 2] 81 | boxes = boxes.data.numpy() #Shape [boxes, 4] 82 | 83 | ind = np.where(probs[:, 1] >= conf_thresh[1])[0] 84 | 85 | bounding_boxes = bounding_boxes[ind] 86 | bounding_boxes[:, 4] = probs[ind, 1].reshape((-1,)) 87 | boxes = boxes[ind] 88 | 89 | keep = nms(bounding_boxes, nms_thresh[1], mode="union") 90 | bounding_boxes = bounding_boxes[keep] 91 | boxes = boxes[keep] 92 | 93 | bounding_boxes = calibrate_boxes(bounding_boxes, boxes) 94 | bounding_boxes = convert_to_square(bounding_boxes) 95 | bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) 96 | 97 | #--------------------STAGE 3------------------------------------------------- 98 | 99 | img_box = get_image_boxes(bounding_boxes, image, size=48) 100 | 101 | if len(img_box) == 0: 102 | return [], [] 103 | 104 | img_box = torch.tensor(img_box, dtype=torch.float32, requires_grad=False) 105 | probs, boxes, landmarks = onet(img_box) 106 | 107 | probs = probs.data.numpy() 108 | boxes = boxes.data.numpy() 109 | landmarks = landmarks.data.numpy() 110 | 111 | 112 | keep = np.where(probs[:,1] > conf_thresh[2])[0] 113 | 114 | bounding_boxes = bounding_boxes[keep] 115 | bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,)) 116 | boxes = boxes[keep] 117 | landmarks = landmarks[keep] 118 | 119 | bounding_boxes = calibrate_boxes(bounding_boxes, boxes) 120 | 121 | 122 | keep = nms(bounding_boxes, overlap_thresh=nms_thresh[2], mode="min") 123 | bounding_boxes = bounding_boxes[keep] 124 | bounding_boxes = convert_to_square(bounding_boxes) 125 | 126 | 127 | return bounding_boxes 128 | 129 | -------------------------------------------------------------------------------- 
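A minimal sketch of calling the detector() helper above directly (the MTCNN wrapper in model.py is the usual entry point; the import paths below assume the package layout of this repository):

```python
from PIL import Image

from pyvision.misc.mtcnn.detector import detector
from pyvision.misc.mtcnn.utils.visualize import show_boxes

img = Image.open("path to image")          # detector() expects a PIL.Image
boxes = detector(img, min_face_size=20.0)  # rows of [x1, y1, x2, y2, confidence, ...]
show_boxes(img, boxes).show()              # draw red rectangles around detected faces
```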
/pyvision/misc/mtcnn/nets.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | from collections import OrderedDict 6 | import os 7 | 8 | WEIGHTS_PATH = os.path.dirname(os.path.realpath(__file__))+"/weights/" 9 | 10 | 11 | class FlattenTensorCustom(nn.Module): 12 | 13 | def __init__(self): 14 | 15 | super(FlattenTensorCustom, self).__init__() 16 | 17 | def forward(self, x): 18 | """ 19 | Input: 20 | 21 | A Tensor x of shape [batch_no, c, h, w] 22 | 23 | Output: 24 | 25 | A Tensor x of shape [batch_no, c*h*w] 26 | """ 27 | 28 | x = x.transpose(3,2).contiguous() #wierd fix 29 | 30 | return x.view(x.size(0), -1) 31 | 32 | 33 | class PNet(nn.Module): 34 | 35 | def __init__(self): 36 | 37 | super(PNet, self).__init__() 38 | 39 | self.features = nn.Sequential(OrderedDict([ 40 | 41 | ("conv1", nn.Conv2d(3, 10, 3, 1)), 42 | ("prelu1", nn.PReLU(10)), 43 | ("pool1", nn.MaxPool2d(2,2,ceil_mode=True)), 44 | 45 | ("conv2", nn.Conv2d(10, 16, 3, 1)), 46 | ("prelu2", nn.PReLU(16)), 47 | 48 | ("conv3", nn.Conv2d(16, 32, 3, 1)), 49 | ("prelu3", nn.PReLU(32)), 50 | 51 | ])) 52 | 53 | self.conv4_1 = nn.Conv2d(32, 2, 1, 1) 54 | self.conv4_2 = nn.Conv2d(32, 4, 1, 1) 55 | 56 | try: 57 | self.weights = np.load(WEIGHTS_PATH+"pnet.npy", allow_pickle=True)[()] 58 | for idx, wts in self.named_parameters(): 59 | wts.data = torch.FloatTensor(self.weights[idx]) 60 | except Exception as err: 61 | print("ERROR: At Pnet Weight Init: {}".format(err)) 62 | exit() 63 | 64 | 65 | def summary(self): 66 | print("PNet Summary:") 67 | print(self.features) 68 | print(self.conv4_1) 69 | print(self.conv4_2) 70 | 71 | def forward(self, x): 72 | x = self.features(x) 73 | probs = F.softmax(self.conv4_1(x), dim=1) #ERROR PRONE #holds probilities and box preds respec. 
74 | boxes = self.conv4_2(x) 75 | 76 | return probs, boxes 77 | 78 | 79 | class RNet(nn.Module): 80 | 81 | 82 | def __init__(self): 83 | 84 | super(RNet, self).__init__() 85 | 86 | self.features = nn.Sequential(OrderedDict([ 87 | ("conv1", nn.Conv2d(3, 28, 3, 1)), 88 | ("prelu1", nn.PReLU(28)), 89 | ("pool1", nn.MaxPool2d(3, 2, ceil_mode=True)), 90 | 91 | ("conv2", nn.Conv2d(28, 48, 3, 1)), 92 | ("prelu2", nn.PReLU(48)), 93 | ("pool2", nn.MaxPool2d(3, 2, ceil_mode=True)), 94 | 95 | ("conv3", nn.Conv2d(48, 64, 2, 1)), 96 | 97 | ("flatten", FlattenTensorCustom()), 98 | ("conv4", nn.Linear(576, 128)), 99 | ("prelu4", nn.PReLU(128)), 100 | ])) 101 | 102 | self.conv5_1 = nn.Linear(128, 2) #boxes 103 | self.conv5_2 = nn.Linear(128, 4) 104 | 105 | try: 106 | self.weights = np.load(WEIGHTS_PATH+"rnet.npy", allow_pickle=True)[()] 107 | for idx, wts in self.named_parameters(): 108 | wts.data = torch.FloatTensor(self.weights[idx]) 109 | except Exception as err: 110 | 111 | print("ERROR: at loading rnet weights: {}".format(err)) 112 | exit() 113 | 114 | def summary(self): 115 | print("RNet Summary:") 116 | print(self.features) 117 | print("\n") 118 | print(self.conv5_1) 119 | print(self.conv5_2) 120 | 121 | def forward(self, x): 122 | 123 | x = self.features(x) 124 | probs = F.softmax(self.conv5_1(x), dim=1) 125 | boxes = self.conv5_2(x) 126 | 127 | return probs, boxes 128 | 129 | 130 | class ONet(nn.Module): 131 | 132 | 133 | def __init__(self): 134 | 135 | super(ONet, self).__init__() 136 | 137 | self.features = nn.Sequential(OrderedDict([ 138 | ("conv1", nn.Conv2d(3, 32, 3, 1)), 139 | ("prelu1", nn.PReLU(32)), 140 | ("pool1", nn.MaxPool2d(3, 2, ceil_mode=True)), 141 | 142 | ("conv2", nn.Conv2d(32, 64, 3, 1)), 143 | ("prelu2", nn.PReLU(64)), 144 | ("pool2", nn.MaxPool2d(3, 2, ceil_mode=True)), 145 | 146 | ("conv3", nn.Conv2d(64, 64, 3)), 147 | 148 | ("prelu3", nn.PReLU(64)), 149 | ("pool3", nn.MaxPool2d(2, 2, ceil_mode=True)), 150 | 151 | ("conv4", nn.Conv2d(64,128,2)), 152 | ("prelu4", nn.PReLU(128)), 153 | ("flatten", FlattenTensorCustom()), 154 | ("conv5", nn.Linear(1152,256)), 155 | ("prelu5", nn.PReLU(256)), 156 | 157 | ])) 158 | 159 | self.conv6_1 = nn.Linear(256,2) #prob of face in bb 160 | self.conv6_2 = nn.Linear(256,4) #box 161 | self.conv6_3 = nn.Linear(256,10) #facial landmarks 162 | 163 | try: 164 | self.weights = np.load(WEIGHTS_PATH+"onet.npy", allow_pickle=True)[()] 165 | for idx, wts in self.named_parameters(): 166 | wts.data = torch.FloatTensor(self.weights[idx]) 167 | except Exception as err: 168 | print("ERROR: at loading onet weights: {}".format(err)) 169 | exit() 170 | 171 | def summary(self): 172 | print("ONet Summary:") 173 | print(self.features) 174 | print("\n") 175 | print(self.conv6_1) 176 | print(self.conv6_2) 177 | print(self.conv6_3) 178 | 179 | def forward(self, x): 180 | x = self.features(x) 181 | probs = F.softmax(self.conv6_1(x), dim=1) 182 | boxes = self.conv6_2(x) 183 | points = self.conv6_3(x) 184 | return probs, boxes, points 185 | -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/requirements.txt: -------------------------------------------------------------------------------- 1 | python=3.7.1 2 | pytorch=1.4.0=py3.7_cpu_0 3 | pillow=7.1.1=py37h718be6c_0 4 | numpy=1.18.1=py37h8960a57_1 5 | torchvision=0.5.0=py37_cpu 6 | opencv=4.1.1 -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/stage_one.py: 
-------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import torch 4 | from .utils.utils import preprocess, nms 5 | import cv2 6 | from PIL import Image 7 | 8 | 9 | 10 | 11 | def scale_boxes(probs, boxes, scale, thresh=.8): 12 | """ 13 | A method that takes in the outputs of pnet, probabilities and 14 | box cords for a scaled image and returns box cords for the 15 | original image. 16 | 17 | Params: 18 | -> probs: probilities of a face for a given bbox; shape: [a,b] 19 | -> boxes: box coords for a given scaled image; shape" [1, 4, a, b] 20 | -> scale: a float denoting the scale factor of the image 21 | -> thresh: minimum confidence required for a facce to qualify 22 | 23 | Returns: 24 | -> returns a float numpy array of shape [num_boxes, 9] #9 because bbox + confidence + offset (4+1+4) 25 | """ 26 | stride = 2 27 | cell_size = 12 28 | inds = np.where(probs > thresh) 29 | if inds[0].size == 0: 30 | return np.array([]) 31 | 32 | tx1, ty1, tx2, ty2 = [boxes[0, i, inds[0], inds[1]] for i in range(4)] 33 | offsets = np.array([tx1, ty1, tx2, ty2]) 34 | 35 | confidence = probs[inds[0], inds[1]] 36 | 37 | bboxes = np.vstack([ 38 | np.round((stride*inds[1] + 1.0)/scale), 39 | np.round((stride*inds[0] + 1.0)/scale), 40 | np.round((stride*inds[1] + 1.0 + cell_size)/scale), 41 | np.round((stride*inds[0] + 1.0 + cell_size)/scale), 42 | confidence, 43 | offsets 44 | ]) 45 | 46 | return bboxes.T 47 | 48 | 49 | def first_stage(img, scale, pnet, nms_thresh): 50 | """ 51 | A method that accepts a PIL Image, 52 | runs it through pnet and does nms. 53 | 54 | Params: 55 | -> img: PIL image 56 | -> scale: a float that determines the scaling factor 57 | -> pnet: an instance of the pnet 58 | -> thresh: threshold below which facial probs are unacceptable 59 | 60 | Returns: 61 | -> numpy array of type float of shape [num_boxes, 9] 62 | which contain box cords for a givens scale, confidence, 63 | and offsets to actual size 64 | """ 65 | 66 | orig_w, orig_h = img.size 67 | scaled_w, scaled_h = math.ceil(scale*orig_w), math.ceil(scale*orig_h) 68 | 69 | img = img.resize((scaled_w, scaled_h), Image.BILINEAR) 70 | img = preprocess(img) 71 | 72 | probs, boxes = pnet(img) 73 | 74 | 75 | probs = probs.data.numpy()[0,1,:,:] 76 | boxes = boxes.data.numpy() 77 | 78 | bounding_boxes = scale_boxes(probs, boxes, scale) 79 | if len(bounding_boxes) == 0: 80 | return None 81 | 82 | selected_ids = nms(bounding_boxes[:,0:5], nms_thresh) #indices to be kept 83 | return bounding_boxes[selected_ids] 84 | -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/stage_two.py: -------------------------------------------------------------------------------- 1 | from .utils.utils import preprocess 2 | import numpy as np 3 | from PIL import Image 4 | from .utils.visualize import show_boxes 5 | 6 | 7 | def get_image_boxes(bounding_boxes, img, size=24): 8 | 9 | """ 10 | Cut out boxes from the image for rnet input 11 | """ 12 | 13 | num_boxes = len(bounding_boxes) 14 | w, h = img.size 15 | [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(bounding_boxes, w, h) 16 | img_boxes = np.zeros((num_boxes, 3, size, size), 'float32') 17 | 18 | for i in range(num_boxes): 19 | img_box = np.zeros((tmph[i], tmpw[i], 3), 'uint8') 20 | 21 | img_array = np.asarray(img, 'uint8') 22 | 23 | #Copies the values from img_array to empty img_box 24 | #x,ex,y,ey are the actual coords in the image 25 | try: 26 | img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), 
:] =\ 27 | img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :] 28 | except ValueError as ve: 29 | print("Value error at index {}".format(i)) 30 | 31 | img_box = Image.fromarray(img_box) 32 | img_box = img_box.resize((size, size), Image.BILINEAR) 33 | img_box = np.asarray(img_box, 'float32') 34 | 35 | img_boxes[i, :, :, :] = preprocess(img_box) 36 | 37 | return img_boxes 38 | 39 | 40 | def pad(bboxes, width, height): 41 | """ 42 | Output: 43 | dy, dx, edy, edx: Coordinates of cut boxes 44 | y, x, ey, ex: Coordinates of box in image 45 | h, w: Heights and widths of boxes. 46 | """ 47 | 48 | #No idea why 1 is added and subtracted from w and h 49 | #e stands for end. So its (x,ex) 50 | 51 | x, y, ex, ey = [bboxes[:, i] for i in range(4)] 52 | w, h = ex - x + 1.0, ey - y + 1.0 53 | num_boxes = bboxes.shape[0] 54 | dx, dy = np.zeros((num_boxes,)), np.zeros((num_boxes,)) 55 | edx, edy = w.copy() - 1.0, h.copy() - 1.0 56 | 57 | #For top left corner 58 | ind = np.where(x < 0.0)[0] 59 | dx[ind] = 0.0 - x[ind] 60 | x[ind] = 0.0 61 | 62 | ind = np.where(y < 0.0)[0] 63 | dy[ind] = 0.0 - y[ind] 64 | y[ind] = 0.0 65 | 66 | #For bottom right corner 67 | ind = np.where(ex > width - 1.0 )[0] 68 | edx[ind] = w[ind] + width - 2.0 - ex[ind] 69 | ex[ind] = width - 1.0 70 | 71 | ind = np.where(ey > height - 1.0)[0] 72 | edy[ind] = h[ind] + height - 2.0 - ey[ind] 73 | ey[ind] = height - 1.0 74 | 75 | return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h] 76 | return_list = [r.astype('int32') for r in return_list] 77 | 78 | return return_list 79 | 80 | -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/mtcnn/utils/__init__.py -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/utils/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | import torch 4 | 5 | def nms(boxes, overlap_thresh=.5, mode='union'): 6 | """ 7 | An utility function that performs nms over the bounding box 8 | 9 | Params: 10 | -> boxes: the bounding box proposals 11 | -> overlap_thresh: maximum permissible overlap ratio 12 | -> mode: default - union (IoU) 13 | 14 | Output: 15 | -> bounding box list with overlapping boxes removed 16 | """ 17 | 18 | if len(boxes) == 0: 19 | return [] 20 | 21 | x1, y1, x2, y2, confidence = [boxes[:, i] for i in range(5)] 22 | 23 | areas = (x2 - x1 + 1.0)*(y2 - y1 + 1.0) 24 | selected = [] 25 | ids_sorted = np.argsort(confidence) 26 | 27 | while len(ids_sorted) > 0: 28 | """ 29 | we loop through the sorted ids. 30 | 1. select the last id 31 | 2. compare the chosen bbox IoU with all the others 32 | 3. del the ones above the threshold. 33 | 4. 
return selected ids 34 | """ 35 | 36 | last_idx = len(ids_sorted) - 1 37 | idx = ids_sorted[last_idx] 38 | selected.append(idx) 39 | 40 | 41 | xi1 = np.maximum(x1[idx], x1[ids_sorted[:last_idx]]) 42 | yi1 = np.maximum(y1[idx], y1[ids_sorted[:last_idx]]) 43 | 44 | xi2 = np.minimum(x2[idx], x2[ids_sorted[:last_idx]]) 45 | yi2 = np.minimum(y2[idx], y2[ids_sorted[:last_idx]]) 46 | 47 | inter_h = np.maximum(0.0, (yi2 - yi1 + 1.0)) 48 | inter_w = np.maximum(0.0, (xi2 - xi1 + 1.0)) 49 | inter_area = inter_h*inter_w 50 | 51 | if mode == "union": 52 | overlap = inter_area/(areas[idx] + areas[ids_sorted[:last_idx]] - inter_area) 53 | elif mode == "min": 54 | overlap = inter_area/np.minimum(areas[idx], areas[ids_sorted[:last_idx]]) 55 | 56 | to_del = np.concatenate([[last_idx], np.where(overlap > overlap_thresh)[0]]) 57 | ids_sorted = np.delete(ids_sorted, to_del) 58 | 59 | #print("nms complete. returning {}/{} boxes".format(len(selected), len(boxes))) 60 | return selected 61 | 62 | 63 | 64 | def preprocess(img): 65 | """ 66 | A utiity function that takes a numpy image array or PIL 67 | Image and returns a tensor 68 | 69 | Input: 70 | -> img: input image in array or PIL format 71 | Output: 72 | -> tensor 73 | """ 74 | if isinstance(img, Image.Image): 75 | img = np.asarray(img, 'float') 76 | img = torch.tensor(img, dtype=torch.float32, requires_grad=False) 77 | img = img.permute(2,0,1) 78 | img = torch.unsqueeze(img, 0) 79 | img = (img - 127.5)*0.0078125 #normalize 80 | return img 81 | 82 | def convert_to_square(bbox): 83 | """ 84 | Convert bounding boxes to square shape 85 | 86 | """ 87 | 88 | square = np.zeros((bbox.shape)) 89 | 90 | x1, y1, x2, y2 = [bbox[:, i] for i in range(4)] 91 | h = y2 - y1 + 1.0 92 | w = x2 - x1 + 1.0 93 | max_side = np.maximum(h, w) 94 | 95 | square[:,0] = x1 + w*0.5 - max_side*0.5 96 | square[:,1] = y1 + h*0.5 - max_side*0.5 97 | square[:, 2] = square[:, 0] + max_side - 1.0 98 | square[:, 3] = square[:, 1] + max_side - 1.0 99 | 100 | return square 101 | 102 | def calibrate_boxes(boxes, offsets): 103 | ''' 104 | offset the original bounding boxes by an amount as predicted by the 105 | rnet. 
106 | 107 | Arguments: 108 | -> boxes: original bounding box list (shape: [n, 9]) 109 | -> offsets: output of the rnet (shape [n, 4]) 110 | 111 | Returns: 112 | -> numpy array of shape [n, 5] 113 | ''' 114 | 115 | x1, y1, x2, y2 = [boxes[:,i] for i in range(4)] 116 | 117 | width = (x2 - x1 + 1.0) 118 | height = (y2 - y1 + 1.0) 119 | 120 | height = np.reshape(height, (-1, 1)) 121 | width = np.reshape(width, (-1, 1)) 122 | 123 | tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)] 124 | t = [x1, y1, x2, y2, tx1, ty1, tx2, ty2] 125 | t = list(map(lambda x: np.reshape(x,(-1, 1)), t)) 126 | x1, y1, x2, y2, tx1, ty1, tx2, ty2 = t[:] 127 | 128 | """ 129 | it was supposed to be x1t = x1+tx1*width but that was providing negative indices so swapped 130 | tx1 and tx2 131 | """ 132 | x1t = x1 + tx2*width 133 | y1t = y1 + ty1*height 134 | x2t = x2 + tx1*width 135 | y2t = y2 + ty2*height 136 | 137 | t = [x1t, y1t, x2t, y2t] 138 | 139 | t = list(map(lambda x: np.reshape(x, (-1,)), t)) 140 | for i in range(4): 141 | boxes[:,i] = t[i] 142 | return boxes 143 | -------------------------------------------------------------------------------- /pyvision/misc/mtcnn/utils/visualize.py: -------------------------------------------------------------------------------- 1 | from PIL import ImageDraw 2 | 3 | def show_boxes(img, bounding_boxes): 4 | 5 | im = img.copy() 6 | draw = ImageDraw.Draw(im) 7 | 8 | 9 | for i in bounding_boxes: 10 | draw.rectangle([ 11 | (i[0],i[1]), 12 | (i[2],i[3]) 13 | ], outline = 'red') 14 | 15 | return im 16 | 17 | def _show_boxes(img, boxes): 18 | im = img.copy() 19 | draw = ImageDraw.Draw(im) 20 | boxes = boxes[0] 21 | for i in boxes: 22 | draw.rectangle([ 23 | (i[0],i[1]), 24 | (i[2],i[3]) 25 | ], outline = 'red') 26 | return im 27 | -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/README.md: -------------------------------------------------------------------------------- 1 | # Noise2Noise: Learning Image Restoration without Clean Data 2 | 3 | Noise2Noise is an image-denoising model which is trained on noisy data only. 4 | This implementation is based on the ICML 2018 [paper](https://arxiv.org/abs/1803.04189) by Jaakko Lehtinen et al. 5 | 6 | ## Some Specific Details 7 | 8 | ### For denoising Gaussian noise 9 | 10 | For Gaussian denoising, the model was trained with a *mean* of **10** and a *standard deviation* in the range [20, 50] (sampled randomly from an uniform distribution). 11 | 12 | ### For Text Removal 13 | 14 | During the training of text removal model, random number of text units were added. 15 | 16 | *For more details*, check out [dataset.py](https://github.com/pranjaldatta/PyVision/blob/master/pyvision/misc/noise2noise/dataset.py). 17 | 18 | ## Summary 19 | 20 | - This model works for additive gaussian noise and text removal only. It does not include poisson noise and Monte Carlo Rendering discussed in the paper. 21 | - U-Net architecture is followed throughout the model. The original paper used a “RED30” network (Mao et al., 2016) for additive gaussian noise. 22 | - The weights were made available by Joey Litalien's implementation [here](https://github.com/joeylitalien/noise2noise-pytorch). 23 | - For additive gaussian noise, sigma or the standard deviation is an important hyperparameter. If the **noise level is greater than thrice of sigma, the denoiser is unable to present a clear image**. 24 | - The text overlay function works within a random integer range to add a random string to the image. 
The denoiser works better for small sized strings which cover less pixels. 25 | 26 | ### Test 27 | 28 | To run test from PyVision root: 29 | 30 | ```python 31 | python tests/misc/noise2noise/n2n_test.py 32 | ``` 33 | 34 | ### Usage 35 | 36 | - The model setup is done through Noise2Noise class via pyvision.misc.noise2noise.model 37 | - The model is initialised with the noise type. For 'test' mode, a data_path is required which contains the path to test images. For 'inference' mode, a PIL image or the path to the image is required as input. The show parameter can be set to 'True' to display the images after denoising. 38 | - The available noise types are: gaussian, text 39 | 40 | ```python 41 | from pyvision.misc.noise2noise.model import Noise2Noise 42 | from PIL import Image 43 | 44 | n2n = Noise2Noise(noise="gaussian") 45 | 46 | img_path = "Path to Image" 47 | img = Image.open(img_path) 48 | 49 | n2n.inference(img, show=False, save="Denoised.png") 50 | 51 | ``` 52 | 53 | ### Example 54 | Gaussian Noise: 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 |
Source Image | Denoised Image
70 | 71 | Text Overlay 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 |
Source Image | Denoised Image
87 | 88 | -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import * -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/assets/gauss_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/noise2noise/assets/gauss_1.png -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/assets/gauss_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/noise2noise/assets/gauss_3.png -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/assets/gdenoised_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/noise2noise/assets/gdenoised_1.png -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/assets/gdenoised_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/noise2noise/assets/gdenoised_3.png -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/assets/tdenoised_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/noise2noise/assets/tdenoised_1.png -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/assets/tdenoised_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/noise2noise/assets/tdenoised_3.png -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/assets/text_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/noise2noise/assets/text_1.png -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/assets/text_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/noise2noise/assets/text_3.png -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/misc/noise2noise/config/__init__.py -------------------------------------------------------------------------------- 
/pyvision/misc/noise2noise/config/weights_download.json: -------------------------------------------------------------------------------- 1 | { 2 | "n2n-gaussian.pt" : "1n_yfTcF4Oz9RqTfHL2ARBQykN5r92yD1" , 3 | "n2n-text.pt" : "1TdjEE4NjZb7m1zNoThGN13HKU5TTzLkJ" 4 | } 5 | -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import os 4 | import random 5 | from torch.utils.data import Dataset, DataLoader 6 | import torchvision.transforms.functional as tvf 7 | from PIL import Image, ImageDraw, ImageFont 8 | from string import ascii_letters 9 | 10 | 11 | class NoisyDataset(Dataset): 12 | ''' 13 | Loads dataset. 14 | NoisyDataset inherits from an abstract class representing Dataset 15 | ''' 16 | 17 | def __init__(self, data_dir, noise, crop_size): 18 | ''' 19 | Initialise dataset 20 | ''' 21 | self.data_dir = data_dir 22 | self.imgs = [] 23 | self.crop_size = 320 24 | self.noise = noise 25 | 26 | for file in os.listdir(data_dir): 27 | if file.endswith(".jpg"): 28 | self.imgs.append( os.path.join(data_dir,file)) 29 | 30 | def gaussian_noise(self,img): 31 | ''' 32 | Add Gaussian noise in dataset 33 | Input: img of type PIL.Image 34 | Output: Noisy image of type PIL.Image 35 | ''' 36 | w,h = img.size 37 | c = len(img.getbands()) 38 | 39 | sigma = np.random.uniform(20,50) 40 | gauss = np.random.normal(10,sigma,(h,w,c)) 41 | noisy = np.array(img) + gauss 42 | 43 | #Values less than 0 become 0 and more than 255 become 255 44 | noisy = np.clip(noisy, 0, 255).astype(np.uint8) 45 | img = Image.fromarray(noisy) 46 | 47 | return img 48 | 49 | 50 | def add_text(self,img): 51 | ''' 52 | Add random string of text to images 53 | Input: img of type PIL.Image 54 | Output: Noisy image of type PIL.Image 55 | ''' 56 | w,h = img.size 57 | c = len(img.getbands()) 58 | im = img.copy() 59 | draw = ImageDraw.Draw(im) 60 | for i in range(random.randint(5,15)): 61 | font_type = ImageFont.truetype(font='Arial.ttf',size=np.random.randint(10,20)) 62 | len_text = np.random.randint(4,20) 63 | text = ''.join(random.choice(ascii_letters) for i in range(len_text)) 64 | x = np.random.randint(0,w) 65 | y = np.random.randint(0,h) 66 | col = tuple(np.random.randint(0,255,c)) 67 | draw.text((x,y),text,fill=col,font=font_type) 68 | 69 | return im 70 | 71 | 72 | def crop_image(self,img): 73 | ''' 74 | Crops the image to a square of size (crop_size, crop_size) 75 | Input: img of type PIL.Image 76 | Output: Cropped image of type PIL.Image 77 | ''' 78 | 79 | w,h = img.size 80 | m = min(w,h) 81 | img = tvf.crop(img, 0,0,m,m) 82 | img = tvf.resize(img, (self.crop_size, self.crop_size)) 83 | 84 | return img 85 | 86 | 87 | def __len__(self): 88 | ''' 89 | Returns length of dataset 90 | ''' 91 | return len(self.imgs) 92 | 93 | 94 | def __getitem__(self,index): 95 | ''' 96 | Compiles dataset 97 | ''' 98 | 99 | img = Image.open(self.imgs[index]).convert('RGB') 100 | resized_img = self.crop_image(img) 101 | 102 | if self.noise == 'text': 103 | source = tvf.to_tensor(self.add_text(resized_img)) 104 | target = tvf.to_tensor(self.add_text(resized_img)) 105 | else: 106 | source = tvf.to_tensor(self.gaussian_noise(resized_img)) 107 | target = tvf.to_tensor(self.gaussian_noise(resized_img)) 108 | 109 | return source,target 110 | 111 | 112 | -------------------------------------------------------------------------------- /pyvision/misc/noise2noise/unet.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Unet(nn.Module): 6 | ''' 7 | Unet architecture for n2n. 8 | No batch norm, dropout 9 | ''' 10 | 11 | def __init__(self, in_channels=3, out_channels=3): 12 | """Initializes U-Net.""" 13 | 14 | super(Unet, self).__init__() 15 | 16 | self._block1 = nn.Sequential( 17 | nn.Conv2d(in_channels, 48, 3, stride=1, padding=1), 18 | nn.ReLU(inplace=True), 19 | nn.Conv2d(48, 48, 3, padding=1), 20 | nn.ReLU(inplace=True), 21 | nn.MaxPool2d(2)) 22 | 23 | self._block2 = nn.Sequential( 24 | nn.Conv2d(48, 48, 3, stride=1, padding=1), 25 | nn.ReLU(inplace=True), 26 | nn.MaxPool2d(2)) 27 | 28 | self._block3 = nn.Sequential( 29 | nn.Conv2d(48, 48, 3, stride=1, padding=1), 30 | nn.ReLU(inplace=True), 31 | nn.ConvTranspose2d(48, 48, 3, stride=2, padding=1, output_padding=1)) 32 | 33 | self._block4 = nn.Sequential( 34 | nn.Conv2d(96, 96, 3, stride=1, padding=1), 35 | nn.ReLU(inplace=True), 36 | nn.Conv2d(96, 96, 3, stride=1, padding=1), 37 | nn.ReLU(inplace=True), 38 | nn.ConvTranspose2d(96, 96, 3, stride=2, padding=1, output_padding=1)) 39 | 40 | self._block5 = nn.Sequential( 41 | nn.Conv2d(144, 96, 3, stride=1, padding=1), 42 | nn.ReLU(inplace=True), 43 | nn.Conv2d(96, 96, 3, stride=1, padding=1), 44 | nn.ReLU(inplace=True), 45 | nn.ConvTranspose2d(96, 96, 3, stride=2, padding=1, output_padding=1)) 46 | 47 | self._block6 = nn.Sequential( 48 | nn.Conv2d(96 + in_channels, 64, 3, stride=1, padding=1), 49 | nn.ReLU(inplace=True), 50 | nn.Conv2d(64, 32, 3, stride=1, padding=1), 51 | nn.ReLU(inplace=True), 52 | nn.Conv2d(32, out_channels, 3, stride=1, padding=1), 53 | nn.LeakyReLU(0.1)) 54 | 55 | 56 | def forward(self, x): 57 | 58 | #Encoder 59 | #print("X size = ", str(x.size())) 60 | pool1 = self._block1(x) 61 | #print(pool1.size()) 62 | pool2 = self._block2(pool1) 63 | #print(pool2.size()) 64 | pool3 = self._block2(pool2) 65 | #print(pool3.size()) 66 | pool4 = self._block2(pool3) 67 | #print(pool4.size()) 68 | pool5 = self._block2(pool4) 69 | #print(pool5.size()) 70 | 71 | #Decoder 72 | upsample5 = self._block3(pool5) 73 | #print(upsample5.size()) 74 | concat5 = torch.cat((upsample5, pool4), dim=1) 75 | #print(concat5.size()) 76 | upsample4 = self._block4(concat5) 77 | #print(upsample4.size()) 78 | concat4 = torch.cat((upsample4, pool3), dim=1) 79 | #print(concat4.size()) 80 | upsample3 = self._block5(concat4) 81 | #print(upsample3.size()) 82 | concat3 = torch.cat((upsample3, pool2), dim=1) 83 | #print(concat3.size()) 84 | upsample2 = self._block5(concat3) 85 | #print(upsample2.size()) 86 | concat2 = torch.cat((upsample2, pool1), dim=1) 87 | #print(concat2.size()) 88 | upsample1 = self._block5(concat2) 89 | #print(upsample1.size()) 90 | concat1 = torch.cat((upsample1, x), dim=1) 91 | #print(concat1.size()) 92 | output = self._block6(concat1) 93 | #print(output.size()) 94 | return output 95 | 96 | def summary(self): 97 | print('Unet summary: ') 98 | print(self._block1) 99 | print(self._block2) 100 | print(self._block3) 101 | print(self._block4) 102 | print(self._block5) 103 | print(self._block6) 104 | 105 | -------------------------------------------------------------------------------- /pyvision/segmentation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/__init__.py 
-------------------------------------------------------------------------------- /pyvision/segmentation/fcn/README.md: -------------------------------------------------------------------------------- 1 | # Fully Convolutional Networks for Semantic Segmentation (FCN) 2 | 3 | FCN uses a fully convolutional network to segment images into classes of objects. You can read the paper [here](https://arxiv.org/pdf/1605.06211v1.pdf). Check [summary](#Summary) for implementation details or [Usage](#Usage) for Usage details. 4 | 5 | ## Summary 6 | 7 | The implementation makes use of the pretrained models made available by [PyTorch](https://github.com/pytorch). The models were trained on a subset of the COCO dataset containing only those classes that are present in the VOC2012 dataset. 8 | 9 | | Model | Backbone | Dataset | Mean IoU/Global Pixelwise Accuracy| 10 | |----|----|---|-----| 11 | fcn-resnet-50 | resnet-50 | coco2017-train | 60.5 / 91.4 | 12 | |fcn-resnet-101 | resnet-101 | coco2017-train | 63.7 / 91.9 | 13 | 14 | ## Usage 15 | 16 | **Brief** 17 | 18 | The model setup is done via the FCN class exposed via *pyvision.segmentation.fcn*. All models and related config parameters can be configured from the class constructor. 19 | 20 | Inference is run through the *inference()* method. 21 | 22 | **Quick Start** 23 | 24 | * To use the default *fcn-resnet50-coco* model, 25 | 26 | ```python 27 | 28 | from pyvision.segmentation import fcn 29 | 30 | fcn_model = fcn.FCN(device="cpu", show=False) 31 | 32 | # item = path to an image or a cv2 image or a PIL Image 33 | preds, seg_map, blend_map = fcn_model.inference(item, save="preds") 34 | 35 | ``` 36 | 37 | * To use the *fcn-resnet101-coco* model, 38 | 39 | ```python 40 | 41 | from pyvision.segmentation import fcn 42 | 43 | fcn_model = fcn.FCN(model="fcn-resnet101-coco", device="cpu", show=False) 44 | 45 | # item = path to an image or a cv2 image or a PIL Image 46 | preds, seg_map, blend_map = fcn_model.inference(item, save="preds") 47 | 48 | ``` 49 | 50 | * To list available models run, 51 | 52 | ```python 53 | 54 | from pyvision.segmentation import fcn 55 | 56 | print(fcn.available_models()) 57 | 58 | ``` 59 | 60 | * To run tests, from repo root, run, 61 | 62 | ```shell 63 | 64 | $ python tests/segmentation/fcn/fcn50.py 65 | $ python tests/segmentation/fcn/fcn101.py 66 | 67 | ``` 68 | 69 | ## Examples 70 | 71 | **Dataset: COCO2017-train (VOC2012 classes)** 72 | 73 | | Model | Original Image | Segmentation Map | Blend Image | 74 | |---|-----|----|----| 75 | |**fcn-resnet50-coco**|| | | 76 | |**fcn-resnet50-coco**|| | | 77 | |**fcn-resnet101-coco**|| | | 78 | |**fcn-resnet101-coco**|| | | 79 | 80 | ## Contributed By 81 | 82 | [Pranjal Datta](https://github.com/pranjaldatta) 83 | -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import FCN, available_models -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/config/__init__.py -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/config/weights_download.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "fcn-resnet50-coco": "1wQ8davh3KzDspnbuZ6e4OnrVggjaYVo8", 3 | "fcn-resnet101-coco": "1AcxXcQRW8dPdDtwQcQRxBp5vAW-yeFgi" 4 | } -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/data/__init__.py -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/data/voc2012_classes.txt: -------------------------------------------------------------------------------- 1 | background 2 | aeroplane 3 | bicycle 4 | bird 5 | boat 6 | bottle 7 | bus 8 | car 9 | cat 10 | chair 11 | cow 12 | diningtable 13 | dog 14 | horse 15 | motorbike 16 | person 17 | pottedplant 18 | sheep 19 | sofa 20 | train 21 | tvmonitor -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/data/voc2012_colors.txt: -------------------------------------------------------------------------------- 1 | 0 0 0 2 | 128 0 0 3 | 0 128 0 4 | 128 128 0 5 | 0 0 128 6 | 128 0 128 7 | 0 128 128 8 | 128 128 128 9 | 64 0 0 10 | 192 0 0 11 | 64 128 0 12 | 192 128 0 13 | 64 0 128 14 | 192 0 128 15 | 64 128 128 16 | 192 128 128 17 | 0 64 0 18 | 128 64 0 19 | 0 192 0 20 | 128 192 0 21 | 0 64 128 -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/16.jpg -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/16_101_blend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/16_101_blend.png -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/16_101_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/16_101_map.png -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/16_50_blend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/16_50_blend.png -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/16_50_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/16_50_map.png -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/pascal_voc.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/pascal_voc.jpg -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/pascal_voc_101_blend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/pascal_voc_101_blend.png -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/pascal_voc_101_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/pascal_voc_101_map.png -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/pascal_voc_50_blend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/pascal_voc_50_blend.png -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/examples/pascal_voc_50_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/examples/pascal_voc_50_map.png -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/models/__init__.py -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/models/fcn_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torchvision.models._utils import IntermediateLayerGetter 5 | 6 | from .backbone import resnet101, resnet50 7 | 8 | from collections import OrderedDict 9 | 10 | __backbones__ = { 11 | "resnet50" : resnet50, 12 | "resnet101" : resnet101 13 | } 14 | 15 | def _build_fcn(name, num_classes, aux, pretrained=False): 16 | 17 | backbone = __backbones__[name]( 18 | pretrained=pretrained, 19 | replace_stride_with_dilation=[False, True, True] 20 | ) 21 | 22 | final_layers = {'layer4': 'out'} 23 | if aux: 24 | final_layers['layer3'] = "aux" 25 | backbone = IntermediateLayerGetter(backbone, return_layers=final_layers) 26 | 27 | aux_classifier = None 28 | if aux: 29 | inplanes = 1024 30 | aux_classifier = FCNHead(inplanes, num_classes) 31 | 32 | inplanes = 2048 33 | classifier = FCNHead(inplanes, num_classes) 34 | #base_model = FCNModel() 35 | 36 | fcn_model = FCNModel(backbone, classifier, aux_classifier) 37 | 38 | return fcn_model 39 | 40 | 41 | class FCNModel(nn.Module): 42 | 43 | def __init__(self, backbone, classifier, aux_classifier=None): 44 | 45 | super(FCNModel, self).__init__() 46 | 47 | self.backbone = backbone 48 | self.classifier = classifier 49 | self.aux_classifier = aux_classifier 50 | 51 | def forward(self, x): 52 | 53 | 
input_shape = x.shape[-2:] 54 | features = self.backbone(x) 55 | 56 | result = OrderedDict() 57 | x = features["out"] 58 | x = self.classifier(x) 59 | x = F.interpolate(x, size=input_shape, mode='bilinear', align_corners=True) 60 | result["out"] = x 61 | 62 | if self.aux_classifier is not None: 63 | x = features["aux"] 64 | x = self.aux_classifier(x) 65 | x = F.interpolate(x, size=input_shape, mode='bilinear', align_corners=True) 66 | result["aux"] = x 67 | 68 | return result 69 | 70 | 71 | class FCNHead(nn.Sequential): 72 | 73 | def __init__(self, inchannels, channels): 74 | 75 | intermediate_channels = inchannels // 4 76 | layers = [ 77 | nn.Conv2d(inchannels, intermediate_channels, 3, padding=1, bias=False), 78 | nn.BatchNorm2d(intermediate_channels), 79 | nn.ReLU(), 80 | nn.Dropout(0.1), 81 | nn.Conv2d(intermediate_channels, channels, 1) 82 | ] 83 | 84 | super(FCNHead, self).__init__(*layers) 85 | 86 | 87 | -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/fcn/util/__init__.py -------------------------------------------------------------------------------- /pyvision/segmentation/fcn/util/utils.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import numpy as np 3 | 4 | def make_color_seg_map(seg_map_np, palette): 5 | color_img = Image.fromarray(seg_map_np.astype(np.uint8)).convert('P') 6 | color_img.putpalette(palette) 7 | return color_img -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import PSPNet, available_models -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/config/__init__.py -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/config/data_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "voc2012": { 3 | "classes": "21" 4 | }, 5 | "ade20k" : { 6 | "classes": "150" 7 | }, 8 | "cityscapes" : { 9 | "classes" : "19" 10 | } 11 | 12 | } -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/config/weights_download.json: -------------------------------------------------------------------------------- 1 | { 2 | "pspnet-resnet50-voc2012": "1T8NiMaAVNRiS_i4rHVK6oe59oK0pvGGR", 3 | "pspnet-resnet101-voc2012": "1Qca7YOipac981SvGQGGw6kuphDiQzl3Z", 4 | "pspnet-resnet50-ade20k": "1BTH9_hIulIndSOcYj1F_lCyMrEGfsaYW", 5 | "pspnet-resnet50-cityscapes": "1EwX6UxMGeiNi29XL9IS0WUa4KZ7lkbI2" 6 | 7 | } -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/data/__init__.py 
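A note on the palette utilities above: the `*_colors.txt` files that follow store one `R G B` triple per class, and `make_color_seg_map()` (defined identically in `fcn/util/utils.py` and `pspnet/util/utils.py`) applies such a palette to a raw class-index map via PIL's `putpalette`. The short sketch below shows how the two fit together; the `load_palette()` helper and the random "prediction" matrix are illustrative assumptions, not part of the library.

```python
# Sketch: colorize a raw class-index segmentation map with make_color_seg_map().
# load_palette() and the random "prediction" below are illustrative only.
import numpy as np
from pyvision.segmentation.fcn.util.utils import make_color_seg_map

def load_palette(colors_txt):
    # Each line holds "R G B"; PIL's putpalette expects one flat list of ints.
    palette = []
    with open(colors_txt) as fp:
        for line in fp:
            palette.extend(int(v) for v in line.split())
    return palette

palette = load_palette("pyvision/segmentation/fcn/data/voc2012_colors.txt")
seg_map = np.random.randint(0, 21, size=(120, 160))   # dummy 21-class index map
color_img = make_color_seg_map(seg_map, palette)       # 'P'-mode PIL image
color_img.save("dummy_seg_map.png")
```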
-------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/data/ade20k_classes.txt: -------------------------------------------------------------------------------- 1 | wall 2 | building 3 | sky 4 | floor 5 | tree 6 | ceiling 7 | road 8 | bed 9 | windowpane 10 | grass 11 | cabinet 12 | sidewalk 13 | person 14 | earth 15 | door 16 | table 17 | mountain 18 | plant 19 | curtain 20 | chair 21 | car 22 | water 23 | painting 24 | sofa 25 | shelf 26 | house 27 | sea 28 | mirror 29 | rug 30 | field 31 | armchair 32 | seat 33 | fence 34 | desk 35 | rock 36 | wardrobe 37 | lamp 38 | bathtub 39 | railing 40 | cushion 41 | base 42 | box 43 | column 44 | signboard 45 | chest of drawers 46 | counter 47 | sand 48 | sink 49 | skyscraper 50 | fireplace 51 | refrigerator 52 | grandstand 53 | path 54 | stairs 55 | runway 56 | case 57 | pool table 58 | pillow 59 | screen door 60 | stairway 61 | river 62 | bridge 63 | bookcase 64 | blind 65 | coffee table 66 | toilet 67 | flower 68 | book 69 | hill 70 | bench 71 | countertop 72 | stove 73 | palm 74 | kitchen island 75 | computer 76 | swivel chair 77 | boat 78 | bar 79 | arcade machine 80 | hovel 81 | bus 82 | towel 83 | light 84 | truck 85 | tower 86 | chandelier 87 | awning 88 | streetlight 89 | booth 90 | television receiver 91 | airplane 92 | dirt track 93 | apparel 94 | pole 95 | land 96 | bannister 97 | escalator 98 | ottoman 99 | bottle 100 | buffet 101 | poster 102 | stage 103 | van 104 | ship 105 | fountain 106 | conveyer belt 107 | canopy 108 | washer 109 | plaything 110 | swimming pool 111 | stool 112 | barrel 113 | basket 114 | waterfall 115 | tent 116 | bag 117 | minibike 118 | cradle 119 | oven 120 | ball 121 | food 122 | step 123 | tank 124 | trade name 125 | microwave 126 | pot 127 | animal 128 | bicycle 129 | lake 130 | dishwasher 131 | screen 132 | blanket 133 | sculpture 134 | hood 135 | sconce 136 | vase 137 | traffic light 138 | tray 139 | ashcan 140 | fan 141 | pier 142 | crt screen 143 | plate 144 | monitor 145 | bulletin board 146 | shower 147 | radiator 148 | glass 149 | clock 150 | flag -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/data/ade20k_colors.txt: -------------------------------------------------------------------------------- 1 | 120 120 120 2 | 180 120 120 3 | 6 230 230 4 | 80 50 50 5 | 4 200 3 6 | 120 120 80 7 | 140 140 140 8 | 204 5 255 9 | 230 230 230 10 | 4 250 7 11 | 224 5 255 12 | 235 255 7 13 | 150 5 61 14 | 120 120 70 15 | 8 255 51 16 | 255 6 82 17 | 143 255 140 18 | 204 255 4 19 | 255 51 7 20 | 204 70 3 21 | 0 102 200 22 | 61 230 250 23 | 255 6 51 24 | 11 102 255 25 | 255 7 71 26 | 255 9 224 27 | 9 7 230 28 | 220 220 220 29 | 255 9 92 30 | 112 9 255 31 | 8 255 214 32 | 7 255 224 33 | 255 184 6 34 | 10 255 71 35 | 255 41 10 36 | 7 255 255 37 | 224 255 8 38 | 102 8 255 39 | 255 61 6 40 | 255 194 7 41 | 255 122 8 42 | 0 255 20 43 | 255 8 41 44 | 255 5 153 45 | 6 51 255 46 | 235 12 255 47 | 160 150 20 48 | 0 163 255 49 | 140 140 140 50 | 250 10 15 51 | 20 255 0 52 | 31 255 0 53 | 255 31 0 54 | 255 224 0 55 | 153 255 0 56 | 0 0 255 57 | 255 71 0 58 | 0 235 255 59 | 0 173 255 60 | 31 0 255 61 | 11 200 200 62 | 255 82 0 63 | 0 255 245 64 | 0 61 255 65 | 0 255 112 66 | 0 255 133 67 | 255 0 0 68 | 255 163 0 69 | 255 102 0 70 | 194 255 0 71 | 0 143 255 72 | 51 255 0 73 | 0 82 255 74 | 0 255 41 75 | 0 255 173 76 | 10 0 255 77 | 173 255 0 78 | 0 255 153 79 | 255 92 0 80 | 255 0 255 81 | 255 0 245 82 | 255 0 102 83 
| 255 173 0 84 | 255 0 20 85 | 255 184 184 86 | 0 31 255 87 | 0 255 61 88 | 0 71 255 89 | 255 0 204 90 | 0 255 194 91 | 0 255 82 92 | 0 10 255 93 | 0 112 255 94 | 51 0 255 95 | 0 194 255 96 | 0 122 255 97 | 0 255 163 98 | 255 153 0 99 | 0 255 10 100 | 255 112 0 101 | 143 255 0 102 | 82 0 255 103 | 163 255 0 104 | 255 235 0 105 | 8 184 170 106 | 133 0 255 107 | 0 255 92 108 | 184 0 255 109 | 255 0 31 110 | 0 184 255 111 | 0 214 255 112 | 255 0 112 113 | 92 255 0 114 | 0 224 255 115 | 112 224 255 116 | 70 184 160 117 | 163 0 255 118 | 153 0 255 119 | 71 255 0 120 | 255 0 163 121 | 255 204 0 122 | 255 0 143 123 | 0 255 235 124 | 133 255 0 125 | 255 0 235 126 | 245 0 255 127 | 255 0 122 128 | 255 245 0 129 | 10 190 212 130 | 214 255 0 131 | 0 204 255 132 | 20 0 255 133 | 255 255 0 134 | 0 153 255 135 | 0 41 255 136 | 0 255 204 137 | 41 0 255 138 | 41 255 0 139 | 173 0 255 140 | 0 245 255 141 | 71 0 255 142 | 122 0 255 143 | 0 255 184 144 | 0 92 255 145 | 184 255 0 146 | 0 133 255 147 | 255 214 0 148 | 25 194 194 149 | 102 255 0 150 | 92 0 255 -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/data/cityscapes_classes.txt: -------------------------------------------------------------------------------- 1 | road 2 | sidewalk 3 | building 4 | wall 5 | fence 6 | pole 7 | traffic light 8 | traffic sign 9 | vegetation 10 | terrain 11 | sky 12 | person 13 | rider 14 | car 15 | truck 16 | bus 17 | train 18 | motorcycle 19 | bicycle -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/data/cityscapes_colors.txt: -------------------------------------------------------------------------------- 1 | 128 64 128 2 | 244 35 232 3 | 70 70 70 4 | 102 102 156 5 | 190 153 153 6 | 153 153 153 7 | 250 170 30 8 | 220 220 0 9 | 107 142 35 10 | 152 251 152 11 | 70 130 180 12 | 220 20 60 13 | 255 0 0 14 | 0 0 142 15 | 0 0 70 16 | 0 60 100 17 | 0 80 100 18 | 0 0 230 19 | 119 11 32 -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/data/voc2012_classes.txt: -------------------------------------------------------------------------------- 1 | background 2 | aeroplane 3 | bicycle 4 | bird 5 | boat 6 | bottle 7 | bus 8 | car 9 | cat 10 | chair 11 | cow 12 | diningtable 13 | dog 14 | horse 15 | motorbike 16 | person 17 | pottedplant 18 | sheep 19 | sofa 20 | train 21 | tvmonitor -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/data/voc2012_colors.txt: -------------------------------------------------------------------------------- 1 | 0 0 0 2 | 128 0 0 3 | 0 128 0 4 | 128 128 0 5 | 0 0 128 6 | 128 0 128 7 | 0 128 128 8 | 128 128 128 9 | 64 0 0 10 | 192 0 0 11 | 64 128 0 12 | 192 128 0 13 | 64 0 128 14 | 192 0 128 15 | 64 128 128 16 | 192 128 128 17 | 0 64 0 18 | 128 64 0 19 | 0 192 0 20 | 128 192 0 21 | 0 64 128 -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/16.jpg -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/16_blend.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/16_blend.png -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/16_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/16_map.png -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/ade20k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/ade20k.jpg -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/ade20k_blend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/ade20k_blend.png -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/ade20k_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/ade20k_map.png -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/cityscape.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/cityscape.png -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/cityscapes_blend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/cityscapes_blend.png -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/cityscapes_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/cityscapes_map.png -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/pascal_voc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/pascal_voc.jpg -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/examples/pascal_voc_blend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/pascal_voc_blend.png -------------------------------------------------------------------------------- 
/pyvision/segmentation/pspnet/examples/pascal_voc_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/examples/pascal_voc_map.png -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/models/__init__.py -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/models/pspnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .backbone import * 6 | 7 | __extractors__ = { 8 | "resnet18" : resnet18, 9 | "resnet34" : resnet34, 10 | "resnet50" : resnet50, 11 | "resnet101" : resnet101, 12 | "resnet152" : resnet152 13 | } 14 | 15 | class PPM(nn.Module): 16 | 17 | """The Pyramid Pooling Module""" 18 | 19 | def __init__(self, input_dims, reduction_dims, scales): 20 | 21 | super(PPM, self).__init__() 22 | 23 | self.features = [] 24 | for scale in scales: 25 | self.features.append(nn.Sequential( 26 | nn.AdaptiveAvgPool2d(scale), 27 | nn.Conv2d(input_dims, reduction_dims, 1, bias=False), 28 | nn.BatchNorm2d(reduction_dims), 29 | nn.ReLU(inplace=True) 30 | )) 31 | self.features = nn.ModuleList(self.features) 32 | 33 | def forward(self, x): 34 | x_size = x.size() 35 | result = [x] 36 | for feature in self.features: 37 | result.append( 38 | F.interpolate(feature(x), size=x_size[2:], mode="bilinear", align_corners=True) 39 | ) 40 | 41 | result = torch.cat(result, 1) 42 | 43 | return result 44 | 45 | class PSPNet_model(nn.Module): 46 | 47 | """ The main PSPNet Module""" 48 | 49 | def __init__(self, extractor="resnet50", scales=[1,2,3,6], 50 | dropout=0.1, num_classes=21, zoom_factor=8, 51 | criterion=nn.CrossEntropyLoss(ignore_index=255), pretrained=True): 52 | 53 | super(PSPNet_model, self).__init__() 54 | 55 | if len(scales)%4 != 0: 56 | raise ValueError("len of scales should be 4 but got ", len(scales)) 57 | if num_classes <= 1: 58 | raise ValueError("num_classes should be > 1 but found ", num_classes) 59 | if zoom_factor not in [1, 2, 4, 8]: 60 | raise ValueError("zoom_factor should be in [1, 2, 4, 8] but got ", zoom_factor) 61 | 62 | self.extractor = extractor 63 | self.scales = scales 64 | self.dropout = dropout 65 | self.num_classes = num_classes 66 | self.zoom_factor = zoom_factor 67 | self.criterion = criterion 68 | self.pretrained = pretrained 69 | 70 | backbone = __extractors__[self.extractor](False) 71 | 72 | # build the layers 73 | self.layer0 = nn.Sequential( 74 | backbone.conv1, 75 | backbone.bn1, 76 | backbone.relu, 77 | backbone.conv2, 78 | backbone.bn2, 79 | backbone.relu, 80 | backbone.conv3, 81 | backbone.bn3, 82 | backbone.relu, 83 | backbone.maxpool, 84 | ) 85 | self.layer1 = backbone.layer1 86 | self.layer2 = backbone.layer2 87 | self.layer3 = backbone.layer3 88 | self.layer4 = backbone.layer4 89 | 90 | for n, m in self.layer3.named_modules(): 91 | if "conv2" in n: 92 | m.dilation, m.padding, m.stride = (2, 2), (2, 2), (1, 1) 93 | elif "downsample.0" in n: 94 | m.stride = (1, 1) 95 | for n, m in self.layer4.named_modules(): 96 | if "conv2" in n: 97 | m.dilation, 
m.padding, m.stride = (4, 4), (4, 4), (1, 1) 98 | elif "downsample.0" in n: 99 | m.stride = (1, 1) 100 | 101 | feature_dims = 2048 102 | 103 | self.ppm = PPM(feature_dims, int(feature_dims/len(scales)), scales) 104 | feature_dims *= 2 105 | 106 | self.cls = nn.Sequential( 107 | nn.Conv2d(feature_dims, 512, kernel_size=3, padding=1, bias=False), 108 | nn.BatchNorm2d(512), 109 | nn.ReLU(inplace=True), 110 | nn.Dropout2d(p=dropout), 111 | nn.Conv2d(512, num_classes, kernel_size=1) 112 | ) 113 | 114 | if not self.pretrained: 115 | self.aux = nn.Sequential( 116 | nn.Conv2d(1024, 256, kernel_size=3, padding=1, bias=False), 117 | nn.BatchNorm2d(256), 118 | nn.ReLU(inplace=True), 119 | nn.Dropout2d(p=dropout), 120 | nn.Conv2d(256, self.num_classes, kernel_size=1) 121 | ) 122 | 123 | 124 | def forward(self, x, y=None): 125 | 126 | x_size = x.shape 127 | 128 | assert (x_size[2] - 1) % 8 == 0 and (x_size[3] - 1) % 8 == 0 129 | 130 | h = int((x_size[2] - 1) / 8 * self.zoom_factor + 1) 131 | w = int((x_size[3] - 1) / 8 * self.zoom_factor + 1) 132 | 133 | x = self.layer0(x) 134 | x = self.layer1(x) 135 | x = self.layer2(x) 136 | x_aux = self.layer3(x) # for aux loss during training 137 | x = self.layer4(x_aux) 138 | 139 | x = self.ppm(x) 140 | 141 | x = self.cls(x) 142 | 143 | if self.zoom_factor != 1: 144 | x = F.interpolate(x, size=(h, w), mode='bilinear', align_corners=True) 145 | 146 | if not self.pretrained: 147 | aux = self.aux(x_aux) 148 | if self.zoom_factor != 1: 149 | aux = F.interpolate(aux, size=(h, w), mode="bilinear", align_corners=True) 150 | 151 | main_loss = self.criterion(x, y) 152 | aux_loss = self.criterion(aux, y) 153 | 154 | return x.max(1)[1], main_loss, aux_loss 155 | 156 | else: 157 | 158 | return x 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/readme.md: -------------------------------------------------------------------------------- 1 | # Pyramid Scene Parsing Network (PSPNet) 2 | 3 | PSPNet is a semantic segmentation architecture that achieves strong accuracy on standard segmentation benchmarks. It uses a *Pyramid Pooling Module* to aggregate global image context information and make better per-pixel predictions. 4 | 5 | Read the paper [here](https://arxiv.org/pdf/1612.01105.pdf). 6 | Check the [summary](#summary) for implementation details or [Usage](#Usage) for usage details. 7 | 8 | Check out this [notebook](https://github.com/pranjaldatta/PyVision/blob/master/demo/segmentation/pspnet/pspnet_demo.ipynb) to see how easily you can use PSPNet in 3-4 lines! 9 | 10 | If the above link does not work, please look [here](https://nbviewer.jupyter.org/github/pranjaldatta/PyVision/blob/master/demo/segmentation/pspnet/pspnet_demo.ipynb). 11 | 12 | ## Summary 13 | 14 | This implementation makes use of pretrained models provided by the authors [here](https://github.com/hszhao/semseg). Currently, PyVision PSPNet supports the models listed below.
15 | 16 | | Model | Backbone | Dataset | mIoU/mAcc/aAcc (Single Scale)| mIoU/mAcc/aAcc (Multi Scale) | 17 | |----|---|----|----|----| 18 | | pspnet-resnet50-voc2012 | Resnet50 | VOC2012 | 0.7705/0.8513/0.9489 | 0.7802/0.8580/0.9513 | 19 | | pspnet-resnet101-voc2012| Resnet101 | VOC2012 | 0.7907/0.8636/0.9534 | 0.7963/0.8677/0.9550 | 20 | | pspnet-resnet50-ade20k | Resnet50 | ADE20k | 0.4189/0.5227/0.8039 | 0.4284/0.5266/0.8106 | 21 | | pspnet-resnet50-cityscapes | Resnet50 | Cityscapes | 0.7730/0.8431/0.9597 | 0.7838/0.8486/0.9617 | 22 | 23 | ### Note regarding Implementation 24 | 25 | **Downsampling**: The network, as trained by the authors, operates on input images rescaled to 473x473. Without a GPU, this configuration is computationally expensive, so the implementation automatically downsamples the input image to a much smaller resolution of 225x225 **if** no GPU is available; **if** a GPU is available, no downsampling is applied. If needed, the user can override this default behavior by passing a boolean (True or False) to the **downsample** parameter in the *PSPNet* constructor (i.e. downsample=True enables downsampling and downsample=False disables it). 26 | 27 | **Class Names**: Often the user may only want the pixel-wise prediction matrix (wherein every value in the matrix denotes the *index* of the class to which the corresponding pixel belongs). In that case, the user also needs the list of *class names* on which the indices are based. To get that, simply do, 28 | 29 | ```python 30 | classlist = model.class_names() 31 | ``` 32 | 33 | ## Usage 34 | 35 | For more details, go through the docstrings/source code. 36 | 37 | **Brief** 38 | 39 | The model setup is done via the PSPNet class exposed via *pyvision.segmentation.pspnet*. All model-related configuration parameters can be set from the class constructor. 40 | 41 | Inference is run through the *inference()* method. 42 | 43 | **Quick Start** 44 | 45 | - To use the default *pspnet-resnet50-voc2012* model, 46 | 47 | ```python 48 | from pyvision.segmentation import pspnet 49 | 50 | # the model constructor 51 | # setting device=gpu and downsample=False. 52 | # Normally, explicitly setting downsample=False 53 | # isn't needed as it is automatically handled.
54 | # This is just for demo purposes 55 | m = pspnet.PSPNet(device="gpu", downsample=False) 56 | 57 | preds, color_img, blend_img = m.inference("|| | 94 | ||| | 95 | 96 | **Dataset: ADE20k** 97 | 98 | |Original Image|Segmentation Map| Blend Image| 99 | -----|-----|-----| 100 | ||| | 101 | 102 | **Dataset: Cityscapes** 103 | 104 | |Original Image|Segmentation Map| Blend Image| 105 | -----|-----|-----| 106 | ||| | 107 | -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/pyvision/segmentation/pspnet/util/__init__.py -------------------------------------------------------------------------------- /pyvision/segmentation/pspnet/util/utils.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import numpy as np 3 | 4 | def make_color_seg_map(seg_map_np, palette): 5 | color_img = Image.fromarray(seg_map_np.astype(np.uint8)).convert('P') 6 | color_img.putpalette(palette) 7 | return color_img -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | gdown 3 | numpy 4 | opencv-python==4.1.1 5 | matplotlib 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | def read_readme(): 4 | with open("README.md") as fp: 5 | long_desc = fp.read() 6 | return long_desc 7 | 8 | setup( 9 | name = "pyvision", 10 | version = "1.0.0", 11 | author = "Pranjal Datta", 12 | description = ("Ready-to-use implementations of some of the most common " 13 | "computer vision algorithms."), 14 | license = "MIT", 15 | long_description = read_readme(), 16 | url = "https://github.com/pranjaldatta/PyVision", 17 | 18 | packages = find_packages(), 19 | include_package_data = True, 20 | ) -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH='.' 2 | python tests/detection/yolov3/yolo_test.py 3 | -------------------------------------------------------------------------------- /tests/detection/detr/cars_test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/detection/detr/cars_test.jpg -------------------------------------------------------------------------------- /tests/detection/detr/detr_test.py: -------------------------------------------------------------------------------- 1 | from pyvision.detection import detr 2 | import time 3 | 4 | 5 | imgs = ["tests/detection/detr/cars_test.jpg", "tests/detection/detr/zebra_test.jpg"] 6 | 7 | print(detr.available_models()) # show available models 8 | 9 | # testing on defualt detr-resnet50 10 | detr_object = detr.DETR(show=False) # make show True to see detections 11 | print("Testing with detr-resnet50") 12 | print("-"*50) 13 | start_time = time.time() 14 | for img in imgs: 15 | _, objs = detr_object.detect(img) 16 | print("No. 
of detections: ", len(objs)) 17 | print("-"*50) 18 | 19 | print("Total detection time: ", time.time() - start_time) 20 | print("-"*50, end="\n\n") 21 | 22 | # testing on detr-resnet101 23 | detr_object = detr.DETR(model="detr-resnet101", show=False) 24 | print("Testing with detr-resnet101") 25 | print("-"*50) 26 | start_time = time.time() 27 | for img in imgs: 28 | _, objs = detr_object.detect(img) 29 | print("No. of detections: ", len(objs)) 30 | print("-"*50) 31 | 32 | print("Total detection time: ", time.time() - start_time) 33 | print("-"*50) -------------------------------------------------------------------------------- /tests/detection/detr/zebra_test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/detection/detr/zebra_test.jpg -------------------------------------------------------------------------------- /tests/detection/effdet/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/detection/effdet/2.jpg -------------------------------------------------------------------------------- /tests/detection/effdet/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/detection/effdet/3.jpg -------------------------------------------------------------------------------- /tests/detection/effdet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/detection/effdet/__init__.py -------------------------------------------------------------------------------- /tests/detection/effdet/test_effdet.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from PIL import Image 3 | from pyvision.detection import efficientdet 4 | 5 | model = efficientdet.EfficientDet("coco", thresh=0.95) 6 | 7 | img1 = cv2.imread("tests/detection/effdet/2.jpg") 8 | img2 = cv2.imread("tests/detection/effdet/3.jpg") 9 | 10 | imgs = [img1, img2] 11 | 12 | for img in imgs: 13 | img = cv2.resize(img, (416, 416)) 14 | res = model.detect(img) 15 | cv2.imshow("Frame", res[0]) 16 | if cv2.waitKey() == ord('q'): 17 | continue -------------------------------------------------------------------------------- /tests/detection/yolov3/cars_test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/detection/yolov3/cars_test.jpg -------------------------------------------------------------------------------- /tests/detection/yolov3/yolo_test.py: -------------------------------------------------------------------------------- 1 | from pyvision.detection import yolov3 2 | import time 3 | 4 | 5 | imgs = ["tests/detection/yolov3/cars_test.jpg", "tests/detection/yolov3/zebra_test.jpg"] 6 | 7 | print(yolov3.available_models()) # show available models 8 | 9 | # testing on defualt yolov3-416 10 | yolo = yolov3.YOLOv3(show=False) # make show True to see detections 11 | print("Testing with yolov3-416") 12 | print("-"*50) 13 | start_time = time.time() 14 | for img in imgs: 15 | _, objs = yolo.detect(img) 16 | 
print("No. of detections: ", len(objs)) 17 | print("-"*50) 18 | 19 | print("Total detection time: ", time.time() - start_time) 20 | print("-"*50, end="\n\n") 21 | 22 | # testing on yolov3-tiny 23 | yolo = yolov3.YOLOv3(model="yolov3-tiny", show=False) 24 | print("Testing with yolov3-tiny") 25 | print("-"*50) 26 | start_time = time.time() 27 | for img in imgs: 28 | _, objs = yolo.detect(img) 29 | print("No. of detections: ", len(objs)) 30 | print("-"*50) 31 | 32 | print("Total detection time: ", time.time() - start_time) 33 | print("-"*50) -------------------------------------------------------------------------------- /tests/detection/yolov3/zebra_test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/detection/yolov3/zebra_test.jpg -------------------------------------------------------------------------------- /tests/face_detection/facenet/imgs/BarackObama.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/face_detection/facenet/imgs/BarackObama.jpeg -------------------------------------------------------------------------------- /tests/face_detection/facenet/imgs/ManojBajpayee.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/face_detection/facenet/imgs/ManojBajpayee.jpeg -------------------------------------------------------------------------------- /tests/face_detection/facenet/imgs/MarkZuckerberg.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/face_detection/facenet/imgs/MarkZuckerberg.jpeg -------------------------------------------------------------------------------- /tests/face_detection/facenet/multiple_img.py: -------------------------------------------------------------------------------- 1 | from pyvision.face_detection.facenet import Facenet 2 | 3 | # In this example, we take all the imgs from the ./imgs folder and 4 | # generate embeddings for them. We also associate each embedding with their 5 | # filename which act as 'true labels'. Then we use these embeddings to 'classify' 6 | # whether a supplied image belongs to any one of given categories 7 | 8 | # First we instantiate the facenet object. saveLoc is the path to the 9 | # folder wherein the embeddings will be saved. 
By default it will be saved 10 | # as "embeddings.pkl" but can be changed with the "saveName" param 11 | fc = Facenet(saveLoc="save", saveName="embeddings2.pkl") 12 | 13 | embeddings = fc.generate_embeddings(img=None, path="demo/face_detection/facenet/imgs") 14 | 15 | did_match, preds, loss = fc.compare_embeddings( 16 | img="demo/face_detection/facenet/zucktest.jpeg", 17 | embedLoc="save/embeddings2.pkl", 18 | embeddings=None, 19 | label="MarkZuckerberg" 20 | ) 21 | print(did_match, preds, loss) 22 | print("For 'True' Image, we get: ", did_match) 23 | 24 | -------------------------------------------------------------------------------- /tests/face_detection/facenet/single_img.py: -------------------------------------------------------------------------------- 1 | from pyvision.face_detection.facenet import Facenet 2 | 3 | # In this example we take a single image from the ./imgs folder 4 | # Generate embeddings and store them. Then use those embeddings to 5 | # check whether a previously unseen image is classified accurately or not 6 | 7 | 8 | # First we instantiate the facenet object. saveLoc is the path to the 9 | # folder wherein the embeddings will be saved. By default it will be saved 10 | # as "embeddings.pkl" but can be changed with the "saveName" param 11 | fc = Facenet(saveLoc="save/") 12 | 13 | # generate embeds 14 | _ = fc.generate_embeddings(img=None, path="demo/face_detection/facenet/imgs/BarackObama.jpeg", label="Barack Obama") 15 | 16 | # now we compare it against a "False" image 17 | did_match, pred, loss = fc.compare_embeddings(None, img="demo/face_detection/facenet/imgs/ManojBajpayee.jpeg", label="Barack Obama", embedLoc="save/embeddings.pkl") 18 | print(did_match, pred, loss) 19 | print("Comparing against 'False' image, we get: ", did_match) -------------------------------------------------------------------------------- /tests/face_detection/facenet/zucktest.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/face_detection/facenet/zucktest.jpeg -------------------------------------------------------------------------------- /tests/gans/deep_convolutional_gan/gan_test.py: -------------------------------------------------------------------------------- 1 | from pyvision.gans.deep_convolutional_gan import DeepConvGAN 2 | 3 | ''' Initializing the DC_GAN module with the necessary paths ''' 4 | DeepConvGAN.inference(DeepConvGAN, set_weight_dir = 'dcgan-model.pth', set_gen_dir='result_img') 5 | -------------------------------------------------------------------------------- /tests/gans/wasserstein_gan/gan_test.py: -------------------------------------------------------------------------------- 1 | from pyvision.gans.wasserstein_gan import WassGAN 2 | 3 | wgan = WassGAN() 4 | wgan.inference(set_gen_dir="gen") -------------------------------------------------------------------------------- /tests/misc/NeuralStyleTransfer/nst_test.py: -------------------------------------------------------------------------------- 1 | #to run 2 | from pyvision.misc.NeuralStyleTransfer import NeuralStyle 3 | 4 | __PREFIX__ = "pyvision/misc/NeuralStyleTransfer/Examples/" 5 | #provide the paths to the two images 6 | style_img, content_img = (__PREFIX__+'images/style1.jpg', __PREFIX__+'images/content2.jpg') 7 | 8 | #if you do not wish to use gpu, pass use_gpu=False as a parameter, i.e., nst=Neural_Style(num_steps=300, use_gpu=False) 9 | nst = 
NeuralStyle(num_steps=300, retain_dims=False) 10 | 11 | #call the function to run neural style transfer 12 | output, time = nst.run_style_transfer(style_img, content_img) 13 | print("time taken: ", time) 14 | 15 | -------------------------------------------------------------------------------- /tests/misc/mtcnn/images/class2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/mtcnn/images/class2.jpg -------------------------------------------------------------------------------- /tests/misc/mtcnn/images/designated-survivor-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/mtcnn/images/designated-survivor-2.jpg -------------------------------------------------------------------------------- /tests/misc/mtcnn/images/person1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/mtcnn/images/person1.jpeg -------------------------------------------------------------------------------- /tests/misc/mtcnn/images/scenery.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/mtcnn/images/scenery.jpeg -------------------------------------------------------------------------------- /tests/misc/mtcnn/images/test5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/mtcnn/images/test5.jpg -------------------------------------------------------------------------------- /tests/misc/mtcnn/mtcnn_test.py: -------------------------------------------------------------------------------- 1 | from pyvision.misc.mtcnn import MTCNN 2 | from pyvision.misc.mtcnn.utils.visualize import show_boxes, _show_boxes 3 | from PIL import Image 4 | import cv2 5 | from glob import glob 6 | 7 | 8 | a = [glob("tests/misc/mtcnn/images/*.{}".format(s)) for s in ["jpg", "jpeg", "png"]] 9 | imgs = [i for ai in a for i in ai] 10 | 11 | mtcnn = MTCNN() 12 | for img in imgs: 13 | img = Image.open(img) 14 | b = mtcnn.detect(img) 15 | try: 16 | img = show_boxes(img, b) 17 | except: 18 | img = _show_boxes(img, b) 19 | 20 | img.show() -------------------------------------------------------------------------------- /tests/misc/mtcnn/net_test.py: -------------------------------------------------------------------------------- 1 | from mtcnn.nets import ONet, PNet, RNet, FlattenTensorCustom 2 | import torch 3 | import numpy as np 4 | from colorama import Fore 5 | 6 | 7 | 8 | pnet = PNet() 9 | pnet.summary() 10 | 11 | print("-"*50) 12 | 13 | t = FlattenTensorCustom() 14 | ar = np.random.rand(64, 3, 32, 32) 15 | tensor = torch.FloatTensor(ar) 16 | tensor = t(tensor) 17 | if list(tensor.shape) == [64, 3*32*32]: 18 | pass 19 | else: 20 | print(tensor.shape) 21 | print(Fore.RED+"ERROR: at FlattenTensorCustom Test"+Fore.RESET) 22 | exit() 23 | 24 | print("-"*50) 25 | 26 | rnet = RNet() 27 | rnet.summary() 28 | 29 | print("-"*50) 30 | 31 | onet = ONet() 32 | onet.summary() -------------------------------------------------------------------------------- 
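A small companion to `mtcnn_test.py` above: the same detect-and-visualize loop, but writing the annotated images to disk instead of opening a viewer. It assumes, as the use of `.show()` in `mtcnn_test.py` suggests, that `show_boxes()`/`_show_boxes()` return a PIL image; the output filenames are arbitrary illustrative choices.

```python
# Sketch: run MTCNN over the bundled test images and save the annotated results.
# Assumes show_boxes()/_show_boxes() return a PIL image (as implied by .show()
# in mtcnn_test.py); the filenames below are illustrative only.
from glob import glob
from PIL import Image
from pyvision.misc.mtcnn import MTCNN
from pyvision.misc.mtcnn.utils.visualize import show_boxes, _show_boxes

mtcnn = MTCNN()
for idx, path in enumerate(sorted(glob("tests/misc/mtcnn/images/*"))):
    img = Image.open(path)
    boxes = mtcnn.detect(img)
    try:
        annotated = show_boxes(img, boxes)
    except Exception:
        annotated = _show_boxes(img, boxes)   # fallback used in mtcnn_test.py
    annotated.save(f"mtcnn_detection_{idx}.png")
```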
/tests/misc/noise2noise/Output_gaussian/denoised_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_gaussian/denoised_1.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_gaussian/denoised_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_gaussian/denoised_2.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_gaussian/denoised_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_gaussian/denoised_3.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_gaussian/source_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_gaussian/source_1.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_gaussian/source_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_gaussian/source_2.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_gaussian/source_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_gaussian/source_3.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_text/denoised_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_text/denoised_1.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_text/denoised_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_text/denoised_2.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_text/denoised_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_text/denoised_3.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_text/source_1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_text/source_1.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_text/source_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_text/source_2.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/Output_text/source_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/Output_text/source_3.png -------------------------------------------------------------------------------- /tests/misc/noise2noise/n2n_test.py: -------------------------------------------------------------------------------- 1 | from pyvision.misc.noise2noise import Noise2Noise 2 | import os 3 | 4 | #data_path = 5 | data_path = os.getcwd() + "/tests/misc/noise2noise/test_images" 6 | 7 | #noise types: gaussian, text 8 | n2n = Noise2Noise(noise='text',data_path=data_path,mode='test') 9 | -------------------------------------------------------------------------------- /tests/misc/noise2noise/test.py: -------------------------------------------------------------------------------- 1 | from pyvision.misc.noise2noise.model import Noise2Noise 2 | import cv2 3 | from PIL import Image 4 | import numpy as np 5 | 6 | def gaussian_noise(img): 7 | ''' 8 | Add Gaussian noise in dataset 9 | Input: img of type PIL.Image 10 | Output: Noisy mage of type PIL.Image 11 | ''' 12 | w,h = img.size 13 | c = len(img.getbands()) 14 | 15 | sigma = np.random.uniform(20,50) 16 | gauss = np.random.normal(10,25,(h,w,c)) 17 | noisy = np.array(img) + gauss 18 | 19 | #Values less than 0 become 0 and more than 255 become 255 20 | noisy = np.clip(noisy, 0, 255).astype(np.uint8) 21 | img = Image.fromarray(noisy) 22 | 23 | return img 24 | 25 | n2n = Noise2Noise(noise="gaussian") 26 | 27 | img_path = "/home/pranjal/Projects/clone/PyVision/tests/misc/noise2noise/test_images/test.jpg" 28 | img = Image.open(img_path) 29 | img = gaussian_noise(img) 30 | 31 | img.show() 32 | img.save("noised.png") 33 | n2n.inference(img, show=False, save="denoised.png") -------------------------------------------------------------------------------- /tests/misc/noise2noise/test_images/test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/test_images/test.jpg -------------------------------------------------------------------------------- /tests/misc/noise2noise/test_images/test1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/test_images/test1.jpg -------------------------------------------------------------------------------- /tests/misc/noise2noise/test_images/test2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/misc/noise2noise/test_images/test2.jpg 
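`test.py` above hard-codes an absolute path to the test image. Below is a minimal, repo-root-relative sketch of the same noise-then-denoise flow; the noise parameters simply mirror `gaussian_noise()` in `test.py`, and the output filename is an arbitrary choice rather than anything the library expects.

```python
# Sketch: noise2noise inference on the bundled test image, using a path
# relative to the repo root instead of a hard-coded absolute path.
import os
import numpy as np
from PIL import Image
from pyvision.misc.noise2noise.model import Noise2Noise

img_path = os.path.join("tests", "misc", "noise2noise", "test_images", "test.jpg")
img = Image.open(img_path)

# Add synthetic Gaussian noise, mirroring gaussian_noise() in test.py above
h, w = img.size[1], img.size[0]
noise = np.random.normal(10, 25, (h, w, len(img.getbands())))
noisy = np.clip(np.array(img) + noise, 0, 255).astype(np.uint8)
noisy_img = Image.fromarray(noisy)

n2n = Noise2Noise(noise="gaussian")
n2n.inference(noisy_img, show=False, save="denoised_sketch.png")
```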
-------------------------------------------------------------------------------- /tests/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranjaldatta/PyVision/ad57b27cf790c267772402e47bd9e140ba6f549e/tests/readme.md -------------------------------------------------------------------------------- /tests/segmentation/fcn/fcn101.py: -------------------------------------------------------------------------------- 1 | from pyvision.segmentation import fcn 2 | from glob import glob 3 | 4 | fcn_model = fcn.FCN(model="fcn-resnet101-coco", device="cpu", show=False) 5 | 6 | for idx, item in enumerate(glob("pyvision/segmentation/fcn/examples/*.jpg")): 7 | print(f"#### Image #{idx+1} ####") 8 | preds, seg_map, blend_map = fcn_model.inference(item, save=item.split(".")[0]+"_101") 9 | print("Prediction matrix shape: ", preds.shape) 10 | print("Segmentation Map shape: ", seg_map.size) 11 | print("Blend Map shape: ", blend_map.size) -------------------------------------------------------------------------------- /tests/segmentation/fcn/fcn50.py: -------------------------------------------------------------------------------- 1 | from pyvision.segmentation import fcn 2 | from glob import glob 3 | 4 | fcn_model = fcn.FCN(device="cpu", show=False) 5 | 6 | for idx, item in enumerate(glob("pyvision/segmentation/fcn/examples/*.jpg")): 7 | print(f"#### Image #{idx+1} ####") 8 | preds, seg_map, blend_map = fcn_model.inference(item, save=item.split(".")[0]+"_50") 9 | print("Prediction matrix shape: ", preds.shape) 10 | print("Segmentation Map shape: ", seg_map.size) 11 | print("Blend Map shape: ", blend_map.size) -------------------------------------------------------------------------------- /tests/segmentation/pspnet/pspnet_test.py: -------------------------------------------------------------------------------- 1 | from pyvision.segmentation.pspnet import PSPNet 2 | 3 | m = PSPNet(model="pspnet-resnet50-ade20k") 4 | 5 | m.inference("pyvision/segmentation/pspnet/examples/ade20k.jpg", save="ade20k") 6 | --------------------------------------------------------------------------------
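As a companion to `pspnet_test.py` above, here is a short CPU-only sketch that exercises the Cityscapes checkpoint. The constructor arguments and the three unpacked return values follow the PSPNet readme earlier in this dump; treat `device="cpu"` and the explicit `downsample=True` as assumptions if your local version differs.

```python
# Sketch: PSPNet inference on the bundled Cityscapes example, CPU-only.
# device="cpu" and downsample=True follow the readme's description and are
# assumptions about the constructor, not a verified API contract.
from pyvision.segmentation.pspnet import PSPNet

# Per the readme, inputs are downsampled to 225x225 automatically when no GPU
# is available; passing downsample=True just makes that behavior explicit.
m = PSPNet(model="pspnet-resnet50-cityscapes", device="cpu", downsample=True)

classes = m.class_names()   # index -> class name list used by the prediction matrix
print("Number of classes:", len(classes))

preds, color_img, blend_img = m.inference(
    "pyvision/segmentation/pspnet/examples/cityscape.png", save="cityscapes_cpu"
)
```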