├── schp ├── utils │ ├── __init__.py │ ├── consistency_loss.py │ ├── kl_loss.py │ ├── schp.py │ ├── warmup_scheduler.py │ ├── soft_dice_loss.py │ ├── transforms.py │ ├── miou.py │ ├── criterion.py │ ├── encoding.py │ └── lovasz_softmax.py ├── datasets │ ├── __init__.py │ ├── target_generation.py │ ├── simple_extractor_dataset.py │ └── datasets.py ├── requirements.txt ├── .gitignore ├── modules │ ├── __init__.py │ ├── src │ │ ├── checks.h │ │ ├── utils │ │ │ ├── checks.h │ │ │ ├── common.h │ │ │ └── cuda.cuh │ │ ├── inplace_abn.h │ │ ├── inplace_abn_cpu.cpp │ │ ├── inplace_abn.cpp │ │ ├── inplace_abn_cpu_only.cpp │ │ └── inplace_abn_cuda_half.cu │ ├── misc.py │ ├── dense.py │ ├── deeplab.py │ ├── bn.py │ ├── residual.py │ └── functions.py ├── networks │ ├── __init__.py │ ├── context_encoding │ │ ├── psp.py │ │ ├── aspp.py │ │ └── ocnet.py │ └── backbone │ │ ├── resnext.py │ │ ├── mobilenetv2.py │ │ └── resnet.py ├── LICENSE ├── environment.yaml ├── simple_extractor.py ├── README.md ├── evaluate.py └── train.py ├── requirements.txt ├── assets ├── demo2.jpg ├── demo3.jpg ├── demo2atr.png ├── demo2lip.png ├── demo3atr.png ├── demo3lip.png ├── atrexample.png ├── demo2pascal.png ├── demo3pascal.png ├── lipexample.png └── pascalexample.png ├── __init__.py ├── pyproject.toml ├── .github └── workflows │ └── publish.yml ├── HumanParserPascalCustomNode.py ├── HumanParserATRCustomNode.py ├── HumanParserLIPCustomNode.py ├── .gitignore ├── utils.py └── README.md /schp/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /schp/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /schp/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python==4.4.0.46 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ninja 2 | torch 3 | opencv-python 4 | numpy 5 | -------------------------------------------------------------------------------- /schp/.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__ 2 | 3 | data/ 4 | log/ 5 | pretrain_model/ 6 | -------------------------------------------------------------------------------- /assets/demo2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/demo2.jpg -------------------------------------------------------------------------------- /assets/demo3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/demo3.jpg -------------------------------------------------------------------------------- /assets/demo2atr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/demo2atr.png -------------------------------------------------------------------------------- /assets/demo2lip.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/demo2lip.png -------------------------------------------------------------------------------- /assets/demo3atr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/demo3atr.png -------------------------------------------------------------------------------- /assets/demo3lip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/demo3lip.png -------------------------------------------------------------------------------- /assets/atrexample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/atrexample.png -------------------------------------------------------------------------------- /assets/demo2pascal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/demo2pascal.png -------------------------------------------------------------------------------- /assets/demo3pascal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/demo3pascal.png -------------------------------------------------------------------------------- /assets/lipexample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/lipexample.png -------------------------------------------------------------------------------- /assets/pascalexample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/pascalexample.png -------------------------------------------------------------------------------- /schp/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .bn import ABN, InPlaceABN, InPlaceABNSync 2 | from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE 3 | from .misc import GlobalAvgPool2d, SingleGPU 4 | from .residual import IdentityResidualBlock 5 | from .dense import DenseModule 6 | -------------------------------------------------------------------------------- /schp/networks/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .AugmentCE2P import resnet101 4 | 5 | __factory = { 6 | 'resnet101': resnet101, 7 | } 8 | 9 | 10 | def init_model(name, *args, **kwargs): 11 | if name not in __factory.keys(): 12 | raise KeyError("Unknown model arch: {}".format(name)) 13 | return __factory[name](*args, **kwargs) -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from .HumanParserLIPCustomNode import HumanParserLIPCustomNode 2 | from .HumanParserATRCustomNode import HumanParserATRCustomNode 3 | from .HumanParserPascalCustomNode import HumanParserPascalCustomNode 4 | 5 | NODE_CLASS_MAPPINGS = { 6 | "Cozy Human Parser LIP" : HumanParserLIPCustomNode, 7 | "Cozy Human Parser 
ATR" : HumanParserATRCustomNode, 8 | "Cozy Human Parser Pascal" : HumanParserPascalCustomNode, 9 | } 10 | 11 | __all__ = ['NODE_CLASS_MAPPINGS'] -------------------------------------------------------------------------------- /schp/modules/src/checks.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | // Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT 6 | #ifndef AT_CHECK 7 | #define AT_CHECK AT_ASSERT 8 | #endif 9 | 10 | #define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor") 11 | #define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor") 12 | #define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous") 13 | 14 | #define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 15 | #define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x) -------------------------------------------------------------------------------- /schp/modules/src/utils/checks.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | // Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT 6 | #ifndef AT_CHECK 7 | #define AT_CHECK AT_ASSERT 8 | #endif 9 | 10 | #define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor") 11 | #define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor") 12 | #define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous") 13 | 14 | #define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 15 | #define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x) -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "human-parser-comfyui-node" 3 | description = "A ComfyUI node to automatically extract masks for body regions and clothing/fashion items. Made with 💚 by the CozyMantis squad." 
4 | version = "1.0.0" 5 | license = { file = "LICENSE.md" } 6 | dependencies = ["ninja", "torch", "opencv-python", "numpy"] 7 | 8 | [project.urls] 9 | Repository = "https://github.com/cozymantis/human-parser-comfyui-node" 10 | # Used by Comfy Registry https://comfyregistry.org 11 | 12 | [tool.comfy] 13 | PublisherId = "cozymantis" 14 | DisplayName = "Human Body and Clothes Parser - Segmentation ComfyUI Node" 15 | Icon = "" 16 | -------------------------------------------------------------------------------- /schp/modules/misc.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.distributed as dist 4 | 5 | class GlobalAvgPool2d(nn.Module): 6 | def __init__(self): 7 | """Global average pooling over the input's spatial dimensions""" 8 | super(GlobalAvgPool2d, self).__init__() 9 | 10 | def forward(self, inputs): 11 | in_size = inputs.size() 12 | return inputs.view((in_size[0], in_size[1], -1)).mean(dim=2) 13 | 14 | class SingleGPU(nn.Module): 15 | def __init__(self, module): 16 | super(SingleGPU, self).__init__() 17 | self.module=module 18 | 19 | def forward(self, input): 20 | return self.module(input.cuda(non_blocking=True)) 21 | 22 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish to Comfy registry 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - "pyproject.toml" 9 | 10 | permissions: 11 | issues: write 12 | 13 | jobs: 14 | publish-node: 15 | name: Publish Custom Node to registry 16 | runs-on: ubuntu-latest 17 | if: ${{ github.repository_owner == 'cozymantis' }} 18 | steps: 19 | - name: Check out code 20 | uses: actions/checkout@v4 21 | - name: Publish Custom Node 22 | uses: Comfy-Org/publish-node-action@v1 23 | with: 24 | ## Add your own personal access token to your Github Repository secrets and reference it here. 25 | personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }} -------------------------------------------------------------------------------- /schp/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Peike Li 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /schp/utils/consistency_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : kl_loss.py 8 | @Time : 7/23/19 4:02 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 12 | """ 13 | import torch 14 | import torch.nn.functional as F 15 | from torch import nn 16 | from datasets.target_generation import generate_edge_tensor 17 | 18 | 19 | class ConsistencyLoss(nn.Module): 20 | def __init__(self, ignore_index=255): 21 | super(ConsistencyLoss, self).__init__() 22 | self.ignore_index=ignore_index 23 | 24 | def forward(self, parsing, edge, label): 25 | parsing_pre = torch.argmax(parsing, dim=1) 26 | parsing_pre[label==self.ignore_index]=self.ignore_index 27 | generated_edge = generate_edge_tensor(parsing_pre) 28 | edge_pre = torch.argmax(edge, dim=1) 29 | v_generate_edge = generated_edge[label!=255] 30 | v_edge_pre = edge_pre[label!=255] 31 | v_edge_pre = v_edge_pre.type(torch.cuda.FloatTensor) 32 | positive_union = (v_generate_edge==1)&(v_edge_pre==1) # only the positive values count 33 | return F.smooth_l1_loss(v_generate_edge[positive_union].squeeze(0), v_edge_pre[positive_union].squeeze(0)) 34 | -------------------------------------------------------------------------------- /schp/datasets/target_generation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | 4 | 5 | def generate_edge_tensor(label, edge_width=3): 6 | label = label.type(torch.cuda.FloatTensor) 7 | if len(label.shape) == 2: 8 | label = label.unsqueeze(0) 9 | n, h, w = label.shape 10 | edge = torch.zeros(label.shape, dtype=torch.float).cuda() 11 | # right 12 | edge_right = edge[:, 1:h, :] 13 | edge_right[(label[:, 1:h, :] != label[:, :h - 1, :]) & (label[:, 1:h, :] != 255) 14 | & (label[:, :h - 1, :] != 255)] = 1 15 | 16 | # up 17 | edge_up = edge[:, :, :w - 1] 18 | edge_up[(label[:, :, :w - 1] != label[:, :, 1:w]) 19 | & (label[:, :, :w - 1] != 255) 20 | & (label[:, :, 1:w] != 255)] = 1 21 | 22 | # upright 23 | edge_upright = edge[:, :h - 1, :w - 1] 24 | edge_upright[(label[:, :h - 1, :w - 1] != label[:, 1:h, 1:w]) 25 | & (label[:, :h - 1, :w - 1] != 255) 26 | & (label[:, 1:h, 1:w] != 255)] = 1 27 | 28 | # bottomright 29 | edge_bottomright = edge[:, :h - 1, 1:w] 30 | edge_bottomright[(label[:, :h - 1, 1:w] != label[:, 1:h, :w - 1]) 31 | & (label[:, :h - 1, 1:w] != 255) 32 | & (label[:, 1:h, :w - 1] != 255)] = 1 33 | 34 | kernel = torch.ones((1, 1, edge_width, edge_width), dtype=torch.float).cuda() 35 | with torch.no_grad(): 36 | edge = edge.unsqueeze(1) 37 | edge = F.conv2d(edge, kernel, stride=1, padding=1) 38 | edge[edge!=0] = 1 39 | edge = edge.squeeze() 40 | return edge 41 | -------------------------------------------------------------------------------- /schp/modules/dense.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .bn import ABN 7 | 8 | 9 | class DenseModule(nn.Module): 10 | def __init__(self, in_channels, growth, layers, bottleneck_factor=4, norm_act=ABN, dilation=1): 11 | super(DenseModule, self).__init__() 12 | self.in_channels = 
in_channels 13 | self.growth = growth 14 | self.layers = layers 15 | 16 | self.convs1 = nn.ModuleList() 17 | self.convs3 = nn.ModuleList() 18 | for i in range(self.layers): 19 | self.convs1.append(nn.Sequential(OrderedDict([ 20 | ("bn", norm_act(in_channels)), 21 | ("conv", nn.Conv2d(in_channels, self.growth * bottleneck_factor, 1, bias=False)) 22 | ]))) 23 | self.convs3.append(nn.Sequential(OrderedDict([ 24 | ("bn", norm_act(self.growth * bottleneck_factor)), 25 | ("conv", nn.Conv2d(self.growth * bottleneck_factor, self.growth, 3, padding=dilation, bias=False, 26 | dilation=dilation)) 27 | ]))) 28 | in_channels += self.growth 29 | 30 | @property 31 | def out_channels(self): 32 | return self.in_channels + self.growth * self.layers 33 | 34 | def forward(self, x): 35 | inputs = [x] 36 | for i in range(self.layers): 37 | x = torch.cat(inputs, dim=1) 38 | x = self.convs1[i](x) 39 | x = self.convs3[i](x) 40 | inputs += [x] 41 | 42 | return torch.cat(inputs, dim=1) 43 | -------------------------------------------------------------------------------- /schp/environment.yaml: -------------------------------------------------------------------------------- 1 | name: schp 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=main 7 | - blas=1.0=mkl 8 | - ca-certificates=2020.12.8=h06a4308_0 9 | - certifi=2020.12.5=py38h06a4308_0 10 | - cudatoolkit=10.1.243=h6bb024c_0 11 | - freetype=2.10.4=h5ab3b9f_0 12 | - intel-openmp=2020.2=254 13 | - jpeg=9b=h024ee3a_2 14 | - lcms2=2.11=h396b838_0 15 | - ld_impl_linux-64=2.33.1=h53a641e_7 16 | - libedit=3.1.20191231=h14c3975_1 17 | - libffi=3.3=he6710b0_2 18 | - libgcc-ng=9.1.0=hdf63c60_0 19 | - libpng=1.6.37=hbc83047_0 20 | - libstdcxx-ng=9.1.0=hdf63c60_0 21 | - libtiff=4.1.0=h2733197_1 22 | - lz4-c=1.9.2=heb0550a_3 23 | - mkl=2020.2=256 24 | - mkl-service=2.3.0=py38he904b0f_0 25 | - mkl_fft=1.2.0=py38h23d657b_0 26 | - mkl_random=1.1.1=py38h0573a6f_0 27 | - ncurses=6.2=he6710b0_1 28 | - ninja=1.10.2=py38hff7bd54_0 29 | - numpy=1.19.2=py38h54aff64_0 30 | - numpy-base=1.19.2=py38hfa32c7d_0 31 | - olefile=0.46=py_0 32 | - openssl=1.1.1i=h27cfd23_0 33 | - pillow=8.0.1=py38he98fc37_0 34 | - pip=20.3.3=py38h06a4308_0 35 | - python=3.8.5=h7579374_1 36 | - readline=8.0=h7b6447c_0 37 | - setuptools=51.0.0=py38h06a4308_2 38 | - six=1.15.0=py38h06a4308_0 39 | - sqlite=3.33.0=h62c20be_0 40 | - tk=8.6.10=hbc83047_0 41 | - tqdm=4.55.0=pyhd3eb1b0_0 42 | - wheel=0.36.2=pyhd3eb1b0_0 43 | - xz=5.2.5=h7b6447c_0 44 | - zlib=1.2.11=h7b6447c_3 45 | - zstd=1.4.5=h9ceee32_0 46 | - pytorch=1.5.1=py3.8_cuda10.1.243_cudnn7.6.3_0 47 | - torchvision=0.6.1=py38_cu101 48 | prefix: /home/peike/opt/anaconda3/envs/schp 49 | 50 | -------------------------------------------------------------------------------- /schp/modules/src/utils/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | /* 6 | * Functions to share code between CPU and GPU 7 | */ 8 | 9 | #ifdef __CUDACC__ 10 | // CUDA versions 11 | 12 | #define HOST_DEVICE __host__ __device__ 13 | #define INLINE_HOST_DEVICE __host__ __device__ inline 14 | #define FLOOR(x) floor(x) 15 | 16 | #if __CUDA_ARCH__ >= 600 17 | // Recent compute capabilities have block-level atomicAdd for all data types, so we use that 18 | #define ACCUM(x,y) atomicAdd_block(&(x),(y)) 19 | #else 20 | // Older architectures don't have block-level atomicAdd, nor atomicAdd for doubles, so we defer to atomicAdd for float 21 | // and use the known atomicCAS-based 
implementation for double 22 | template 23 | __device__ inline data_t atomic_add(data_t *address, data_t val) { 24 | return atomicAdd(address, val); 25 | } 26 | 27 | template<> 28 | __device__ inline double atomic_add(double *address, double val) { 29 | unsigned long long int* address_as_ull = (unsigned long long int*)address; 30 | unsigned long long int old = *address_as_ull, assumed; 31 | do { 32 | assumed = old; 33 | old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); 34 | } while (assumed != old); 35 | return __longlong_as_double(old); 36 | } 37 | 38 | #define ACCUM(x,y) atomic_add(&(x),(y)) 39 | #endif // #if __CUDA_ARCH__ >= 600 40 | 41 | #else 42 | // CPU versions 43 | 44 | #define HOST_DEVICE 45 | #define INLINE_HOST_DEVICE inline 46 | #define FLOOR(x) std::floor(x) 47 | #define ACCUM(x,y) (x) += (y) 48 | 49 | #endif // #ifdef __CUDACC__ -------------------------------------------------------------------------------- /schp/utils/kl_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : kl_loss.py 8 | @Time : 7/23/19 4:02 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 12 | """ 13 | import torch 14 | import torch.nn.functional as F 15 | from torch import nn 16 | 17 | 18 | def flatten_probas(input, target, labels, ignore=255): 19 | """ 20 | Flattens predictions in the batch. 21 | """ 22 | B, C, H, W = input.size() 23 | input = input.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C 24 | target = target.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C 25 | labels = labels.view(-1) 26 | if ignore is None: 27 | return input, target 28 | valid = (labels != ignore) 29 | vinput = input[valid.nonzero().squeeze()] 30 | vtarget = target[valid.nonzero().squeeze()] 31 | return vinput, vtarget 32 | 33 | 34 | class KLDivergenceLoss(nn.Module): 35 | def __init__(self, ignore_index=255, T=1): 36 | super(KLDivergenceLoss, self).__init__() 37 | self.ignore_index=ignore_index 38 | self.T = T 39 | 40 | def forward(self, input, target, label): 41 | log_input_prob = F.log_softmax(input / self.T, dim=1) 42 | target_porb = F.softmax(target / self.T, dim=1) 43 | loss = F.kl_div(*flatten_probas(log_input_prob, target_porb, label, ignore=self.ignore_index)) 44 | return self.T*self.T*loss # balanced 45 | -------------------------------------------------------------------------------- /schp/networks/context_encoding/psp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : psp.py 8 | @Time : 8/4/19 3:36 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 12 | """ 13 | 14 | import torch 15 | import torch.nn as nn 16 | from torch.nn import functional as F 17 | 18 | from modules import InPlaceABNSync 19 | 20 | 21 | class PSPModule(nn.Module): 22 | """ 23 | Reference: 24 | Zhao, Hengshuang, et al. 
*"Pyramid scene parsing network."* 25 | """ 26 | def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)): 27 | super(PSPModule, self).__init__() 28 | 29 | self.stages = [] 30 | self.stages = nn.ModuleList([self._make_stage(features, out_features, size) for size in sizes]) 31 | self.bottleneck = nn.Sequential( 32 | nn.Conv2d(features + len(sizes) * out_features, out_features, kernel_size=3, padding=1, dilation=1, 33 | bias=False), 34 | InPlaceABNSync(out_features), 35 | ) 36 | 37 | def _make_stage(self, features, out_features, size): 38 | prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) 39 | conv = nn.Conv2d(features, out_features, kernel_size=1, bias=False) 40 | bn = InPlaceABNSync(out_features) 41 | return nn.Sequential(prior, conv, bn) 42 | 43 | def forward(self, feats): 44 | h, w = feats.size(2), feats.size(3) 45 | priors = [F.interpolate(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) for stage in 46 | self.stages] + [feats] 47 | bottle = self.bottleneck(torch.cat(priors, 1)) 48 | return bottle -------------------------------------------------------------------------------- /schp/modules/src/utils/cuda.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* 4 | * General settings and functions 5 | */ 6 | const int WARP_SIZE = 32; 7 | const int MAX_BLOCK_SIZE = 1024; 8 | 9 | static int getNumThreads(int nElem) { 10 | int threadSizes[6] = {32, 64, 128, 256, 512, MAX_BLOCK_SIZE}; 11 | for (int i = 0; i < 6; ++i) { 12 | if (nElem <= threadSizes[i]) { 13 | return threadSizes[i]; 14 | } 15 | } 16 | return MAX_BLOCK_SIZE; 17 | } 18 | 19 | /* 20 | * Reduction utilities 21 | */ 22 | template 23 | __device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, 24 | unsigned int mask = 0xffffffff) { 25 | #if CUDART_VERSION >= 9000 26 | return __shfl_xor_sync(mask, value, laneMask, width); 27 | #else 28 | return __shfl_xor(value, laneMask, width); 29 | #endif 30 | } 31 | 32 | __device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); } 33 | 34 | template 35 | struct Pair { 36 | T v1, v2; 37 | __device__ Pair() {} 38 | __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {} 39 | __device__ Pair(T v) : v1(v), v2(v) {} 40 | __device__ Pair(int v) : v1(v), v2(v) {} 41 | __device__ Pair &operator+=(const Pair &a) { 42 | v1 += a.v1; 43 | v2 += a.v2; 44 | return *this; 45 | } 46 | }; 47 | 48 | template 49 | static __device__ __forceinline__ T warpSum(T val) { 50 | #if __CUDA_ARCH__ >= 300 51 | for (int i = 0; i < getMSB(WARP_SIZE); ++i) { 52 | val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE); 53 | } 54 | #else 55 | __shared__ T values[MAX_BLOCK_SIZE]; 56 | values[threadIdx.x] = val; 57 | __threadfence_block(); 58 | const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE; 59 | for (int i = 1; i < WARP_SIZE; i++) { 60 | val += values[base + ((i + threadIdx.x) % WARP_SIZE)]; 61 | } 62 | #endif 63 | return val; 64 | } 65 | 66 | template 67 | static __device__ __forceinline__ Pair warpSum(Pair value) { 68 | value.v1 = warpSum(value.v1); 69 | value.v2 = warpSum(value.v2); 70 | return value; 71 | } -------------------------------------------------------------------------------- /HumanParserPascalCustomNode.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from PIL import Image 4 | 5 | from .utils import generate 6 | ['Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs'] 7 | 
class HumanParserPascalCustomNode: 8 | @classmethod 9 | def INPUT_TYPES(cls): 10 | return { 11 | "required": { 12 | "image" : ("IMAGE", {}), 13 | "background": ("BOOLEAN", {"default": False}), 14 | "head": ("BOOLEAN", {"default": False}), 15 | "torso": ("BOOLEAN", {"default": False}), 16 | "upper_arms": ("BOOLEAN", {"default": False}), 17 | "lower_arms": ("BOOLEAN", {"default": False}), 18 | "upper_legs": ("BOOLEAN", {"default": False}), 19 | "lower_legs": ("BOOLEAN", {"default": False}), 20 | }, 21 | } 22 | 23 | RETURN_TYPES = ("MASK", "IMAGE") 24 | RETURN_NAMES = ("mask", "map") 25 | FUNCTION = "run" 26 | CATEGORY = "CozyMantis" 27 | 28 | def run(self, image, background, head, torso, upper_arms, lower_arms, upper_legs, lower_legs): 29 | if torch.cuda.is_available(): 30 | device = 'cuda' 31 | else: 32 | device = 'cpu' 33 | 34 | output_img = generate(image[0], 'pascal', device) 35 | 36 | mask_components = [] 37 | 38 | if background: 39 | mask_components.append(0) 40 | if head: 41 | mask_components.append(1) 42 | if torso: 43 | mask_components.append(2) 44 | if upper_arms: 45 | mask_components.append(3) 46 | if lower_arms: 47 | mask_components.append(4) 48 | if upper_legs: 49 | mask_components.append(5) 50 | if lower_legs: 51 | mask_components.append(6) 52 | 53 | mask = np.isin(output_img, mask_components).astype(np.uint8) 54 | mask_image = Image.fromarray(mask * 255) 55 | mask_image = mask_image.convert("RGB") 56 | mask_image = torch.from_numpy(np.array(mask_image).astype(np.float32) / 255.0).unsqueeze(0) 57 | 58 | output_img = output_img.convert('RGB') 59 | output_img = torch.from_numpy(np.array(output_img).astype(np.float32) / 255.0).unsqueeze(0) 60 | return (mask_image[:, :, :, 0], output_img,) 61 | -------------------------------------------------------------------------------- /schp/datasets/simple_extractor_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : dataset.py 8 | @Time : 8/30/19 9:12 PM 9 | @Desc : Dataset Definition 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 
12 | """ 13 | 14 | import os 15 | import cv2 16 | import numpy as np 17 | 18 | from torch.utils import data 19 | from utils.transforms import get_affine_transform 20 | 21 | 22 | class SimpleFolderDataset(data.Dataset): 23 | def __init__(self, root, input_size=[512, 512], transform=None): 24 | self.root = root 25 | self.input_size = input_size 26 | self.transform = transform 27 | self.aspect_ratio = input_size[1] * 1.0 / input_size[0] 28 | self.input_size = np.asarray(input_size) 29 | 30 | self.file_list = os.listdir(self.root) 31 | 32 | def __len__(self): 33 | return len(self.file_list) 34 | 35 | def _box2cs(self, box): 36 | x, y, w, h = box[:4] 37 | return self._xywh2cs(x, y, w, h) 38 | 39 | def _xywh2cs(self, x, y, w, h): 40 | center = np.zeros((2), dtype=np.float32) 41 | center[0] = x + w * 0.5 42 | center[1] = y + h * 0.5 43 | if w > self.aspect_ratio * h: 44 | h = w * 1.0 / self.aspect_ratio 45 | elif w < self.aspect_ratio * h: 46 | w = h * self.aspect_ratio 47 | scale = np.array([w, h], dtype=np.float32) 48 | return center, scale 49 | 50 | def __getitem__(self, index): 51 | img_name = self.file_list[index] 52 | img_path = os.path.join(self.root, img_name) 53 | img = cv2.imread(img_path, cv2.IMREAD_COLOR) 54 | h, w, _ = img.shape 55 | 56 | # Get person center and scale 57 | person_center, s = self._box2cs([0, 0, w - 1, h - 1]) 58 | r = 0 59 | trans = get_affine_transform(person_center, s, r, self.input_size) 60 | input = cv2.warpAffine( 61 | img, 62 | trans, 63 | (int(self.input_size[1]), int(self.input_size[0])), 64 | flags=cv2.INTER_LINEAR, 65 | borderMode=cv2.BORDER_CONSTANT, 66 | borderValue=(0, 0, 0)) 67 | 68 | input = self.transform(input) 69 | meta = { 70 | 'name': img_name, 71 | 'center': person_center, 72 | 'height': h, 73 | 'width': w, 74 | 'scale': s, 75 | 'rotation': r 76 | } 77 | 78 | return input, meta 79 | -------------------------------------------------------------------------------- /schp/utils/schp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : schp.py 8 | @Time : 4/8/19 2:11 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 
12 | """ 13 | 14 | import os 15 | import torch 16 | import modules 17 | 18 | def moving_average(net1, net2, alpha=1): 19 | for param1, param2 in zip(net1.parameters(), net2.parameters()): 20 | param1.data *= (1.0 - alpha) 21 | param1.data += param2.data * alpha 22 | 23 | 24 | def _check_bn(module, flag): 25 | if issubclass(module.__class__, modules.bn.InPlaceABNSync): 26 | flag[0] = True 27 | 28 | 29 | def check_bn(model): 30 | flag = [False] 31 | model.apply(lambda module: _check_bn(module, flag)) 32 | return flag[0] 33 | 34 | 35 | def reset_bn(module): 36 | if issubclass(module.__class__, modules.bn.InPlaceABNSync): 37 | module.running_mean = torch.zeros_like(module.running_mean) 38 | module.running_var = torch.ones_like(module.running_var) 39 | 40 | 41 | def _get_momenta(module, momenta): 42 | if issubclass(module.__class__, modules.bn.InPlaceABNSync): 43 | momenta[module] = module.momentum 44 | 45 | 46 | def _set_momenta(module, momenta): 47 | if issubclass(module.__class__, modules.bn.InPlaceABNSync): 48 | module.momentum = momenta[module] 49 | 50 | 51 | def bn_re_estimate(loader, model): 52 | if not check_bn(model): 53 | print('No batch norm layer detected') 54 | return 55 | model.train() 56 | momenta = {} 57 | model.apply(reset_bn) 58 | model.apply(lambda module: _get_momenta(module, momenta)) 59 | n = 0 60 | for i_iter, batch in enumerate(loader): 61 | images, labels, _ = batch 62 | b = images.data.size(0) 63 | momentum = b / (n + b) 64 | for module in momenta.keys(): 65 | module.momentum = momentum 66 | model(images) 67 | n += b 68 | model.apply(lambda module: _set_momenta(module, momenta)) 69 | 70 | 71 | def save_schp_checkpoint(states, is_best_parsing, output_dir, filename='schp_checkpoint.pth.tar'): 72 | save_path = os.path.join(output_dir, filename) 73 | if os.path.exists(save_path): 74 | os.remove(save_path) 75 | torch.save(states, save_path) 76 | if is_best_parsing and 'state_dict' in states: 77 | best_save_path = os.path.join(output_dir, 'model_parsing_best.pth.tar') 78 | if os.path.exists(best_save_path): 79 | os.remove(best_save_path) 80 | torch.save(states, best_save_path) 81 | -------------------------------------------------------------------------------- /schp/networks/context_encoding/aspp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : aspp.py 8 | @Time : 8/4/19 3:36 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 12 | """ 13 | 14 | import torch 15 | import torch.nn as nn 16 | from torch.nn import functional as F 17 | 18 | from modules import InPlaceABNSync 19 | 20 | 21 | class ASPPModule(nn.Module): 22 | """ 23 | Reference: 24 | Chen, Liang-Chieh, et al. 
*"Rethinking Atrous Convolution for Semantic Image Segmentation."* 25 | """ 26 | def __init__(self, features, out_features=512, inner_features=256, dilations=(12, 24, 36)): 27 | super(ASPPModule, self).__init__() 28 | 29 | self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), 30 | nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, 31 | bias=False), 32 | InPlaceABNSync(inner_features)) 33 | self.conv2 = nn.Sequential( 34 | nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False), 35 | InPlaceABNSync(inner_features)) 36 | self.conv3 = nn.Sequential( 37 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False), 38 | InPlaceABNSync(inner_features)) 39 | self.conv4 = nn.Sequential( 40 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False), 41 | InPlaceABNSync(inner_features)) 42 | self.conv5 = nn.Sequential( 43 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False), 44 | InPlaceABNSync(inner_features)) 45 | 46 | self.bottleneck = nn.Sequential( 47 | nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False), 48 | InPlaceABNSync(out_features), 49 | nn.Dropout2d(0.1) 50 | ) 51 | 52 | def forward(self, x): 53 | _, _, h, w = x.size() 54 | 55 | feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True) 56 | 57 | feat2 = self.conv2(x) 58 | feat3 = self.conv3(x) 59 | feat4 = self.conv4(x) 60 | feat5 = self.conv5(x) 61 | out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1) 62 | 63 | bottle = self.bottleneck(out) 64 | return bottle -------------------------------------------------------------------------------- /HumanParserATRCustomNode.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from PIL import Image 4 | 5 | from .utils import generate 6 | 7 | class HumanParserATRCustomNode: 8 | @classmethod 9 | def INPUT_TYPES(cls): 10 | return { 11 | "required": { 12 | "image" : ("IMAGE", {}), 13 | "background": ("BOOLEAN", {"default": False}), 14 | "hat": ("BOOLEAN", {"default": False}), 15 | "hair": ("BOOLEAN", {"default": False}), 16 | "sunglasses": ("BOOLEAN", {"default": False}), 17 | "upper_clothes": ("BOOLEAN", {"default": False}), 18 | "skirt": ("BOOLEAN", {"default": False}), 19 | "pants": ("BOOLEAN", {"default": False}), 20 | "dress": ("BOOLEAN", {"default": False}), 21 | "belt": ("BOOLEAN", {"default": False}), 22 | "left_shoe": ("BOOLEAN", {"default": False}), 23 | "right_shoe": ("BOOLEAN", {"default": False}), 24 | "face": ("BOOLEAN", {"default": False}), 25 | "left_leg": ("BOOLEAN", {"default": False}), 26 | "right_leg": ("BOOLEAN", {"default": False}), 27 | "left_arm": ("BOOLEAN", {"default": False}), 28 | "right_arm": ("BOOLEAN", {"default": False}), 29 | "bag": ("BOOLEAN", {"default": False}), 30 | "scarf": ("BOOLEAN", {"default": False}), 31 | }, 32 | } 33 | 34 | RETURN_TYPES = ("MASK", "IMAGE") 35 | RETURN_NAMES = ("mask", "map") 36 | FUNCTION = "run" 37 | CATEGORY = "CozyMantis" 38 | 39 | def run(self, image, background, hat, hair, sunglasses, upper_clothes, skirt, pants, dress, belt, left_shoe, right_shoe, face, left_leg, right_leg, left_arm, right_arm, bag, scarf): 40 | if torch.cuda.is_available(): 41 | device = 'cuda' 42 | else: 43 | device = 'cpu' 44 | 45 | output_img = generate(image[0], 'atr', device) 46 | 47 | mask_components = 
[] 48 | 49 | if background: 50 | mask_components.append(0) 51 | if hat: 52 | mask_components.append(1) 53 | if hair: 54 | mask_components.append(2) 55 | if sunglasses: 56 | mask_components.append(3) 57 | if upper_clothes: 58 | mask_components.append(4) 59 | if skirt: 60 | mask_components.append(5) 61 | if pants: 62 | mask_components.append(6) 63 | if dress: 64 | mask_components.append(7) 65 | if belt: 66 | mask_components.append(8) 67 | if left_shoe: 68 | mask_components.append(9) 69 | if right_shoe: 70 | mask_components.append(10) 71 | if face: 72 | mask_components.append(11) 73 | if left_leg: 74 | mask_components.append(12) 75 | if right_leg: 76 | mask_components.append(13) 77 | if left_arm: 78 | mask_components.append(14) 79 | if right_arm: 80 | mask_components.append(15) 81 | if bag: 82 | mask_components.append(16) 83 | if scarf: 84 | mask_components.append(17) 85 | 86 | mask = np.isin(output_img, mask_components).astype(np.uint8) 87 | mask_image = Image.fromarray(mask * 255) 88 | mask_image = mask_image.convert("RGB") 89 | mask_image = torch.from_numpy(np.array(mask_image).astype(np.float32) / 255.0).unsqueeze(0) 90 | 91 | output_img = output_img.convert('RGB') 92 | output_img = torch.from_numpy(np.array(output_img).astype(np.float32) / 255.0).unsqueeze(0) 93 | return (mask_image[:, :, :, 0], output_img,) 94 | -------------------------------------------------------------------------------- /schp/utils/warmup_scheduler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : warmup_scheduler.py 8 | @Time : 3/28/19 2:24 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 12 | """ 13 | 14 | import math 15 | from torch.optim.lr_scheduler import _LRScheduler 16 | 17 | 18 | class GradualWarmupScheduler(_LRScheduler): 19 | """ Gradually warm-up learning rate with cosine annealing in optimizer. 20 | Proposed in 'Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour'. 21 | """ 22 | 23 | def __init__(self, optimizer, total_epoch, eta_min=0, warmup_epoch=10, last_epoch=-1): 24 | self.total_epoch = total_epoch 25 | self.eta_min = eta_min 26 | self.warmup_epoch = warmup_epoch 27 | super(GradualWarmupScheduler, self).__init__(optimizer, last_epoch) 28 | 29 | def get_lr(self): 30 | if self.last_epoch <= self.warmup_epoch: 31 | return [self.eta_min + self.last_epoch*(base_lr - self.eta_min)/self.warmup_epoch for base_lr in self.base_lrs] 32 | else: 33 | return [self.eta_min + (base_lr-self.eta_min)*(1+math.cos(math.pi*(self.last_epoch-self.warmup_epoch)/(self.total_epoch-self.warmup_epoch))) / 2 for base_lr in self.base_lrs] 34 | 35 | 36 | class SGDRScheduler(_LRScheduler): 37 | """ Consine annealing with warm up and restarts. 38 | Proposed in `SGDR: Stochastic Gradient Descent with Warm Restarts`. 
39 | """ 40 | def __init__(self, optimizer, total_epoch=150, start_cyclical=100, cyclical_base_lr=7e-4, cyclical_epoch=10, eta_min=0, warmup_epoch=10, last_epoch=-1): 41 | self.total_epoch = total_epoch 42 | self.start_cyclical = start_cyclical 43 | self.cyclical_epoch = cyclical_epoch 44 | self.cyclical_base_lr = cyclical_base_lr 45 | self.eta_min = eta_min 46 | self.warmup_epoch = warmup_epoch 47 | super(SGDRScheduler, self).__init__(optimizer, last_epoch) 48 | 49 | def get_lr(self): 50 | if self.last_epoch < self.warmup_epoch: 51 | return [self.eta_min + self.last_epoch*(base_lr - self.eta_min)/self.warmup_epoch for base_lr in self.base_lrs] 52 | elif self.last_epoch < self.start_cyclical: 53 | return [self.eta_min + (base_lr-self.eta_min)*(1+math.cos(math.pi*(self.last_epoch-self.warmup_epoch)/(self.start_cyclical-self.warmup_epoch))) / 2 for base_lr in self.base_lrs] 54 | else: 55 | return [self.eta_min + (self.cyclical_base_lr-self.eta_min)*(1+math.cos(math.pi* ((self.last_epoch-self.start_cyclical)% self.cyclical_epoch)/self.cyclical_epoch)) / 2 for base_lr in self.base_lrs] 56 | 57 | 58 | if __name__ == '__main__': 59 | import matplotlib.pyplot as plt 60 | import torch 61 | model = torch.nn.Linear(10, 2) 62 | optimizer = torch.optim.SGD(params=model.parameters(), lr=7e-3, momentum=0.9, weight_decay=5e-4) 63 | scheduler_warmup = SGDRScheduler(optimizer, total_epoch=150, eta_min=7e-5, warmup_epoch=10, start_cyclical=100, cyclical_base_lr=3.5e-3, cyclical_epoch=10) 64 | lr = [] 65 | for epoch in range(0,150): 66 | scheduler_warmup.step(epoch) 67 | lr.append(scheduler_warmup.get_lr()) 68 | plt.style.use('ggplot') 69 | plt.plot(list(range(0,150)), lr) 70 | plt.show() 71 | 72 | -------------------------------------------------------------------------------- /HumanParserLIPCustomNode.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from PIL import Image 4 | 5 | from .utils import generate 6 | 7 | class HumanParserLIPCustomNode: 8 | @classmethod 9 | def INPUT_TYPES(cls): 10 | return { 11 | "required": { 12 | "image" : ("IMAGE", {}), 13 | "background": ("BOOLEAN", {"default": False}), 14 | "hat": ("BOOLEAN", {"default": False}), 15 | "hair": ("BOOLEAN", {"default": False}), 16 | "glove": ("BOOLEAN", {"default": False}), 17 | "sunglasses": ("BOOLEAN", {"default": False}), 18 | "upper_clothes": ("BOOLEAN", {"default": False}), 19 | "dress": ("BOOLEAN", {"default": False}), 20 | "coat": ("BOOLEAN", {"default": False}), 21 | "socks": ("BOOLEAN", {"default": False}), 22 | "pants": ("BOOLEAN", {"default": False}), 23 | "jumpsuits": ("BOOLEAN", {"default": False}), 24 | "scarf": ("BOOLEAN", {"default": False}), 25 | "skirt": ("BOOLEAN", {"default": False}), 26 | "face": ("BOOLEAN", {"default": False}), 27 | "left_arm": ("BOOLEAN", {"default": False}), 28 | "right_arm": ("BOOLEAN", {"default": False}), 29 | "left_leg": ("BOOLEAN", {"default": False}), 30 | "right_leg": ("BOOLEAN", {"default": False}), 31 | "left_shoe": ("BOOLEAN", {"default": False}), 32 | "right_shoe": ("BOOLEAN", {"default": False}), 33 | }, 34 | } 35 | 36 | RETURN_TYPES = ("MASK", "IMAGE") 37 | RETURN_NAMES = ("mask", "map") 38 | FUNCTION = "run" 39 | CATEGORY = "CozyMantis" 40 | 41 | def run(self, image, background, hat, hair, glove, sunglasses, upper_clothes, dress, coat, socks, pants, jumpsuits, scarf, skirt, face, left_arm, right_arm, left_leg, right_leg, left_shoe, right_shoe): 42 | if torch.cuda.is_available(): 43 | device = 'cuda' 44 | else: 
45 | device = 'cpu' 46 | 47 | output_img = generate(image[0], 'lip', device) 48 | 49 | mask_components = [] 50 | 51 | if background: 52 | mask_components.append(0) 53 | if hat: 54 | mask_components.append(1) 55 | if hair: 56 | mask_components.append(2) 57 | if glove: 58 | mask_components.append(3) 59 | if sunglasses: 60 | mask_components.append(4) 61 | if upper_clothes: 62 | mask_components.append(5) 63 | if dress: 64 | mask_components.append(6) 65 | if coat: 66 | mask_components.append(7) 67 | if socks: 68 | mask_components.append(8) 69 | if pants: 70 | mask_components.append(9) 71 | if jumpsuits: 72 | mask_components.append(10) 73 | if scarf: 74 | mask_components.append(11) 75 | if skirt: 76 | mask_components.append(12) 77 | if face: 78 | mask_components.append(13) 79 | if left_arm: 80 | mask_components.append(14) 81 | if right_arm: 82 | mask_components.append(15) 83 | if left_leg: 84 | mask_components.append(16) 85 | if right_leg: 86 | mask_components.append(17) 87 | if left_shoe: 88 | mask_components.append(18) 89 | if right_shoe: 90 | mask_components.append(19) 91 | 92 | mask = np.isin(output_img, mask_components).astype(np.uint8) 93 | mask_image = Image.fromarray(mask * 255) 94 | mask_image = mask_image.convert("RGB") 95 | mask_image = torch.from_numpy(np.array(mask_image).astype(np.float32) / 255.0).unsqueeze(0) 96 | 97 | output_img = output_img.convert('RGB') 98 | output_img = torch.from_numpy(np.array(output_img).astype(np.float32) / 255.0).unsqueeze(0) 99 | return (mask_image[:, :, :, 0], output_img,) 100 | -------------------------------------------------------------------------------- /schp/modules/src/inplace_abn.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | std::vector mean_var_cpu(at::Tensor x); 8 | std::vector mean_var_cuda(at::Tensor x); 9 | std::vector mean_var_cuda_h(at::Tensor x); 10 | 11 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 12 | bool affine, float eps); 13 | at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 14 | bool affine, float eps); 15 | at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 16 | bool affine, float eps); 17 | 18 | std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 19 | bool affine, float eps); 20 | std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 21 | bool affine, float eps); 22 | std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 23 | bool affine, float eps); 24 | 25 | at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 26 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 27 | at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 28 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 29 | at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 30 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 31 | 32 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope); 33 | void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope); 34 | void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope); 35 | 36 | void 
elu_backward_cpu(at::Tensor z, at::Tensor dz); 37 | void elu_backward_cuda(at::Tensor z, at::Tensor dz); 38 | 39 | static void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) { 40 | num = x.size(0); 41 | chn = x.size(1); 42 | sp = 1; 43 | for (int64_t i = 2; i < x.ndimension(); ++i) 44 | sp *= x.size(i); 45 | } 46 | 47 | /* 48 | * Specialized CUDA reduction functions for BN 49 | */ 50 | #ifdef __CUDACC__ 51 | 52 | #include "utils/cuda.cuh" 53 | 54 | template 55 | __device__ T reduce(Op op, int plane, int N, int S) { 56 | T sum = (T)0; 57 | for (int batch = 0; batch < N; ++batch) { 58 | for (int x = threadIdx.x; x < S; x += blockDim.x) { 59 | sum += op(batch, plane, x); 60 | } 61 | } 62 | 63 | // sum over NumThreads within a warp 64 | sum = warpSum(sum); 65 | 66 | // 'transpose', and reduce within warp again 67 | __shared__ T shared[32]; 68 | __syncthreads(); 69 | if (threadIdx.x % WARP_SIZE == 0) { 70 | shared[threadIdx.x / WARP_SIZE] = sum; 71 | } 72 | if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) { 73 | // zero out the other entries in shared 74 | shared[threadIdx.x] = (T)0; 75 | } 76 | __syncthreads(); 77 | if (threadIdx.x / WARP_SIZE == 0) { 78 | sum = warpSum(shared[threadIdx.x]); 79 | if (threadIdx.x == 0) { 80 | shared[0] = sum; 81 | } 82 | } 83 | __syncthreads(); 84 | 85 | // Everyone picks it up, should be broadcast into the whole gradInput 86 | return shared[0]; 87 | } 88 | #endif 89 | -------------------------------------------------------------------------------- /schp/utils/soft_dice_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : soft_dice_loss.py 8 | @Time : 8/13/19 5:09 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 12 | """ 13 | 14 | from __future__ import print_function, division 15 | 16 | import torch 17 | import torch.nn.functional as F 18 | from torch import nn 19 | 20 | try: 21 | from itertools import ifilterfalse 22 | except ImportError: # py3k 23 | from itertools import filterfalse as ifilterfalse 24 | 25 | 26 | def tversky_loss(probas, labels, alpha=0.5, beta=0.5, epsilon=1e-6): 27 | ''' 28 | Tversky loss function. 29 | probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1) 30 | labels: [P] Tensor, ground truth labels (between 0 and C - 1) 31 | 32 | Same as soft dice loss when alpha=beta=0.5. 33 | Same as Jaccord loss when alpha=beta=1.0. 
34 | See `Tversky loss function for image segmentation using 3D fully convolutional deep networks` 35 | https://arxiv.org/pdf/1706.05721.pdf 36 | ''' 37 | C = probas.size(1) 38 | losses = [] 39 | for c in list(range(C)): 40 | fg = (labels == c).float() 41 | if fg.sum() == 0: 42 | continue 43 | class_pred = probas[:, c] 44 | p0 = class_pred 45 | p1 = 1 - class_pred 46 | g0 = fg 47 | g1 = 1 - fg 48 | numerator = torch.sum(p0 * g0) 49 | denominator = numerator + alpha * torch.sum(p0 * g1) + beta * torch.sum(p1 * g0) 50 | losses.append(1 - ((numerator) / (denominator + epsilon))) 51 | return mean(losses) 52 | 53 | 54 | def flatten_probas(probas, labels, ignore=255): 55 | """ 56 | Flattens predictions in the batch 57 | """ 58 | B, C, H, W = probas.size() 59 | probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C 60 | labels = labels.view(-1) 61 | if ignore is None: 62 | return probas, labels 63 | valid = (labels != ignore) 64 | vprobas = probas[valid.nonzero().squeeze()] 65 | vlabels = labels[valid] 66 | return vprobas, vlabels 67 | 68 | 69 | def isnan(x): 70 | return x != x 71 | 72 | 73 | def mean(l, ignore_nan=False, empty=0): 74 | """ 75 | nanmean compatible with generators. 76 | """ 77 | l = iter(l) 78 | if ignore_nan: 79 | l = ifilterfalse(isnan, l) 80 | try: 81 | n = 1 82 | acc = next(l) 83 | except StopIteration: 84 | if empty == 'raise': 85 | raise ValueError('Empty mean') 86 | return empty 87 | for n, v in enumerate(l, 2): 88 | acc += v 89 | if n == 1: 90 | return acc 91 | return acc / n 92 | 93 | 94 | class SoftDiceLoss(nn.Module): 95 | def __init__(self, ignore_index=255): 96 | super(SoftDiceLoss, self).__init__() 97 | self.ignore_index = ignore_index 98 | 99 | def forward(self, pred, label): 100 | pred = F.softmax(pred, dim=1) 101 | return tversky_loss(*flatten_probas(pred, label, ignore=self.ignore_index), alpha=0.5, beta=0.5) 102 | 103 | 104 | class SoftJaccordLoss(nn.Module): 105 | def __init__(self, ignore_index=255): 106 | super(SoftJaccordLoss, self).__init__() 107 | self.ignore_index = ignore_index 108 | 109 | def forward(self, pred, label): 110 | pred = F.softmax(pred, dim=1) 111 | return tversky_loss(*flatten_probas(pred, label, ignore=self.ignore_index), alpha=1.0, beta=1.0) 112 | -------------------------------------------------------------------------------- /schp/modules/deeplab.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as functional 4 | 5 | from models._util import try_index 6 | from .bn import ABN 7 | 8 | 9 | class DeeplabV3(nn.Module): 10 | def __init__(self, 11 | in_channels, 12 | out_channels, 13 | hidden_channels=256, 14 | dilations=(12, 24, 36), 15 | norm_act=ABN, 16 | pooling_size=None): 17 | super(DeeplabV3, self).__init__() 18 | self.pooling_size = pooling_size 19 | 20 | self.map_convs = nn.ModuleList([ 21 | nn.Conv2d(in_channels, hidden_channels, 1, bias=False), 22 | nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[0], padding=dilations[0]), 23 | nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[1], padding=dilations[1]), 24 | nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[2], padding=dilations[2]) 25 | ]) 26 | self.map_bn = norm_act(hidden_channels * 4) 27 | 28 | self.global_pooling_conv = nn.Conv2d(in_channels, hidden_channels, 1, bias=False) 29 | self.global_pooling_bn = norm_act(hidden_channels) 30 | 31 | self.red_conv = 
nn.Conv2d(hidden_channels * 4, out_channels, 1, bias=False) 32 | self.pool_red_conv = nn.Conv2d(hidden_channels, out_channels, 1, bias=False) 33 | self.red_bn = norm_act(out_channels) 34 | 35 | self.reset_parameters(self.map_bn.activation, self.map_bn.slope) 36 | 37 | def reset_parameters(self, activation, slope): 38 | gain = nn.init.calculate_gain(activation, slope) 39 | for m in self.modules(): 40 | if isinstance(m, nn.Conv2d): 41 | nn.init.xavier_normal_(m.weight.data, gain) 42 | if hasattr(m, "bias") and m.bias is not None: 43 | nn.init.constant_(m.bias, 0) 44 | elif isinstance(m, ABN): 45 | if hasattr(m, "weight") and m.weight is not None: 46 | nn.init.constant_(m.weight, 1) 47 | if hasattr(m, "bias") and m.bias is not None: 48 | nn.init.constant_(m.bias, 0) 49 | 50 | def forward(self, x): 51 | # Map convolutions 52 | out = torch.cat([m(x) for m in self.map_convs], dim=1) 53 | out = self.map_bn(out) 54 | out = self.red_conv(out) 55 | 56 | # Global pooling 57 | pool = self._global_pooling(x) 58 | pool = self.global_pooling_conv(pool) 59 | pool = self.global_pooling_bn(pool) 60 | pool = self.pool_red_conv(pool) 61 | if self.training or self.pooling_size is None: 62 | pool = pool.repeat(1, 1, x.size(2), x.size(3)) 63 | 64 | out += pool 65 | out = self.red_bn(out) 66 | return out 67 | 68 | def _global_pooling(self, x): 69 | if self.training or self.pooling_size is None: 70 | pool = x.view(x.size(0), x.size(1), -1).mean(dim=-1) 71 | pool = pool.view(x.size(0), x.size(1), 1, 1) 72 | else: 73 | pooling_size = (min(try_index(self.pooling_size, 0), x.shape[2]), 74 | min(try_index(self.pooling_size, 1), x.shape[3])) 75 | padding = ( 76 | (pooling_size[1] - 1) // 2, 77 | (pooling_size[1] - 1) // 2 if pooling_size[1] % 2 == 1 else (pooling_size[1] - 1) // 2 + 1, 78 | (pooling_size[0] - 1) // 2, 79 | (pooling_size[0] - 1) // 2 if pooling_size[0] % 2 == 1 else (pooling_size[0] - 1) // 2 + 1 80 | ) 81 | 82 | pool = functional.avg_pool2d(x, pooling_size, stride=1) 83 | pool = functional.pad(pool, pad=padding, mode="replicate") 84 | return pool 85 | -------------------------------------------------------------------------------- /schp/modules/src/inplace_abn_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "utils/checks.h" 6 | #include "inplace_abn.h" 7 | 8 | at::Tensor reduce_sum(at::Tensor x) { 9 | if (x.ndimension() == 2) { 10 | return x.sum(0); 11 | } else { 12 | auto x_view = x.view({x.size(0), x.size(1), -1}); 13 | return x_view.sum(-1).sum(0); 14 | } 15 | } 16 | 17 | at::Tensor broadcast_to(at::Tensor v, at::Tensor x) { 18 | if (x.ndimension() == 2) { 19 | return v; 20 | } else { 21 | std::vector broadcast_size = {1, -1}; 22 | for (int64_t i = 2; i < x.ndimension(); ++i) 23 | broadcast_size.push_back(1); 24 | 25 | return v.view(broadcast_size); 26 | } 27 | } 28 | 29 | int64_t count(at::Tensor x) { 30 | int64_t count = x.size(0); 31 | for (int64_t i = 2; i < x.ndimension(); ++i) 32 | count *= x.size(i); 33 | 34 | return count; 35 | } 36 | 37 | at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) { 38 | if (affine) { 39 | return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z); 40 | } else { 41 | return z; 42 | } 43 | } 44 | 45 | std::vector mean_var_cpu(at::Tensor x) { 46 | auto num = count(x); 47 | auto mean = reduce_sum(x) / num; 48 | auto diff = x - broadcast_to(mean, x); 49 | auto var = reduce_sum(diff.pow(2)) / num; 50 | 51 | return {mean, 
var}; 52 | } 53 | 54 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 55 | bool affine, float eps) { 56 | auto gamma = affine ? at::abs(weight) + eps : at::ones_like(var); 57 | auto mul = at::rsqrt(var + eps) * gamma; 58 | 59 | x.sub_(broadcast_to(mean, x)); 60 | x.mul_(broadcast_to(mul, x)); 61 | if (affine) x.add_(broadcast_to(bias, x)); 62 | 63 | return x; 64 | } 65 | 66 | std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 67 | bool affine, float eps) { 68 | auto edz = reduce_sum(dz); 69 | auto y = invert_affine(z, weight, bias, affine, eps); 70 | auto eydz = reduce_sum(y * dz); 71 | 72 | return {edz, eydz}; 73 | } 74 | 75 | at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 76 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) { 77 | auto y = invert_affine(z, weight, bias, affine, eps); 78 | auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps); 79 | 80 | auto num = count(z); 81 | auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz); 82 | return dx; 83 | } 84 | 85 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) { 86 | CHECK_CPU_INPUT(z); 87 | CHECK_CPU_INPUT(dz); 88 | 89 | AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] { 90 | int64_t count = z.numel(); 91 | auto *_z = z.data(); 92 | auto *_dz = dz.data(); 93 | 94 | for (int64_t i = 0; i < count; ++i) { 95 | if (_z[i] < 0) { 96 | _z[i] *= 1 / slope; 97 | _dz[i] *= slope; 98 | } 99 | } 100 | })); 101 | } 102 | 103 | void elu_backward_cpu(at::Tensor z, at::Tensor dz) { 104 | CHECK_CPU_INPUT(z); 105 | CHECK_CPU_INPUT(dz); 106 | 107 | AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] { 108 | int64_t count = z.numel(); 109 | auto *_z = z.data(); 110 | auto *_dz = dz.data(); 111 | 112 | for (int64_t i = 0; i < count; ++i) { 113 | if (_z[i] < 0) { 114 | _z[i] = log1p(_z[i]); 115 | _dz[i] *= (_z[i] + 1.f); 116 | } 117 | } 118 | })); 119 | } 120 | -------------------------------------------------------------------------------- /schp/modules/src/inplace_abn.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "inplace_abn.h" 6 | 7 | std::vector mean_var(at::Tensor x) 8 | { 9 | if (x.is_cuda()) 10 | { 11 | if (x.type().scalarType() == at::ScalarType::Half) 12 | { 13 | return mean_var_cuda_h(x); 14 | } 15 | else 16 | { 17 | return mean_var_cuda(x); 18 | } 19 | } 20 | else 21 | { 22 | return mean_var_cpu(x); 23 | } 24 | } 25 | 26 | at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 27 | bool affine, float eps) 28 | { 29 | if (x.is_cuda()) 30 | { 31 | if (x.type().scalarType() == at::ScalarType::Half) 32 | { 33 | return forward_cuda_h(x, mean, var, weight, bias, affine, eps); 34 | } 35 | else 36 | { 37 | return forward_cuda(x, mean, var, weight, bias, affine, eps); 38 | } 39 | } 40 | else 41 | { 42 | return forward_cpu(x, mean, var, weight, bias, affine, eps); 43 | } 44 | } 45 | 46 | std::vector edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 47 | bool affine, float eps) 48 | { 49 | if (z.is_cuda()) 50 | { 51 | if (z.type().scalarType() == at::ScalarType::Half) 52 | { 53 | return edz_eydz_cuda_h(z, dz, weight, bias, affine, eps); 54 | } 55 | else 56 | { 57 | return edz_eydz_cuda(z, dz, weight, 
bias, affine, eps); 58 | } 59 | } 60 | else 61 | { 62 | return edz_eydz_cpu(z, dz, weight, bias, affine, eps); 63 | } 64 | } 65 | 66 | at::Tensor backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 67 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) 68 | { 69 | if (z.is_cuda()) 70 | { 71 | if (z.type().scalarType() == at::ScalarType::Half) 72 | { 73 | return backward_cuda_h(z, dz, var, weight, bias, edz, eydz, affine, eps); 74 | } 75 | else 76 | { 77 | return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps); 78 | } 79 | } 80 | else 81 | { 82 | return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps); 83 | } 84 | } 85 | 86 | void leaky_relu_forward(at::Tensor z, float slope) 87 | { 88 | at::leaky_relu_(z, slope); 89 | } 90 | 91 | void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) 92 | { 93 | if (z.is_cuda()) 94 | { 95 | if (z.type().scalarType() == at::ScalarType::Half) 96 | { 97 | return leaky_relu_backward_cuda_h(z, dz, slope); 98 | } 99 | else 100 | { 101 | return leaky_relu_backward_cuda(z, dz, slope); 102 | } 103 | } 104 | else 105 | { 106 | return leaky_relu_backward_cpu(z, dz, slope); 107 | } 108 | } 109 | 110 | void elu_forward(at::Tensor z) 111 | { 112 | at::elu_(z); 113 | } 114 | 115 | void elu_backward(at::Tensor z, at::Tensor dz) 116 | { 117 | if (z.is_cuda()) 118 | { 119 | return elu_backward_cuda(z, dz); 120 | } 121 | else 122 | { 123 | return elu_backward_cpu(z, dz); 124 | } 125 | } 126 | 127 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) 128 | { 129 | m.def("mean_var", &mean_var, "Mean and variance computation"); 130 | m.def("forward", &forward, "In-place forward computation"); 131 | m.def("edz_eydz", &edz_eydz, "First part of backward computation"); 132 | m.def("backward", &backward, "Second part of backward computation"); 133 | m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation"); 134 | m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion"); 135 | m.def("elu_forward", &elu_forward, "Elu forward computation"); 136 | m.def("elu_backward", &elu_backward, "Elu backward computation and inversion"); 137 | } 138 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ -------------------------------------------------------------------------------- /schp/modules/src/inplace_abn_cpu_only.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "inplace_abn.h" 6 | 7 | std::vector mean_var(at::Tensor x) 8 | { 9 | if (x.is_cuda()) 10 | { 11 | if (x.type().scalarType() == at::ScalarType::Half) 12 | { 13 | return mean_var_cuda_h(x); 14 | } 15 | else 16 | { 17 | return mean_var_cuda(x); 18 | } 19 | } 20 | else 21 | { 22 | return mean_var_cpu(x); 23 | } 24 | } 25 | 26 | at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 27 | bool affine, float eps) {} 28 | at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 29 | bool affine, float eps) {} 30 | at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 31 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) {} 32 | at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 33 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) {} 34 | std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 35 | bool affine, float eps) {} 36 | std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 37 | bool affine, float eps) {} 38 | std::vector mean_var_cuda(at::Tensor x) {} 39 | std::vector mean_var_cuda_h(at::Tensor x) {} 40 | void elu_backward_cuda(at::Tensor z, at::Tensor dz) {} 41 | void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) {} 42 | void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope) {} 43 | 44 | at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 45 | bool affine, float eps) 46 | { 47 | if (x.is_cuda()) 48 | { 49 | if (x.type().scalarType() == at::ScalarType::Half) 50 | { 51 | return forward_cuda_h(x, mean, var, weight, bias, affine, eps); 52 | } 53 | else 54 | { 55 | return forward_cuda(x, mean, var, weight, bias, affine, eps); 56 | } 57 | } 58 | else 59 | { 60 | return forward_cpu(x, mean, var, weight, bias, affine, eps); 61 | } 62 | } 63 | 64 | std::vector edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 65 | bool affine, float eps) 66 | { 67 | if (z.is_cuda()) 68 | { 69 | if (z.type().scalarType() == at::ScalarType::Half) 70 | { 71 | return edz_eydz_cuda_h(z, dz, weight, bias, affine, eps); 72 | } 73 | else 74 | { 75 | return edz_eydz_cuda(z, dz, weight, bias, affine, eps); 76 | } 77 | } 78 | else 79 | { 80 | return edz_eydz_cpu(z, dz, weight, bias, affine, eps); 81 | } 82 | } 83 | 84 | at::Tensor backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 85 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) 86 | { 87 | if (z.is_cuda()) 88 | { 89 | if (z.type().scalarType() == at::ScalarType::Half) 90 | { 91 | return backward_cuda_h(z, dz, var, weight, bias, edz, eydz, affine, eps); 92 | } 93 | else 94 | { 95 | return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps); 96 | } 97 | } 98 | else 99 | { 100 | return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps); 101 | } 102 | } 103 | 104 | void leaky_relu_forward(at::Tensor z, float slope) 105 | { 106 | at::leaky_relu_(z, slope); 107 | } 108 | 109 | void leaky_relu_backward(at::Tensor z, at::Tensor dz, 
float slope) 110 | { 111 | if (z.is_cuda()) 112 | { 113 | if (z.type().scalarType() == at::ScalarType::Half) 114 | { 115 | return leaky_relu_backward_cuda_h(z, dz, slope); 116 | } 117 | else 118 | { 119 | return leaky_relu_backward_cuda(z, dz, slope); 120 | } 121 | } 122 | else 123 | { 124 | return leaky_relu_backward_cpu(z, dz, slope); 125 | } 126 | } 127 | 128 | void elu_forward(at::Tensor z) 129 | { 130 | at::elu_(z); 131 | } 132 | 133 | void elu_backward(at::Tensor z, at::Tensor dz) 134 | { 135 | if (z.is_cuda()) 136 | { 137 | return elu_backward_cuda(z, dz); 138 | } 139 | else 140 | { 141 | return elu_backward_cpu(z, dz); 142 | } 143 | } 144 | 145 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) 146 | { 147 | m.def("mean_var", &mean_var, "Mean and variance computation"); 148 | m.def("forward", &forward, "In-place forward computation"); 149 | m.def("edz_eydz", &edz_eydz, "First part of backward computation"); 150 | m.def("backward", &backward, "Second part of backward computation"); 151 | m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation"); 152 | m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion"); 153 | m.def("elu_forward", &elu_forward, "Elu forward computation"); 154 | m.def("elu_backward", &elu_backward, "Elu backward computation and inversion"); 155 | } 156 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torch 3 | import os 4 | import numpy as np 5 | from collections import OrderedDict 6 | import torchvision.transforms as transforms 7 | from PIL import Image 8 | 9 | from .schp import networks 10 | from .schp.utils.transforms import transform_logits, get_affine_transform 11 | 12 | dataset_settings = { 13 | 'lip': { 14 | 'input_size': [473, 473], 15 | 'num_classes': 20, 16 | 'label': ['Background', 'Hat', 'Hair', 'Glove', 'Sunglasses', 'Upper-clothes', 'Dress', 'Coat', 17 | 'Socks', 'Pants', 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm', 'Right-arm', 18 | 'Left-leg', 'Right-leg', 'Left-shoe', 'Right-shoe'] 19 | }, 20 | 'atr': { 21 | 'input_size': [512, 512], 22 | 'num_classes': 18, 23 | 'label': ['Background', 'Hat', 'Hair', 'Sunglasses', 'Upper-clothes', 'Skirt', 'Pants', 'Dress', 'Belt', 24 | 'Left-shoe', 'Right-shoe', 'Face', 'Left-leg', 'Right-leg', 'Left-arm', 'Right-arm', 'Bag', 'Scarf'] 25 | }, 26 | 'pascal': { 27 | 'input_size': [512, 512], 28 | 'num_classes': 7, 29 | 'label': ['Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs'], 30 | } 31 | } 32 | 33 | def get_palette(num_cls): 34 | """ Returns the color map for visualizing the segmentation mask. 
35 | Args: 36 | num_cls: Number of classes 37 | Returns: 38 | The color map 39 | """ 40 | n = num_cls 41 | palette = [0] * (n * 3) 42 | for j in range(0, n): 43 | lab = j 44 | palette[j * 3 + 0] = 0 45 | palette[j * 3 + 1] = 0 46 | palette[j * 3 + 2] = 0 47 | i = 0 48 | while lab: 49 | palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) 50 | palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) 51 | palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) 52 | i += 1 53 | lab >>= 3 54 | return palette 55 | 56 | def _box2cs(box, aspect_ratio): 57 | x, y, w, h = box[:4] 58 | return _xywh2cs(x, y, w, h, aspect_ratio) 59 | 60 | def _xywh2cs(x, y, w, h, aspect_ratio): 61 | center = np.zeros((2), dtype=np.float32) 62 | center[0] = x + w * 0.5 63 | center[1] = y + h * 0.5 64 | if w > aspect_ratio * h: 65 | h = w * 1.0 / aspect_ratio 66 | elif w < aspect_ratio * h: 67 | w = h * aspect_ratio 68 | scale = np.array([w, h], dtype=np.float32) 69 | return center, scale 70 | 71 | def check_model_path(model_path): 72 | # Checks to see if the model exists, if not try adding ComfyUI/ to the start to fix possible errors on Windows (maybe others too) 73 | if not os.path.exists(model_path): 74 | new_model_path = os.path.join("ComfyUI", model_path) 75 | if os.path.exists(new_model_path): 76 | return new_model_path 77 | return model_path 78 | 79 | def generate(image, type, device): 80 | num_classes = dataset_settings[type]['num_classes'] 81 | input_size = dataset_settings[type]['input_size'] 82 | aspect_ratio = input_size[1] * 1.0 / input_size[0] 83 | if type == 'lip': 84 | model_path = 'models/schp/exp-schp-201908261155-lip.pth' 85 | elif type == 'atr': 86 | model_path = 'models/schp/exp-schp-201908301523-atr.pth' 87 | elif type == 'pascal': 88 | model_path = 'models/schp/exp-schp-201908270938-pascal-person-part.pth' 89 | 90 | # Check and adjust the model path if necessary 91 | model_path = check_model_path(model_path) 92 | 93 | model = networks.init_model('resnet101', num_classes=num_classes, pretrained=None) 94 | state_dict = torch.load(model_path)['state_dict'] 95 | new_state_dict = OrderedDict() 96 | for k, v in state_dict.items(): 97 | name = k[7:] 98 | new_state_dict[name] = v 99 | model.load_state_dict(new_state_dict) 100 | model.to(device) 101 | model.eval() 102 | 103 | # Get person center and scale 104 | input = 255. 
* image.cpu().numpy() 105 | input = np.clip(input, 0, 255).astype(np.uint8) 106 | input = cv2.cvtColor(input, cv2.COLOR_RGB2BGR) 107 | h, w, _ = input.shape 108 | 109 | person_center, s = _box2cs([0, 0, w - 1, h - 1], aspect_ratio) 110 | trans = get_affine_transform(person_center, s, 0, input_size) 111 | input = cv2.warpAffine( 112 | input, 113 | trans, 114 | (int(input_size[1]), int(input_size[0])), 115 | flags=cv2.INTER_LINEAR, 116 | borderMode=cv2.BORDER_CONSTANT, 117 | borderValue=(0, 0, 0)) 118 | 119 | transform = transforms.Compose([ 120 | transforms.ToTensor(), 121 | transforms.Normalize(mean=[0.406, 0.456, 0.485], std=[0.225, 0.224, 0.229]) 122 | ]) 123 | input = transform(input) 124 | 125 | palette = get_palette(num_classes) 126 | with torch.no_grad(): 127 | input = input[None, :, :, :] 128 | output = model(input.to(device)) 129 | upsample = torch.nn.Upsample(size=input_size, mode='bilinear', align_corners=True) 130 | upsample_output = upsample(output[0][-1][0].unsqueeze(0)) 131 | upsample_output = upsample_output.squeeze() 132 | upsample_output = upsample_output.permute(1, 2, 0) # CHW -> HWC 133 | 134 | logits_result = transform_logits(upsample_output.data.cpu().numpy(), person_center, s, w, h, input_size=input_size) 135 | parsing_result = np.argmax(logits_result, axis=2) 136 | 137 | output_img = Image.fromarray(np.asarray(parsing_result, dtype=np.uint8)) 138 | output_img.putpalette(palette) 139 | return output_img 140 | -------------------------------------------------------------------------------- /schp/modules/bn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as functional 4 | 5 | try: 6 | from queue import Queue 7 | except ImportError: 8 | from Queue import Queue 9 | 10 | from .functions import * 11 | 12 | 13 | class ABN(nn.Module): 14 | """Activated Batch Normalization 15 | 16 | This gathers a `BatchNorm2d` and an activation function in a single module 17 | """ 18 | 19 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): 20 | """Creates an Activated Batch Normalization module 21 | 22 | Parameters 23 | ---------- 24 | num_features : int 25 | Number of feature channels in the input and output. 26 | eps : float 27 | Small constant to prevent numerical issues. 28 | momentum : float 29 | Momentum factor applied to compute running statistics as. 30 | affine : bool 31 | If `True` apply learned scale and shift transformation after normalization. 32 | activation : str 33 | Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. 34 | slope : float 35 | Negative slope for the `leaky_relu` activation. 
36 | """ 37 | super(ABN, self).__init__() 38 | self.num_features = num_features 39 | self.affine = affine 40 | self.eps = eps 41 | self.momentum = momentum 42 | self.activation = activation 43 | self.slope = slope 44 | if self.affine: 45 | self.weight = nn.Parameter(torch.ones(num_features)) 46 | self.bias = nn.Parameter(torch.zeros(num_features)) 47 | else: 48 | self.register_parameter('weight', None) 49 | self.register_parameter('bias', None) 50 | self.register_buffer('running_mean', torch.zeros(num_features)) 51 | self.register_buffer('running_var', torch.ones(num_features)) 52 | self.reset_parameters() 53 | 54 | def reset_parameters(self): 55 | nn.init.constant_(self.running_mean, 0) 56 | nn.init.constant_(self.running_var, 1) 57 | if self.affine: 58 | nn.init.constant_(self.weight, 1) 59 | nn.init.constant_(self.bias, 0) 60 | 61 | def forward(self, x): 62 | x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias, 63 | self.training, self.momentum, self.eps) 64 | 65 | if self.activation == ACT_RELU: 66 | return functional.relu(x, inplace=True) 67 | elif self.activation == ACT_LEAKY_RELU: 68 | return functional.leaky_relu(x, negative_slope=self.slope, inplace=True) 69 | elif self.activation == ACT_ELU: 70 | return functional.elu(x, inplace=True) 71 | else: 72 | return x 73 | 74 | def __repr__(self): 75 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ 76 | ' affine={affine}, activation={activation}' 77 | if self.activation == "leaky_relu": 78 | rep += ', slope={slope})' 79 | else: 80 | rep += ')' 81 | return rep.format(name=self.__class__.__name__, **self.__dict__) 82 | 83 | 84 | class InPlaceABN(ABN): 85 | """InPlace Activated Batch Normalization""" 86 | 87 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): 88 | """Creates an InPlace Activated Batch Normalization module 89 | 90 | Parameters 91 | ---------- 92 | num_features : int 93 | Number of feature channels in the input and output. 94 | eps : float 95 | Small constant to prevent numerical issues. 96 | momentum : float 97 | Momentum factor applied to compute running statistics as. 98 | affine : bool 99 | If `True` apply learned scale and shift transformation after normalization. 100 | activation : str 101 | Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. 102 | slope : float 103 | Negative slope for the `leaky_relu` activation. 104 | """ 105 | super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope) 106 | 107 | def forward(self, x): 108 | x, _, _ = inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var, 109 | self.training, self.momentum, self.eps, self.activation, self.slope) 110 | return x 111 | 112 | 113 | class InPlaceABNSync(ABN): 114 | """InPlace Activated Batch Normalization with cross-GPU synchronization 115 | This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DistributedDataParallel`. 
116 | """ 117 | 118 | def forward(self, x): 119 | x, _, _ = inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var, 120 | self.training, self.momentum, self.eps, self.activation, self.slope) 121 | return x 122 | 123 | def __repr__(self): 124 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ 125 | ' affine={affine}, activation={activation}' 126 | if self.activation == "leaky_relu": 127 | rep += ', slope={slope})' 128 | else: 129 | rep += ')' 130 | return rep.format(name=self.__class__.__name__, **self.__dict__) 131 | 132 | 133 | -------------------------------------------------------------------------------- /schp/networks/backbone/resnext.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : resnext.py.py 8 | @Time : 8/11/19 8:58 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 12 | """ 13 | import functools 14 | import torch.nn as nn 15 | import math 16 | from torch.utils.model_zoo import load_url 17 | 18 | from modules import InPlaceABNSync 19 | 20 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') 21 | 22 | __all__ = ['ResNeXt', 'resnext101'] # support resnext 101 23 | 24 | model_urls = { 25 | 'resnext50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext50-imagenet.pth', 26 | 'resnext101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext101-imagenet.pth' 27 | } 28 | 29 | 30 | def conv3x3(in_planes, out_planes, stride=1): 31 | "3x3 convolution with padding" 32 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 33 | padding=1, bias=False) 34 | 35 | 36 | class GroupBottleneck(nn.Module): 37 | expansion = 2 38 | 39 | def __init__(self, inplanes, planes, stride=1, groups=1, downsample=None): 40 | super(GroupBottleneck, self).__init__() 41 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 42 | self.bn1 = BatchNorm2d(planes) 43 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 44 | padding=1, groups=groups, bias=False) 45 | self.bn2 = BatchNorm2d(planes) 46 | self.conv3 = nn.Conv2d(planes, planes * 2, kernel_size=1, bias=False) 47 | self.bn3 = BatchNorm2d(planes * 2) 48 | self.relu = nn.ReLU(inplace=True) 49 | self.downsample = downsample 50 | self.stride = stride 51 | 52 | def forward(self, x): 53 | residual = x 54 | 55 | out = self.conv1(x) 56 | out = self.bn1(out) 57 | out = self.relu(out) 58 | 59 | out = self.conv2(out) 60 | out = self.bn2(out) 61 | out = self.relu(out) 62 | 63 | out = self.conv3(out) 64 | out = self.bn3(out) 65 | 66 | if self.downsample is not None: 67 | residual = self.downsample(x) 68 | 69 | out += residual 70 | out = self.relu(out) 71 | 72 | return out 73 | 74 | 75 | class ResNeXt(nn.Module): 76 | 77 | def __init__(self, block, layers, groups=32, num_classes=1000): 78 | self.inplanes = 128 79 | super(ResNeXt, self).__init__() 80 | self.conv1 = conv3x3(3, 64, stride=2) 81 | self.bn1 = BatchNorm2d(64) 82 | self.relu1 = nn.ReLU(inplace=True) 83 | self.conv2 = conv3x3(64, 64) 84 | self.bn2 = BatchNorm2d(64) 85 | self.relu2 = nn.ReLU(inplace=True) 86 | self.conv3 = conv3x3(64, 128) 87 | self.bn3 = BatchNorm2d(128) 88 | self.relu3 = nn.ReLU(inplace=True) 89 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 90 | 91 | self.layer1 = 
self._make_layer(block, 128, layers[0], groups=groups) 92 | self.layer2 = self._make_layer(block, 256, layers[1], stride=2, groups=groups) 93 | self.layer3 = self._make_layer(block, 512, layers[2], stride=2, groups=groups) 94 | self.layer4 = self._make_layer(block, 1024, layers[3], stride=2, groups=groups) 95 | self.avgpool = nn.AvgPool2d(7, stride=1) 96 | self.fc = nn.Linear(1024 * block.expansion, num_classes) 97 | 98 | for m in self.modules(): 99 | if isinstance(m, nn.Conv2d): 100 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels // m.groups 101 | m.weight.data.normal_(0, math.sqrt(2. / n)) 102 | elif isinstance(m, BatchNorm2d): 103 | m.weight.data.fill_(1) 104 | m.bias.data.zero_() 105 | 106 | def _make_layer(self, block, planes, blocks, stride=1, groups=1): 107 | downsample = None 108 | if stride != 1 or self.inplanes != planes * block.expansion: 109 | downsample = nn.Sequential( 110 | nn.Conv2d(self.inplanes, planes * block.expansion, 111 | kernel_size=1, stride=stride, bias=False), 112 | BatchNorm2d(planes * block.expansion), 113 | ) 114 | 115 | layers = [] 116 | layers.append(block(self.inplanes, planes, stride, groups, downsample)) 117 | self.inplanes = planes * block.expansion 118 | for i in range(1, blocks): 119 | layers.append(block(self.inplanes, planes, groups=groups)) 120 | 121 | return nn.Sequential(*layers) 122 | 123 | def forward(self, x): 124 | x = self.relu1(self.bn1(self.conv1(x))) 125 | x = self.relu2(self.bn2(self.conv2(x))) 126 | x = self.relu3(self.bn3(self.conv3(x))) 127 | x = self.maxpool(x) 128 | 129 | x = self.layer1(x) 130 | x = self.layer2(x) 131 | x = self.layer3(x) 132 | x = self.layer4(x) 133 | 134 | x = self.avgpool(x) 135 | x = x.view(x.size(0), -1) 136 | x = self.fc(x) 137 | 138 | return x 139 | 140 | 141 | def resnext101(pretrained=False, **kwargs): 142 | """Constructs a ResNet-101 model. 143 | Args: 144 | pretrained (bool): If True, returns a model pre-trained on Places 145 | """ 146 | model = ResNeXt(GroupBottleneck, [3, 4, 23, 3], **kwargs) 147 | if pretrained: 148 | model.load_state_dict(load_url(model_urls['resnext101']), strict=False) 149 | return model 150 | -------------------------------------------------------------------------------- /schp/networks/backbone/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : mobilenetv2.py 8 | @Time : 8/4/19 3:35 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 
12 | """ 13 | 14 | import torch.nn as nn 15 | import math 16 | import functools 17 | 18 | from modules import InPlaceABN, InPlaceABNSync 19 | 20 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') 21 | 22 | __all__ = ['mobilenetv2'] 23 | 24 | 25 | def conv_bn(inp, oup, stride): 26 | return nn.Sequential( 27 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 28 | BatchNorm2d(oup), 29 | nn.ReLU6(inplace=True) 30 | ) 31 | 32 | 33 | def conv_1x1_bn(inp, oup): 34 | return nn.Sequential( 35 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 36 | BatchNorm2d(oup), 37 | nn.ReLU6(inplace=True) 38 | ) 39 | 40 | 41 | class InvertedResidual(nn.Module): 42 | def __init__(self, inp, oup, stride, expand_ratio): 43 | super(InvertedResidual, self).__init__() 44 | self.stride = stride 45 | assert stride in [1, 2] 46 | 47 | hidden_dim = round(inp * expand_ratio) 48 | self.use_res_connect = self.stride == 1 and inp == oup 49 | 50 | if expand_ratio == 1: 51 | self.conv = nn.Sequential( 52 | # dw 53 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 54 | BatchNorm2d(hidden_dim), 55 | nn.ReLU6(inplace=True), 56 | # pw-linear 57 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 58 | BatchNorm2d(oup), 59 | ) 60 | else: 61 | self.conv = nn.Sequential( 62 | # pw 63 | nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), 64 | BatchNorm2d(hidden_dim), 65 | nn.ReLU6(inplace=True), 66 | # dw 67 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 68 | BatchNorm2d(hidden_dim), 69 | nn.ReLU6(inplace=True), 70 | # pw-linear 71 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 72 | BatchNorm2d(oup), 73 | ) 74 | 75 | def forward(self, x): 76 | if self.use_res_connect: 77 | return x + self.conv(x) 78 | else: 79 | return self.conv(x) 80 | 81 | 82 | class MobileNetV2(nn.Module): 83 | def __init__(self, n_class=1000, input_size=224, width_mult=1.): 84 | super(MobileNetV2, self).__init__() 85 | block = InvertedResidual 86 | input_channel = 32 87 | last_channel = 1280 88 | interverted_residual_setting = [ 89 | # t, c, n, s 90 | [1, 16, 1, 1], 91 | [6, 24, 2, 2], # layer 2 92 | [6, 32, 3, 2], # layer 3 93 | [6, 64, 4, 2], 94 | [6, 96, 3, 1], # layer 4 95 | [6, 160, 3, 2], 96 | [6, 320, 1, 1], # layer 5 97 | ] 98 | 99 | # building first layer 100 | assert input_size % 32 == 0 101 | input_channel = int(input_channel * width_mult) 102 | self.last_channel = int(last_channel * width_mult) if width_mult > 1.0 else last_channel 103 | self.features = [conv_bn(3, input_channel, 2)] 104 | # building inverted residual blocks 105 | for t, c, n, s in interverted_residual_setting: 106 | output_channel = int(c * width_mult) 107 | for i in range(n): 108 | if i == 0: 109 | self.features.append(block(input_channel, output_channel, s, expand_ratio=t)) 110 | else: 111 | self.features.append(block(input_channel, output_channel, 1, expand_ratio=t)) 112 | input_channel = output_channel 113 | # building last several layers 114 | self.features.append(conv_1x1_bn(input_channel, self.last_channel)) 115 | # make it nn.Sequential 116 | self.features = nn.Sequential(*self.features) 117 | 118 | # building classifier 119 | self.classifier = nn.Sequential( 120 | nn.Dropout(0.2), 121 | nn.Linear(self.last_channel, n_class), 122 | ) 123 | 124 | self._initialize_weights() 125 | 126 | def forward(self, x): 127 | x = self.features(x) 128 | x = x.mean(3).mean(2) 129 | x = self.classifier(x) 130 | return x 131 | 132 | def _initialize_weights(self): 133 | for m in self.modules(): 134 | if isinstance(m, 
nn.Conv2d): 135 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 136 | m.weight.data.normal_(0, math.sqrt(2. / n)) 137 | if m.bias is not None: 138 | m.bias.data.zero_() 139 | elif isinstance(m, BatchNorm2d): 140 | m.weight.data.fill_(1) 141 | m.bias.data.zero_() 142 | elif isinstance(m, nn.Linear): 143 | n = m.weight.size(1) 144 | m.weight.data.normal_(0, 0.01) 145 | m.bias.data.zero_() 146 | 147 | 148 | def mobilenetv2(pretrained=False, **kwargs): 149 | """Constructs a MobileNet_V2 model. 150 | Args: 151 | pretrained (bool): If True, returns a model pre-trained on ImageNet 152 | """ 153 | model = MobileNetV2(n_class=1000, **kwargs) 154 | if pretrained: 155 | model.load_state_dict(load_url(model_urls['mobilenetv2']), strict=False) 156 | return model 157 | -------------------------------------------------------------------------------- /schp/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import cv2 13 | import torch 14 | 15 | class BRG2Tensor_transform(object): 16 | def __call__(self, pic): 17 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 18 | if isinstance(img, torch.ByteTensor): 19 | return img.float() 20 | else: 21 | return img 22 | 23 | class BGR2RGB_transform(object): 24 | def __call__(self, tensor): 25 | return tensor[[2,1,0],:,:] 26 | 27 | def flip_back(output_flipped, matched_parts): 28 | ''' 29 | ouput_flipped: numpy.ndarray(batch_size, num_joints, height, width) 30 | ''' 31 | assert output_flipped.ndim == 4,\ 32 | 'output_flipped should be [batch_size, num_joints, height, width]' 33 | 34 | output_flipped = output_flipped[:, :, :, ::-1] 35 | 36 | for pair in matched_parts: 37 | tmp = output_flipped[:, pair[0], :, :].copy() 38 | output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :] 39 | output_flipped[:, pair[1], :, :] = tmp 40 | 41 | return output_flipped 42 | 43 | 44 | def fliplr_joints(joints, joints_vis, width, matched_parts): 45 | """ 46 | flip coords 47 | """ 48 | # Flip horizontal 49 | joints[:, 0] = width - joints[:, 0] - 1 50 | 51 | # Change left-right parts 52 | for pair in matched_parts: 53 | joints[pair[0], :], joints[pair[1], :] = \ 54 | joints[pair[1], :], joints[pair[0], :].copy() 55 | joints_vis[pair[0], :], joints_vis[pair[1], :] = \ 56 | joints_vis[pair[1], :], joints_vis[pair[0], :].copy() 57 | 58 | return joints*joints_vis, joints_vis 59 | 60 | 61 | def transform_preds(coords, center, scale, input_size): 62 | target_coords = np.zeros(coords.shape) 63 | trans = get_affine_transform(center, scale, 0, input_size, inv=1) 64 | for p in range(coords.shape[0]): 65 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 66 | return target_coords 67 | 68 | def transform_parsing(pred, center, scale, width, height, input_size): 69 | 70 | trans = get_affine_transform(center, scale, 0, input_size, inv=1) 71 | target_pred = cv2.warpAffine( 72 | pred, 73 | trans, 74 | (int(width), int(height)), #(int(width), int(height)), 75 | flags=cv2.INTER_NEAREST, 76 | borderMode=cv2.BORDER_CONSTANT, 77 | borderValue=(0)) 78 | 79 | return target_pred 80 | 81 
| def transform_logits(logits, center, scale, width, height, input_size): 82 | 83 | trans = get_affine_transform(center, scale, 0, input_size, inv=1) 84 | channel = logits.shape[2] 85 | target_logits = [] 86 | for i in range(channel): 87 | target_logit = cv2.warpAffine( 88 | logits[:,:,i], 89 | trans, 90 | (int(width), int(height)), #(int(width), int(height)), 91 | flags=cv2.INTER_LINEAR, 92 | borderMode=cv2.BORDER_CONSTANT, 93 | borderValue=(0)) 94 | target_logits.append(target_logit) 95 | target_logits = np.stack(target_logits,axis=2) 96 | 97 | return target_logits 98 | 99 | 100 | def get_affine_transform(center, 101 | scale, 102 | rot, 103 | output_size, 104 | shift=np.array([0, 0], dtype=np.float32), 105 | inv=0): 106 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 107 | print(scale) 108 | scale = np.array([scale, scale]) 109 | 110 | scale_tmp = scale 111 | 112 | src_w = scale_tmp[0] 113 | dst_w = output_size[1] 114 | dst_h = output_size[0] 115 | 116 | rot_rad = np.pi * rot / 180 117 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 118 | dst_dir = np.array([0, (dst_w-1) * -0.5], np.float32) 119 | 120 | src = np.zeros((3, 2), dtype=np.float32) 121 | dst = np.zeros((3, 2), dtype=np.float32) 122 | src[0, :] = center + scale_tmp * shift 123 | src[1, :] = center + src_dir + scale_tmp * shift 124 | dst[0, :] = [(dst_w-1) * 0.5, (dst_h-1) * 0.5] 125 | dst[1, :] = np.array([(dst_w-1) * 0.5, (dst_h-1) * 0.5]) + dst_dir 126 | 127 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 128 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 129 | 130 | if inv: 131 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 132 | else: 133 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 134 | 135 | return trans 136 | 137 | 138 | def affine_transform(pt, t): 139 | new_pt = np.array([pt[0], pt[1], 1.]).T 140 | new_pt = np.dot(t, new_pt) 141 | return new_pt[:2] 142 | 143 | 144 | def get_3rd_point(a, b): 145 | direct = a - b 146 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 147 | 148 | 149 | def get_dir(src_point, rot_rad): 150 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 151 | 152 | src_result = [0, 0] 153 | src_result[0] = src_point[0] * cs - src_point[1] * sn 154 | src_result[1] = src_point[0] * sn + src_point[1] * cs 155 | 156 | return src_result 157 | 158 | 159 | def crop(img, center, scale, output_size, rot=0): 160 | trans = get_affine_transform(center, scale, rot, output_size) 161 | 162 | dst_img = cv2.warpAffine(img, 163 | trans, 164 | (int(output_size[1]), int(output_size[0])), 165 | flags=cv2.INTER_LINEAR) 166 | 167 | return dst_img 168 | -------------------------------------------------------------------------------- /schp/utils/miou.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import numpy as np 4 | 5 | from collections import OrderedDict 6 | from PIL import Image as PILImage 7 | from utils.transforms import transform_parsing 8 | 9 | LABELS = ['Background', 'Hat', 'Hair', 'Glove', 'Sunglasses', 'Upper-clothes', 'Dress', 'Coat', \ 10 | 'Socks', 'Pants', 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm', 'Right-arm', 'Left-leg', 11 | 'Right-leg', 'Left-shoe', 'Right-shoe'] 12 | 13 | 14 | # LABELS = ['Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs'] 15 | 16 | def get_palette(num_cls): 17 | """ Returns the color map for visualizing the segmentation mask. 
18 | Args: 19 | num_cls: Number of classes 20 | Returns: 21 | The color map 22 | """ 23 | 24 | n = num_cls 25 | palette = [0] * (n * 3) 26 | for j in range(0, n): 27 | lab = j 28 | palette[j * 3 + 0] = 0 29 | palette[j * 3 + 1] = 0 30 | palette[j * 3 + 2] = 0 31 | i = 0 32 | while lab: 33 | palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) 34 | palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) 35 | palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) 36 | i += 1 37 | lab >>= 3 38 | return palette 39 | 40 | 41 | def get_confusion_matrix(gt_label, pred_label, num_classes): 42 | """ 43 | Calcute the confusion matrix by given label and pred 44 | :param gt_label: the ground truth label 45 | :param pred_label: the pred label 46 | :param num_classes: the nunber of class 47 | :return: the confusion matrix 48 | """ 49 | index = (gt_label * num_classes + pred_label).astype('int32') 50 | label_count = np.bincount(index) 51 | confusion_matrix = np.zeros((num_classes, num_classes)) 52 | 53 | for i_label in range(num_classes): 54 | for i_pred_label in range(num_classes): 55 | cur_index = i_label * num_classes + i_pred_label 56 | if cur_index < len(label_count): 57 | confusion_matrix[i_label, i_pred_label] = label_count[cur_index] 58 | 59 | return confusion_matrix 60 | 61 | 62 | def compute_mean_ioU(preds, scales, centers, num_classes, datadir, input_size=[473, 473], dataset='val'): 63 | val_file = os.path.join(datadir, dataset + '_id.txt') 64 | val_id = [i_id.strip() for i_id in open(val_file)] 65 | 66 | confusion_matrix = np.zeros((num_classes, num_classes)) 67 | 68 | for i, pred_out in enumerate(preds): 69 | im_name = val_id[i] 70 | gt_path = os.path.join(datadir, dataset + '_segmentations', im_name + '.png') 71 | gt = np.array(PILImage.open(gt_path)) 72 | h, w = gt.shape 73 | s = scales[i] 74 | c = centers[i] 75 | pred = transform_parsing(pred_out, c, s, w, h, input_size) 76 | 77 | gt = np.asarray(gt, dtype=np.int32) 78 | pred = np.asarray(pred, dtype=np.int32) 79 | 80 | ignore_index = gt != 255 81 | 82 | gt = gt[ignore_index] 83 | pred = pred[ignore_index] 84 | 85 | confusion_matrix += get_confusion_matrix(gt, pred, num_classes) 86 | 87 | pos = confusion_matrix.sum(1) 88 | res = confusion_matrix.sum(0) 89 | tp = np.diag(confusion_matrix) 90 | 91 | pixel_accuracy = (tp.sum() / pos.sum()) * 100 92 | mean_accuracy = ((tp / np.maximum(1.0, pos)).mean()) * 100 93 | IoU_array = (tp / np.maximum(1.0, pos + res - tp)) 94 | IoU_array = IoU_array * 100 95 | mean_IoU = IoU_array.mean() 96 | print('Pixel accuracy: %f \n' % pixel_accuracy) 97 | print('Mean accuracy: %f \n' % mean_accuracy) 98 | print('Mean IU: %f \n' % mean_IoU) 99 | name_value = [] 100 | 101 | for i, (label, iou) in enumerate(zip(LABELS, IoU_array)): 102 | name_value.append((label, iou)) 103 | 104 | name_value.append(('Pixel accuracy', pixel_accuracy)) 105 | name_value.append(('Mean accuracy', mean_accuracy)) 106 | name_value.append(('Mean IU', mean_IoU)) 107 | name_value = OrderedDict(name_value) 108 | return name_value 109 | 110 | 111 | def compute_mean_ioU_file(preds_dir, num_classes, datadir, dataset='val'): 112 | list_path = os.path.join(datadir, dataset + '_id.txt') 113 | val_id = [i_id.strip() for i_id in open(list_path)] 114 | 115 | confusion_matrix = np.zeros((num_classes, num_classes)) 116 | 117 | for i, im_name in enumerate(val_id): 118 | gt_path = os.path.join(datadir, 'segmentations', im_name + '.png') 119 | gt = cv2.imread(gt_path, cv2.IMREAD_GRAYSCALE) 120 | 121 | pred_path = os.path.join(preds_dir, im_name + '.png') 122 | 
pred = np.asarray(PILImage.open(pred_path)) 123 | 124 | gt = np.asarray(gt, dtype=np.int32) 125 | pred = np.asarray(pred, dtype=np.int32) 126 | 127 | ignore_index = gt != 255 128 | 129 | gt = gt[ignore_index] 130 | pred = pred[ignore_index] 131 | 132 | confusion_matrix += get_confusion_matrix(gt, pred, num_classes) 133 | 134 | pos = confusion_matrix.sum(1) 135 | res = confusion_matrix.sum(0) 136 | tp = np.diag(confusion_matrix) 137 | 138 | pixel_accuracy = (tp.sum() / pos.sum()) * 100 139 | mean_accuracy = ((tp / np.maximum(1.0, pos)).mean()) * 100 140 | IoU_array = (tp / np.maximum(1.0, pos + res - tp)) 141 | IoU_array = IoU_array * 100 142 | mean_IoU = IoU_array.mean() 143 | print('Pixel accuracy: %f \n' % pixel_accuracy) 144 | print('Mean accuracy: %f \n' % mean_accuracy) 145 | print('Mean IU: %f \n' % mean_IoU) 146 | name_value = [] 147 | 148 | for i, (label, iou) in enumerate(zip(LABELS, IoU_array)): 149 | name_value.append((label, iou)) 150 | 151 | name_value.append(('Pixel accuracy', pixel_accuracy)) 152 | name_value.append(('Mean accuracy', mean_accuracy)) 153 | name_value.append(('Mean IU', mean_IoU)) 154 | name_value = OrderedDict(name_value) 155 | return name_value 156 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cozy Human Parser 2 | 3 | Fast, VRAM-light ComfyUI nodes to generate masks for specific body parts and clothes or fashion items. Runs on CPU and CUDA. 4 | Made with 💚 by the [CozyMantis](https://cozymantis.gumroad.com/) squad. 5 | 6 | | Original | ATR | LIP | Pascal | 7 | | --------------------- | ------------------------ | ------------------------ | ------------------------ | 8 | | ![](assets/demo2.jpg) | ![](assets/demo2atr.png) | ![](assets/demo2lip.png) | ![](assets/demo2pascal.png) | 9 | | ![](assets/demo3.jpg) | ![](assets/demo3atr.png) | ![](assets/demo3lip.png) | ![](assets/demo3pascal.png) | 10 | 11 | ## Installation 12 | 13 | - Clone this repository into your custom_nodes directory, then run `pip install -r requirements.txt` to install the required dependencies. 14 | - Copy the following models to the `models/schp` directory, depending on which parser you would like to use: 15 | - Model based on the LIP dataset: [Google Drive](https://drive.google.com/file/d/1k4dllHpu0bdx38J7H28rVVLpU-kOHmnH/view?usp=sharing) 16 | - Model based on the ATR dataset: [Google Drive](https://drive.google.com/file/d/1ruJg4lqR_jgQPj-9K0PP-L2vJERYOxLP/view?usp=sharing) 17 | - Model based on the Pascal dataset: [Google Drive](https://drive.google.com/file/d/1E5YwNKW2VOEayK9mWCS3Kpsxf-3z04ZE/view?usp=sharing) 18 | 19 | Check below for [Windows troubleshooting](#windows-troubleshooting). 20 | 21 | ## Examples 22 | 23 | ### LIP Parser 24 | 25 | - LIP is the largest single person human parsing dataset with 50000+ images. This dataset focuses on complicated real scenarios. 26 | - mIoU on LIP validation: 59.36 % 27 | - The LIP parser can detect the following categories: 28 | 29 | ``` 30 | ['Background', 'Hat', 'Hair', 'Glove', 'Sunglasses' 'Upper-clothes', 'Dress', 'Coat', 'Socks', 'Pants', 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm', 'Right-arm', 'Left-leg', 'Right-leg', 'Left-shoe', 'Right-shoe'] 31 | ``` 32 | 33 | ![assets/lipexample.png](assets/lipexample.png) 34 | 35 | ### ATR Parser 36 | 37 | - ATR is a large single person human parsing dataset with 17000+ images. This dataset focuses on fashion AI. 
38 | - mIoU on ATR test: 82.29% 39 | - The ATR parser can detect the following categories: 40 | 41 | ``` 42 | ['Background', 'Hat', 'Hair', 'Sunglasses', 'Upper-clothes', 'Skirt', 'Pants', 'Dress', 'Belt', 'Left-shoe', 'Right-shoe', 'Face', 'Left-leg', 'Right-leg', 'Left-arm', 'Right-arm', 'Bag', 'Scarf'] 43 | ``` 44 | 45 | ![assets/atrexample.png](assets/atrexample.png) 46 | 47 | ### Pascal Parser 48 | 49 | - Pascal Person Part is a tiny single person human parsing dataset with 3000+ images. This dataset focuses on body parts segmentation. 50 | - mIoU on Pascal-Person-Part validation: 71.46 % 51 | - The Pascal parser can detect the following categories: 52 | 53 | ``` 54 | ['Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs'] 55 | ``` 56 | 57 | ![assets/pascalexample.png](assets/pascalexample.png) 58 | 59 | ## Windows Troubleshooting 60 | 61 | - ### Ninja is required to load C++ extensions 62 | 63 | Windows can't find the "ninja.exe" file. The file is probably getting downloaded/installed to something like `X:\path\to\comfy\python_embeded\lib\site-packages\ninja\data\bin`, but it's not properly getting added to the system path, so the OS can't invoke it. 64 | 65 | The solution is to: 66 | - locate the "ninja.exe" file; 67 | - add the full path to ninja.exe into the system PATH: 68 | - see https://www.mathworks.com/matlabcentral/answers/94933-how-do-i-edit-my-system-path-in-windows 69 | - remember, you need to enter the path to the folder containing the ninja.exe binary) 70 | - see [this issue](https://github.com/cozymantis/human-parser-comfyui-node/issues/3) for more details 71 | 72 | - ### NK1104: cannot open file 'python311.lib' (or similar) 73 | 74 | Windows can't locate the `python311.lib` library. You need to search for it on your system, then add the library's parent directory to the "LIB" environment variable. 75 | - see [this SO article](https://stackoverflow.com/questions/36419747/link-fatal-error-lnk1104-cannot-open-file-python27-lib) 76 | - also see [this issue](https://github.com/cozymantis/human-parser-comfyui-node/issues/1) 77 | 78 | - ### Command '['where', 'cl']' returned non-zero exit status 1 79 | 80 | Windows can't locate "cl.exe" which is the compiler/linker tool: https://learn.microsoft.com/en-us/cpp/build/reference/compiler-options?view=msvc-170 81 | 82 | > You can start this tool only from a Visual Studio developer command prompt. You cannot start it from a system command prompt or from File Explorer. For more information, see Use the MSVC toolset from the command line. 83 | 84 | First, make sure you've installed all of the things highlighted below: 85 | 86 | ![image](https://github.com/cozymantis/human-parser-comfyui-node/assets/5381731/76fbff32-be60-4120-a682-4fa7588e9bf4) 87 | 88 | Then, it looks like you'll need to start ComfyUI from the developer command prompt instead of the regular cmd. Here's docs on how to launch the dev command prompt: https://learn.microsoft.com/en-us/visualstudio/ide/reference/command-prompt-powershell?view=vs-2022 89 | 90 | You'll want to run something similar to: 91 | 92 | ```bash 93 | cd X:\path\to\comfy 94 | python main.py 95 | ``` 96 | 97 | - ### error: first parameter of allocation function must be of type "size_t" 98 | 99 | Make sure you're running the "x64 Native Tools Command Prompt" instead of the x86 one. Type "x64" in the start menu to locate it. 
100 | 101 | ![image](https://github.com/cozymantis/human-parser-comfyui-node/assets/5381731/120f5a1b-adf3-4fb1-a3df-5c0006ce0a6e) 102 | 103 | ## Acknowledgements 104 | 105 | Based on the excellent paper ["Self-Correction for Human Parsing"](https://github.com/GoGoDuck912/Self-Correction-Human-Parsing) by Li, Peike and Xu, Yunqiu and Wei, Yunchao and Yang, Yi, and their original code that we've updated to also run on CPUs. 106 | 107 | ```bibtex 108 | @article{li2020self, 109 | title={Self-Correction for Human Parsing}, 110 | author={Li, Peike and Xu, Yunqiu and Wei, Yunchao and Yang, Yi}, 111 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, 112 | year={2020}, 113 | doi={10.1109/TPAMI.2020.3048039}} 114 | ``` 115 | -------------------------------------------------------------------------------- /schp/utils/criterion.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : criterion.py 8 | @Time : 8/30/19 8:59 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 12 | """ 13 | 14 | import torch.nn as nn 15 | import torch 16 | import numpy as np 17 | from torch.nn import functional as F 18 | from .lovasz_softmax import LovaszSoftmax 19 | from .kl_loss import KLDivergenceLoss 20 | from .consistency_loss import ConsistencyLoss 21 | 22 | NUM_CLASSES = 20 23 | 24 | 25 | class CriterionAll(nn.Module): 26 | def __init__(self, use_class_weight=False, ignore_index=255, lambda_1=1, lambda_2=1, lambda_3=1, 27 | num_classes=20): 28 | super(CriterionAll, self).__init__() 29 | self.ignore_index = ignore_index 30 | self.use_class_weight = use_class_weight 31 | self.criterion = torch.nn.CrossEntropyLoss(ignore_index=ignore_index) 32 | self.lovasz = LovaszSoftmax(ignore_index=ignore_index) 33 | self.kldiv = KLDivergenceLoss(ignore_index=ignore_index) 34 | self.reg = ConsistencyLoss(ignore_index=ignore_index) 35 | self.lamda_1 = lambda_1 36 | self.lamda_2 = lambda_2 37 | self.lamda_3 = lambda_3 38 | self.num_classes = num_classes 39 | 40 | def parsing_loss(self, preds, target, cycle_n=None): 41 | """ 42 | Loss function definition. 43 | 44 | Args: 45 | preds: [[parsing result1, parsing result2],[edge result]] 46 | target: [parsing label, egde label] 47 | soft_preds: [[parsing result1, parsing result2],[edge result]] 48 | Returns: 49 | Calculated Loss. 
50 | """ 51 | h, w = target[0].size(1), target[0].size(2) 52 | 53 | pos_num = torch.sum(target[1] == 1, dtype=torch.float) 54 | neg_num = torch.sum(target[1] == 0, dtype=torch.float) 55 | 56 | weight_pos = neg_num / (pos_num + neg_num) 57 | weight_neg = pos_num / (pos_num + neg_num) 58 | weights = torch.tensor([weight_neg, weight_pos]) # edge loss weight 59 | 60 | loss = 0 61 | 62 | # loss for segmentation 63 | preds_parsing = preds[0] 64 | for pred_parsing in preds_parsing: 65 | scale_pred = F.interpolate(input=pred_parsing, size=(h, w), 66 | mode='bilinear', align_corners=True) 67 | 68 | loss += 0.5 * self.lamda_1 * self.lovasz(scale_pred, target[0]) 69 | if target[2] is None: 70 | loss += 0.5 * self.lamda_1 * self.criterion(scale_pred, target[0]) 71 | else: 72 | soft_scale_pred = F.interpolate(input=target[2], size=(h, w), 73 | mode='bilinear', align_corners=True) 74 | soft_scale_pred = moving_average(soft_scale_pred, to_one_hot(target[0], num_cls=self.num_classes), 75 | 1.0 / (cycle_n + 1.0)) 76 | loss += 0.5 * self.lamda_1 * self.kldiv(scale_pred, soft_scale_pred, target[0]) 77 | 78 | # loss for edge 79 | preds_edge = preds[1] 80 | for pred_edge in preds_edge: 81 | scale_pred = F.interpolate(input=pred_edge, size=(h, w), 82 | mode='bilinear', align_corners=True) 83 | if target[3] is None: 84 | loss += self.lamda_2 * F.cross_entropy(scale_pred, target[1], 85 | weights.cuda(), ignore_index=self.ignore_index) 86 | else: 87 | soft_scale_edge = F.interpolate(input=target[3], size=(h, w), 88 | mode='bilinear', align_corners=True) 89 | soft_scale_edge = moving_average(soft_scale_edge, to_one_hot(target[1], num_cls=2), 90 | 1.0 / (cycle_n + 1.0)) 91 | loss += self.lamda_2 * self.kldiv(scale_pred, soft_scale_edge, target[0]) 92 | 93 | # consistency regularization 94 | preds_parsing = preds[0] 95 | preds_edge = preds[1] 96 | for pred_parsing in preds_parsing: 97 | scale_pred = F.interpolate(input=pred_parsing, size=(h, w), 98 | mode='bilinear', align_corners=True) 99 | scale_edge = F.interpolate(input=preds_edge[0], size=(h, w), 100 | mode='bilinear', align_corners=True) 101 | loss += self.lamda_3 * self.reg(scale_pred, scale_edge, target[0]) 102 | 103 | return loss 104 | 105 | def forward(self, preds, target, cycle_n=None): 106 | loss = self.parsing_loss(preds, target, cycle_n) 107 | return loss 108 | 109 | def _generate_weights(self, masks, num_classes): 110 | """ 111 | masks: torch.Tensor with shape [B, H, W] 112 | """ 113 | masks_label = masks.data.cpu().numpy().astype(np.int64) 114 | pixel_nums = [] 115 | tot_pixels = 0 116 | for i in range(num_classes): 117 | pixel_num_of_cls_i = np.sum(masks_label == i).astype(np.float) 118 | pixel_nums.append(pixel_num_of_cls_i) 119 | tot_pixels += pixel_num_of_cls_i 120 | weights = [] 121 | for i in range(num_classes): 122 | weights.append( 123 | (tot_pixels - pixel_nums[i]) / tot_pixels / (num_classes - 1) 124 | ) 125 | weights = np.array(weights, dtype=np.float) 126 | # weights = torch.from_numpy(weights).float().to(masks.device) 127 | return weights 128 | 129 | 130 | def moving_average(target1, target2, alpha=1.0): 131 | target = 0 132 | target += (1.0 - alpha) * target1 133 | target += target2 * alpha 134 | return target 135 | 136 | 137 | def to_one_hot(tensor, num_cls, dim=1, ignore_index=255): 138 | b, h, w = tensor.shape 139 | tensor[tensor == ignore_index] = 0 140 | onehot_tensor = torch.zeros(b, num_cls, h, w).cuda() 141 | onehot_tensor.scatter_(dim, tensor.unsqueeze(dim), 1) 142 | return onehot_tensor 143 | 
-------------------------------------------------------------------------------- /schp/simple_extractor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : simple_extractor.py 8 | @Time : 8/30/19 8:59 PM 9 | @Desc : Simple Extractor 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 12 | """ 13 | 14 | import os 15 | import torch 16 | import argparse 17 | import numpy as np 18 | from PIL import Image 19 | from tqdm import tqdm 20 | 21 | from torch.utils.data import DataLoader 22 | import torchvision.transforms as transforms 23 | 24 | import networks 25 | from utils.transforms import transform_logits 26 | from datasets.simple_extractor_dataset import SimpleFolderDataset 27 | 28 | dataset_settings = { 29 | 'lip': { 30 | 'input_size': [473, 473], 31 | 'num_classes': 20, 32 | 'label': ['Background', 'Hat', 'Hair', 'Glove', 'Sunglasses', 'Upper-clothes', 'Dress', 'Coat', 33 | 'Socks', 'Pants', 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm', 'Right-arm', 34 | 'Left-leg', 'Right-leg', 'Left-shoe', 'Right-shoe'] 35 | }, 36 | 'atr': { 37 | 'input_size': [512, 512], 38 | 'num_classes': 18, 39 | 'label': ['Background', 'Hat', 'Hair', 'Sunglasses', 'Upper-clothes', 'Skirt', 'Pants', 'Dress', 'Belt', 40 | 'Left-shoe', 'Right-shoe', 'Face', 'Left-leg', 'Right-leg', 'Left-arm', 'Right-arm', 'Bag', 'Scarf'] 41 | }, 42 | 'pascal': { 43 | 'input_size': [512, 512], 44 | 'num_classes': 7, 45 | 'label': ['Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs'], 46 | } 47 | } 48 | 49 | 50 | def get_arguments(): 51 | """Parse all the arguments provided from the CLI. 52 | Returns: 53 | A list of parsed arguments. 54 | """ 55 | parser = argparse.ArgumentParser(description="Self Correction for Human Parsing") 56 | 57 | parser.add_argument("--dataset", type=str, default='lip', choices=['lip', 'atr', 'pascal']) 58 | parser.add_argument("--model-restore", type=str, default='', help="restore pretrained model parameters.") 59 | parser.add_argument("--gpu", type=str, default='0', help="choose gpu device.") 60 | parser.add_argument("--input-dir", type=str, default='', help="path of input image folder.") 61 | parser.add_argument("--output-dir", type=str, default='', help="path of output image folder.") 62 | parser.add_argument("--logits", action='store_true', default=False, help="whether to save the logits.") 63 | 64 | return parser.parse_args() 65 | 66 | 67 | def get_palette(num_cls): 68 | """ Returns the color map for visualizing the segmentation mask. 
69 | Args: 70 | num_cls: Number of classes 71 | Returns: 72 | The color map 73 | """ 74 | n = num_cls 75 | palette = [0] * (n * 3) 76 | for j in range(0, n): 77 | lab = j 78 | palette[j * 3 + 0] = 0 79 | palette[j * 3 + 1] = 0 80 | palette[j * 3 + 2] = 0 81 | i = 0 82 | while lab: 83 | palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) 84 | palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) 85 | palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) 86 | i += 1 87 | lab >>= 3 88 | return palette 89 | 90 | 91 | def main(): 92 | args = get_arguments() 93 | 94 | gpus = [int(i) for i in args.gpu.split(',')] 95 | assert len(gpus) == 1 96 | if not args.gpu == 'None': 97 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu 98 | 99 | num_classes = dataset_settings[args.dataset]['num_classes'] 100 | input_size = dataset_settings[args.dataset]['input_size'] 101 | label = dataset_settings[args.dataset]['label'] 102 | print("Evaluating total class number {} with {}".format(num_classes, label)) 103 | 104 | model = networks.init_model('resnet101', num_classes=num_classes, pretrained=None) 105 | 106 | state_dict = torch.load(args.model_restore)['state_dict'] 107 | from collections import OrderedDict 108 | new_state_dict = OrderedDict() 109 | for k, v in state_dict.items(): 110 | name = k[7:] # remove `module.` 111 | new_state_dict[name] = v 112 | model.load_state_dict(new_state_dict) 113 | model.cuda() 114 | model.eval() 115 | 116 | transform = transforms.Compose([ 117 | transforms.ToTensor(), 118 | transforms.Normalize(mean=[0.406, 0.456, 0.485], std=[0.225, 0.224, 0.229]) 119 | ]) 120 | dataset = SimpleFolderDataset(root=args.input_dir, input_size=input_size, transform=transform) 121 | dataloader = DataLoader(dataset) 122 | 123 | if not os.path.exists(args.output_dir): 124 | os.makedirs(args.output_dir) 125 | 126 | palette = get_palette(num_classes) 127 | with torch.no_grad(): 128 | for idx, batch in enumerate(tqdm(dataloader)): 129 | image, meta = batch 130 | img_name = meta['name'][0] 131 | c = meta['center'].numpy()[0] 132 | s = meta['scale'].numpy()[0] 133 | w = meta['width'].numpy()[0] 134 | h = meta['height'].numpy()[0] 135 | 136 | output = model(image.cuda()) 137 | upsample = torch.nn.Upsample(size=input_size, mode='bilinear', align_corners=True) 138 | upsample_output = upsample(output[0][-1][0].unsqueeze(0)) 139 | upsample_output = upsample_output.squeeze() 140 | upsample_output = upsample_output.permute(1, 2, 0) # CHW -> HWC 141 | 142 | logits_result = transform_logits(upsample_output.data.cpu().numpy(), c, s, w, h, input_size=input_size) 143 | parsing_result = np.argmax(logits_result, axis=2) 144 | parsing_result_path = os.path.join(args.output_dir, img_name[:-4] + '.png') 145 | output_img = Image.fromarray(np.asarray(parsing_result, dtype=np.uint8)) 146 | output_img.putpalette(palette) 147 | output_img.save(parsing_result_path) 148 | if args.logits: 149 | logits_result_path = os.path.join(args.output_dir, img_name[:-4] + '.npy') 150 | np.save(logits_result_path, logits_result) 151 | return 152 | 153 | 154 | if __name__ == '__main__': 155 | main() 156 | -------------------------------------------------------------------------------- /schp/README.md: -------------------------------------------------------------------------------- 1 | # Self Correction for Human Parsing 2 | 3 | ![Python 3.6](https://img.shields.io/badge/python-3.6-green.svg) 4 | [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT) 5 | 6 | An out-of-box human parsing representation 
extractor. 7 | 8 | Our solution ranks 1st for all human parsing tracks (including single, multiple and video) in the third LIP challenge! 9 | 10 | ![lip-visualization](./demo/lip-visualization.jpg) 11 | 12 | Features: 13 | - [x] Out-of-box human parsing extractor for other downstream applications. 14 | - [x] Pretrained models on three popular single-person human parsing datasets. 15 | - [x] Training and inference code. 16 | - [x] Simple yet effective extension on multi-person and video human parsing tasks. 17 | 18 | ## Requirements 19 | 20 | ``` 21 | conda env create -f environment.yaml 22 | conda activate schp 23 | pip install -r requirements.txt 24 | ``` 25 | 26 | ## Simple Out-of-Box Extractor 27 | 28 | The easiest way to get started is to use our trained SCHP models on your own images to extract human parsing representations. Here we provide state-of-the-art [trained models](https://drive.google.com/drive/folders/1uOaQCpNtosIjEL2phQKEdiYd0Td18jNo?usp=sharing) on three popular datasets. These three datasets use different label systems, so you can choose the one that best fits your own task. 29 | 30 | **LIP** ([exp-schp-201908261155-lip.pth](https://drive.google.com/file/d/1k4dllHpu0bdx38J7H28rVVLpU-kOHmnH/view?usp=sharing)) 31 | 32 | * mIoU on LIP validation: **59.36%**. 33 | 34 | * LIP is the largest single-person human parsing dataset, with 50000+ images. This dataset focuses more on complicated real-world scenarios. LIP has 20 labels, including 'Background', 'Hat', 'Hair', 'Glove', 'Sunglasses', 'Upper-clothes', 'Dress', 'Coat', 'Socks', 'Pants', 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm', 'Right-arm', 'Left-leg', 'Right-leg', 'Left-shoe', 'Right-shoe'. 35 | 36 | **ATR** ([exp-schp-201908301523-atr.pth](https://drive.google.com/file/d/1ruJg4lqR_jgQPj-9K0PP-L2vJERYOxLP/view?usp=sharing)) 37 | 38 | * mIoU on ATR test: **82.29%**. 39 | 40 | * ATR is a large single-person human parsing dataset, with 17000+ images. This dataset focuses more on fashion AI. ATR has 18 labels, including 'Background', 'Hat', 'Hair', 'Sunglasses', 'Upper-clothes', 'Skirt', 'Pants', 'Dress', 'Belt', 'Left-shoe', 'Right-shoe', 'Face', 'Left-leg', 'Right-leg', 'Left-arm', 'Right-arm', 'Bag', 'Scarf'. 41 | 42 | **Pascal-Person-Part** ([exp-schp-201908270938-pascal-person-part.pth](https://drive.google.com/file/d/1E5YwNKW2VOEayK9mWCS3Kpsxf-3z04ZE/view?usp=sharing)) 43 | 44 | * mIoU on Pascal-Person-Part validation: **71.46%**. 45 | 46 | * Pascal-Person-Part is a tiny single-person human parsing dataset, with 3000+ images. This dataset focuses more on body part segmentation. Pascal-Person-Part has 7 labels, including 'Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs'. 47 | 48 | Choose one and have fun on your own task! 49 | 50 | To extract the human parsing representation, simply put your own images in the `INPUT_PATH` folder, then download a pretrained model and run the following command. Output images with the same file names will be saved in `OUTPUT_PATH`. 51 | 52 | ``` 53 | python simple_extractor.py --dataset [DATASET] --model-restore [CHECKPOINT_PATH] --input-dir [INPUT_PATH] --output-dir [OUTPUT_PATH] 54 | ``` 55 | 56 | **[Updated]** There is also a [colab demo example](https://colab.research.google.com/drive/1JOwOPaChoc9GzyBi5FUEYTSaP2qxJl10?usp=sharing) for quick inference, provided by [@levindabhi](https://github.com/levindabhi). 57 | 58 | The `DATASET` argument accepts three options: 'lip', 'atr' and 'pascal'.
Note that each pixel in the output images denotes the predicted label number. The output images have the same size as the input ones. For better visualization, we embed a palette in the output images. We suggest reading the images with `PIL`. 59 | 60 | If you need not only the final parsing images but also the feature map representations, add the `--logits` flag to save the output feature maps. These feature maps are the logits before the softmax layer. 61 | 62 | ## Dataset Preparation 63 | 64 | Please download the [LIP](http://sysu-hcp.net/lip/) dataset and organize it following the structure below. 65 | 66 | ```commandline 67 | data/LIP 68 | |--- train_images # 30462 training single person images 69 | |--- val_images # 10000 validation single person images 70 | |--- train_segmentations # 30462 training annotations 71 | |--- val_segmentations # 10000 validation annotations 72 | |--- train_id.txt # training image list 73 | |--- val_id.txt # validation image list 74 | ``` 75 | 76 | ## Training 77 | 78 | ``` 79 | python train.py 80 | ``` 81 | By default, the trained model will be saved in the `./log` directory. Please read the arguments for more details. 82 | 83 | ## Evaluation 84 | ``` 85 | python evaluate.py --model-restore [CHECKPOINT_PATH] 86 | ``` 87 | `CHECKPOINT_PATH` should be the path to the trained model. 88 | 89 | ## Extension on Multiple Human Parsing 90 | 91 | Please read [MultipleHumanParsing.md](./mhp_extension/README.md) for more details. 92 | 93 | ## Citation 94 | 95 | Please cite our work if you find this repo useful in your research. 96 | 97 | ```latex 98 | @article{li2020self, 99 | title={Self-Correction for Human Parsing}, 100 | author={Li, Peike and Xu, Yunqiu and Wei, Yunchao and Yang, Yi}, 101 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, 102 | year={2020}, 103 | doi={10.1109/TPAMI.2020.3048039}} 104 | ``` 105 | 106 | ## Visualization 107 | 108 | * Source Image. 109 | ![demo](./demo/demo.jpg) 110 | * LIP Parsing Result. 111 | ![demo-lip](./demo/demo_lip.png) 112 | * ATR Parsing Result. 113 | ![demo-atr](./demo/demo_atr.png) 114 | * Pascal-Person-Part Parsing Result. 115 | ![demo-pascal](./demo/demo_pascal.png) 116 | * Source Image. 117 | ![demo](./mhp_extension/demo/demo.jpg) 118 | * Instance Human Mask. 119 | ![demo-lip](./mhp_extension/demo/demo_instance_human_mask.png) 120 | * Global Human Parsing Result. 121 | ![demo-lip](./mhp_extension/demo/demo_global_human_parsing.png) 122 | * Multiple Human Parsing Result. 123 | ![demo-lip](./mhp_extension/demo/demo_multiple_human_parsing.png) 124 | 125 | 126 | ## Related 127 | Our code adopts [InplaceSyncBN](https://github.com/mapillary/inplace_abn) to save GPU memory. 128 | 129 | There is also a [PaddlePaddle](https://github.com/PaddlePaddle/PaddleSeg/tree/develop/contrib/ACE2P) implementation of this project. 130 | -------------------------------------------------------------------------------- /schp/networks/backbone/resnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : resnet.py 8 | @Time : 8/4/19 3:35 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree.
12 | """ 13 | 14 | import functools 15 | import torch.nn as nn 16 | import math 17 | from torch.utils.model_zoo import load_url 18 | 19 | from modules import InPlaceABNSync 20 | 21 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') 22 | 23 | __all__ = ['ResNet', 'resnet18', 'resnet50', 'resnet101'] # resnet101 is coming soon! 24 | 25 | model_urls = { 26 | 'resnet18': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet18-imagenet.pth', 27 | 'resnet50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet50-imagenet.pth', 28 | 'resnet101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet101-imagenet.pth' 29 | } 30 | 31 | 32 | def conv3x3(in_planes, out_planes, stride=1): 33 | "3x3 convolution with padding" 34 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 35 | padding=1, bias=False) 36 | 37 | 38 | class BasicBlock(nn.Module): 39 | expansion = 1 40 | 41 | def __init__(self, inplanes, planes, stride=1, downsample=None): 42 | super(BasicBlock, self).__init__() 43 | self.conv1 = conv3x3(inplanes, planes, stride) 44 | self.bn1 = BatchNorm2d(planes) 45 | self.relu = nn.ReLU(inplace=True) 46 | self.conv2 = conv3x3(planes, planes) 47 | self.bn2 = BatchNorm2d(planes) 48 | self.downsample = downsample 49 | self.stride = stride 50 | 51 | def forward(self, x): 52 | residual = x 53 | 54 | out = self.conv1(x) 55 | out = self.bn1(out) 56 | out = self.relu(out) 57 | 58 | out = self.conv2(out) 59 | out = self.bn2(out) 60 | 61 | if self.downsample is not None: 62 | residual = self.downsample(x) 63 | 64 | out += residual 65 | out = self.relu(out) 66 | 67 | return out 68 | 69 | 70 | class Bottleneck(nn.Module): 71 | expansion = 4 72 | 73 | def __init__(self, inplanes, planes, stride=1, downsample=None): 74 | super(Bottleneck, self).__init__() 75 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 76 | self.bn1 = BatchNorm2d(planes) 77 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 78 | padding=1, bias=False) 79 | self.bn2 = BatchNorm2d(planes) 80 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 81 | self.bn3 = BatchNorm2d(planes * 4) 82 | self.relu = nn.ReLU(inplace=True) 83 | self.downsample = downsample 84 | self.stride = stride 85 | 86 | def forward(self, x): 87 | residual = x 88 | 89 | out = self.conv1(x) 90 | out = self.bn1(out) 91 | out = self.relu(out) 92 | 93 | out = self.conv2(out) 94 | out = self.bn2(out) 95 | out = self.relu(out) 96 | 97 | out = self.conv3(out) 98 | out = self.bn3(out) 99 | 100 | if self.downsample is not None: 101 | residual = self.downsample(x) 102 | 103 | out += residual 104 | out = self.relu(out) 105 | 106 | return out 107 | 108 | 109 | class ResNet(nn.Module): 110 | 111 | def __init__(self, block, layers, num_classes=1000): 112 | self.inplanes = 128 113 | super(ResNet, self).__init__() 114 | self.conv1 = conv3x3(3, 64, stride=2) 115 | self.bn1 = BatchNorm2d(64) 116 | self.relu1 = nn.ReLU(inplace=True) 117 | self.conv2 = conv3x3(64, 64) 118 | self.bn2 = BatchNorm2d(64) 119 | self.relu2 = nn.ReLU(inplace=True) 120 | self.conv3 = conv3x3(64, 128) 121 | self.bn3 = BatchNorm2d(128) 122 | self.relu3 = nn.ReLU(inplace=True) 123 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 124 | 125 | self.layer1 = self._make_layer(block, 64, layers[0]) 126 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 127 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 128 | self.layer4 = self._make_layer(block, 
512, layers[3], stride=2) 129 | self.avgpool = nn.AvgPool2d(7, stride=1) 130 | self.fc = nn.Linear(512 * block.expansion, num_classes) 131 | 132 | for m in self.modules(): 133 | if isinstance(m, nn.Conv2d): 134 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 135 | m.weight.data.normal_(0, math.sqrt(2. / n)) 136 | elif isinstance(m, BatchNorm2d): 137 | m.weight.data.fill_(1) 138 | m.bias.data.zero_() 139 | 140 | def _make_layer(self, block, planes, blocks, stride=1): 141 | downsample = None 142 | if stride != 1 or self.inplanes != planes * block.expansion: 143 | downsample = nn.Sequential( 144 | nn.Conv2d(self.inplanes, planes * block.expansion, 145 | kernel_size=1, stride=stride, bias=False), 146 | BatchNorm2d(planes * block.expansion), 147 | ) 148 | 149 | layers = [] 150 | layers.append(block(self.inplanes, planes, stride, downsample)) 151 | self.inplanes = planes * block.expansion 152 | for i in range(1, blocks): 153 | layers.append(block(self.inplanes, planes)) 154 | 155 | return nn.Sequential(*layers) 156 | 157 | def forward(self, x): 158 | x = self.relu1(self.bn1(self.conv1(x))) 159 | x = self.relu2(self.bn2(self.conv2(x))) 160 | x = self.relu3(self.bn3(self.conv3(x))) 161 | x = self.maxpool(x) 162 | 163 | x = self.layer1(x) 164 | x = self.layer2(x) 165 | x = self.layer3(x) 166 | x = self.layer4(x) 167 | 168 | x = self.avgpool(x) 169 | x = x.view(x.size(0), -1) 170 | x = self.fc(x) 171 | 172 | return x 173 | 174 | 175 | def resnet18(pretrained=False, **kwargs): 176 | """Constructs a ResNet-18 model. 177 | Args: 178 | pretrained (bool): If True, returns a model pre-trained on ImageNet 179 | """ 180 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 181 | if pretrained: 182 | model.load_state_dict(load_url(model_urls['resnet18'])) 183 | return model 184 | 185 | 186 | def resnet50(pretrained=False, **kwargs): 187 | """Constructs a ResNet-50 model. 188 | Args: 189 | pretrained (bool): If True, returns a model pre-trained on ImageNet 190 | """ 191 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 192 | if pretrained: 193 | model.load_state_dict(load_url(model_urls['resnet50']), strict=False) 194 | return model 195 | 196 | 197 | def resnet101(pretrained=False, **kwargs): 198 | """Constructs a ResNet-101 model. 199 | Args: 200 | pretrained (bool): If True, returns a model pre-trained on ImageNet 201 | """ 202 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 203 | if pretrained: 204 | model.load_state_dict(load_url(model_urls['resnet101']), strict=False) 205 | return model 206 | -------------------------------------------------------------------------------- /schp/datasets/datasets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : datasets.py 8 | @Time : 8/4/19 3:35 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 
12 | """ 13 | 14 | import os 15 | import numpy as np 16 | import random 17 | import torch 18 | import cv2 19 | from torch.utils import data 20 | from utils.transforms import get_affine_transform 21 | 22 | 23 | class LIPDataSet(data.Dataset): 24 | def __init__(self, root, dataset, crop_size=[473, 473], scale_factor=0.25, 25 | rotation_factor=30, ignore_label=255, transform=None): 26 | self.root = root 27 | self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0] 28 | self.crop_size = np.asarray(crop_size) 29 | self.ignore_label = ignore_label 30 | self.scale_factor = scale_factor 31 | self.rotation_factor = rotation_factor 32 | self.flip_prob = 0.5 33 | self.transform = transform 34 | self.dataset = dataset 35 | 36 | list_path = os.path.join(self.root, self.dataset + '_id.txt') 37 | train_list = [i_id.strip() for i_id in open(list_path)] 38 | 39 | self.train_list = train_list 40 | self.number_samples = len(self.train_list) 41 | 42 | def __len__(self): 43 | return self.number_samples 44 | 45 | def _box2cs(self, box): 46 | x, y, w, h = box[:4] 47 | return self._xywh2cs(x, y, w, h) 48 | 49 | def _xywh2cs(self, x, y, w, h): 50 | center = np.zeros((2), dtype=np.float32) 51 | center[0] = x + w * 0.5 52 | center[1] = y + h * 0.5 53 | if w > self.aspect_ratio * h: 54 | h = w * 1.0 / self.aspect_ratio 55 | elif w < self.aspect_ratio * h: 56 | w = h * self.aspect_ratio 57 | scale = np.array([w * 1.0, h * 1.0], dtype=np.float32) 58 | return center, scale 59 | 60 | def __getitem__(self, index): 61 | train_item = self.train_list[index] 62 | 63 | im_path = os.path.join(self.root, self.dataset + '_images', train_item + '.jpg') 64 | parsing_anno_path = os.path.join(self.root, self.dataset + '_segmentations', train_item + '.png') 65 | 66 | im = cv2.imread(im_path, cv2.IMREAD_COLOR) 67 | h, w, _ = im.shape 68 | parsing_anno = np.zeros((h, w), dtype=np.long) 69 | 70 | # Get person center and scale 71 | person_center, s = self._box2cs([0, 0, w - 1, h - 1]) 72 | r = 0 73 | 74 | if self.dataset != 'test': 75 | # Get pose annotation 76 | parsing_anno = cv2.imread(parsing_anno_path, cv2.IMREAD_GRAYSCALE) 77 | if self.dataset == 'train' or self.dataset == 'trainval': 78 | sf = self.scale_factor 79 | rf = self.rotation_factor 80 | s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf) 81 | r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) if random.random() <= 0.6 else 0 82 | 83 | if random.random() <= self.flip_prob: 84 | im = im[:, ::-1, :] 85 | parsing_anno = parsing_anno[:, ::-1] 86 | person_center[0] = im.shape[1] - person_center[0] - 1 87 | right_idx = [15, 17, 19] 88 | left_idx = [14, 16, 18] 89 | for i in range(0, 3): 90 | right_pos = np.where(parsing_anno == right_idx[i]) 91 | left_pos = np.where(parsing_anno == left_idx[i]) 92 | parsing_anno[right_pos[0], right_pos[1]] = left_idx[i] 93 | parsing_anno[left_pos[0], left_pos[1]] = right_idx[i] 94 | 95 | trans = get_affine_transform(person_center, s, r, self.crop_size) 96 | input = cv2.warpAffine( 97 | im, 98 | trans, 99 | (int(self.crop_size[1]), int(self.crop_size[0])), 100 | flags=cv2.INTER_LINEAR, 101 | borderMode=cv2.BORDER_CONSTANT, 102 | borderValue=(0, 0, 0)) 103 | 104 | if self.transform: 105 | input = self.transform(input) 106 | 107 | meta = { 108 | 'name': train_item, 109 | 'center': person_center, 110 | 'height': h, 111 | 'width': w, 112 | 'scale': s, 113 | 'rotation': r 114 | } 115 | 116 | if self.dataset == 'val' or self.dataset == 'test': 117 | return input, meta 118 | else: 119 | label_parsing = cv2.warpAffine( 120 | parsing_anno, 121 | 
trans, 122 | (int(self.crop_size[1]), int(self.crop_size[0])), 123 | flags=cv2.INTER_NEAREST, 124 | borderMode=cv2.BORDER_CONSTANT, 125 | borderValue=(255)) 126 | 127 | label_parsing = torch.from_numpy(label_parsing) 128 | 129 | return input, label_parsing, meta 130 | 131 | 132 | class LIPDataValSet(data.Dataset): 133 | def __init__(self, root, dataset='val', crop_size=[473, 473], transform=None, flip=False): 134 | self.root = root 135 | self.crop_size = crop_size 136 | self.transform = transform 137 | self.flip = flip 138 | self.dataset = dataset 139 | self.root = root 140 | self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0] 141 | self.crop_size = np.asarray(crop_size) 142 | 143 | list_path = os.path.join(self.root, self.dataset + '_id.txt') 144 | val_list = [i_id.strip() for i_id in open(list_path)] 145 | 146 | self.val_list = val_list 147 | self.number_samples = len(self.val_list) 148 | 149 | def __len__(self): 150 | return len(self.val_list) 151 | 152 | def _box2cs(self, box): 153 | x, y, w, h = box[:4] 154 | return self._xywh2cs(x, y, w, h) 155 | 156 | def _xywh2cs(self, x, y, w, h): 157 | center = np.zeros((2), dtype=np.float32) 158 | center[0] = x + w * 0.5 159 | center[1] = y + h * 0.5 160 | if w > self.aspect_ratio * h: 161 | h = w * 1.0 / self.aspect_ratio 162 | elif w < self.aspect_ratio * h: 163 | w = h * self.aspect_ratio 164 | scale = np.array([w * 1.0, h * 1.0], dtype=np.float32) 165 | 166 | return center, scale 167 | 168 | def __getitem__(self, index): 169 | val_item = self.val_list[index] 170 | # Load training image 171 | im_path = os.path.join(self.root, self.dataset + '_images', val_item + '.jpg') 172 | im = cv2.imread(im_path, cv2.IMREAD_COLOR) 173 | h, w, _ = im.shape 174 | # Get person center and scale 175 | person_center, s = self._box2cs([0, 0, w - 1, h - 1]) 176 | r = 0 177 | trans = get_affine_transform(person_center, s, r, self.crop_size) 178 | input = cv2.warpAffine( 179 | im, 180 | trans, 181 | (int(self.crop_size[1]), int(self.crop_size[0])), 182 | flags=cv2.INTER_LINEAR, 183 | borderMode=cv2.BORDER_CONSTANT, 184 | borderValue=(0, 0, 0)) 185 | input = self.transform(input) 186 | flip_input = input.flip(dims=[-1]) 187 | if self.flip: 188 | batch_input_im = torch.stack([input, flip_input]) 189 | else: 190 | batch_input_im = input 191 | 192 | meta = { 193 | 'name': val_item, 194 | 'center': person_center, 195 | 'height': h, 196 | 'width': w, 197 | 'scale': s, 198 | 'rotation': r 199 | } 200 | 201 | return batch_input_im, meta 202 | -------------------------------------------------------------------------------- /schp/utils/encoding.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """Encoding Data Parallel""" 12 | import threading 13 | import functools 14 | import torch 15 | from torch.autograd import Variable, Function 16 | import torch.cuda.comm as comm 17 | from torch.nn.parallel.data_parallel import DataParallel 18 | from torch.nn.parallel.parallel_apply import get_a_var 19 | from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast 20 | 21 | torch_ver = 
torch.__version__[:3] 22 | 23 | __all__ = ['allreduce', 'DataParallelModel', 'DataParallelCriterion', 'patch_replication_callback'] 24 | 25 | def allreduce(*inputs): 26 | """Cross GPU all reduce autograd operation for calculate mean and 27 | variance in SyncBN. 28 | """ 29 | return AllReduce.apply(*inputs) 30 | 31 | class AllReduce(Function): 32 | @staticmethod 33 | def forward(ctx, num_inputs, *inputs): 34 | ctx.num_inputs = num_inputs 35 | ctx.target_gpus = [inputs[i].get_device() for i in range(0, len(inputs), num_inputs)] 36 | inputs = [inputs[i:i + num_inputs] 37 | for i in range(0, len(inputs), num_inputs)] 38 | # sort before reduce sum 39 | inputs = sorted(inputs, key=lambda i: i[0].get_device()) 40 | results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) 41 | outputs = comm.broadcast_coalesced(results, ctx.target_gpus) 42 | return tuple([t for tensors in outputs for t in tensors]) 43 | 44 | @staticmethod 45 | def backward(ctx, *inputs): 46 | inputs = [i.data for i in inputs] 47 | inputs = [inputs[i:i + ctx.num_inputs] 48 | for i in range(0, len(inputs), ctx.num_inputs)] 49 | results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) 50 | outputs = comm.broadcast_coalesced(results, ctx.target_gpus) 51 | return (None,) + tuple([Variable(t) for tensors in outputs for t in tensors]) 52 | 53 | class Reduce(Function): 54 | @staticmethod 55 | def forward(ctx, *inputs): 56 | ctx.target_gpus = [inputs[i].get_device() for i in range(len(inputs))] 57 | inputs = sorted(inputs, key=lambda i: i.get_device()) 58 | return comm.reduce_add(inputs) 59 | 60 | @staticmethod 61 | def backward(ctx, gradOutput): 62 | return Broadcast.apply(ctx.target_gpus, gradOutput) 63 | 64 | 65 | class DataParallelModel(DataParallel): 66 | """Implements data parallelism at the module level. 67 | 68 | This container parallelizes the application of the given module by 69 | splitting the input across the specified devices by chunking in the 70 | batch dimension. 71 | In the forward pass, the module is replicated on each device, 72 | and each replica handles a portion of the input. During the backwards pass, gradients from each replica are summed into the original module. 73 | Note that the outputs are not gathered, please use compatible 74 | :class:`encoding.parallel.DataParallelCriterion`. 75 | 76 | The batch size should be larger than the number of GPUs used. It should 77 | also be an integer multiple of the number of GPUs so that each chunk is 78 | the same size (so that each GPU processes the same number of samples). 79 | 80 | Args: 81 | module: module to be parallelized 82 | device_ids: CUDA devices (default: all devices) 83 | 84 | Reference: 85 | Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, 86 | Amit Agrawal. “Context Encoding for Semantic Segmentation. 87 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* 88 | 89 | Example:: 90 | 91 | >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2]) 92 | >>> y = net(x) 93 | """ 94 | def gather(self, outputs, output_device): 95 | return outputs 96 | 97 | def replicate(self, module, device_ids): 98 | modules = super(DataParallelModel, self).replicate(module, device_ids) 99 | return modules 100 | 101 | 102 | class DataParallelCriterion(DataParallel): 103 | """ 104 | Calculate loss in multiple-GPUs, which balance the memory usage for 105 | Semantic Segmentation. 106 | 107 | The targets are splitted across the specified devices by chunking in 108 | the batch dimension. 
Please use together with :class:`encoding.parallel.DataParallelModel`. 109 | 110 | Reference: 111 | Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, 112 | Amit Agrawal. “Context Encoding for Semantic Segmentation. 113 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* 114 | 115 | Example:: 116 | 117 | >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2]) 118 | >>> criterion = encoding.nn.DataParallelCriterion(criterion, device_ids=[0, 1, 2]) 119 | >>> y = net(x) 120 | >>> loss = criterion(y, target) 121 | """ 122 | def forward(self, inputs, *targets, **kwargs): 123 | # input should be already scatterd 124 | # scattering the targets instead 125 | if not self.device_ids: 126 | return self.module(inputs, *targets, **kwargs) 127 | targets, kwargs = self.scatter(targets, kwargs, self.device_ids) 128 | if len(self.device_ids) == 1: 129 | return self.module(inputs, *targets[0], **kwargs[0]) 130 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 131 | outputs = _criterion_parallel_apply(replicas, inputs, targets, kwargs) 132 | return Reduce.apply(*outputs) / len(outputs) 133 | 134 | 135 | def _criterion_parallel_apply(modules, inputs, targets, kwargs_tup=None, devices=None): 136 | assert len(modules) == len(inputs) 137 | assert len(targets) == len(inputs) 138 | if kwargs_tup: 139 | assert len(modules) == len(kwargs_tup) 140 | else: 141 | kwargs_tup = ({},) * len(modules) 142 | if devices is not None: 143 | assert len(modules) == len(devices) 144 | else: 145 | devices = [None] * len(modules) 146 | 147 | lock = threading.Lock() 148 | results = {} 149 | if torch_ver != "0.3": 150 | grad_enabled = torch.is_grad_enabled() 151 | 152 | def _worker(i, module, input, target, kwargs, device=None): 153 | if torch_ver != "0.3": 154 | torch.set_grad_enabled(grad_enabled) 155 | if device is None: 156 | device = get_a_var(input).get_device() 157 | try: 158 | if not isinstance(input, tuple): 159 | input = (input,) 160 | with torch.cuda.device(device): 161 | output = module(*(input + target), **kwargs) 162 | with lock: 163 | results[i] = output 164 | except Exception as e: 165 | with lock: 166 | results[i] = e 167 | 168 | if len(modules) > 1: 169 | threads = [threading.Thread(target=_worker, 170 | args=(i, module, input, target, 171 | kwargs, device),) 172 | for i, (module, input, target, kwargs, device) in 173 | enumerate(zip(modules, inputs, targets, kwargs_tup, devices))] 174 | 175 | for thread in threads: 176 | thread.start() 177 | for thread in threads: 178 | thread.join() 179 | else: 180 | _worker(0, modules[0], inputs[0], kwargs_tup[0], devices[0]) 181 | 182 | outputs = [] 183 | for i in range(len(inputs)): 184 | output = results[i] 185 | if isinstance(output, Exception): 186 | raise output 187 | outputs.append(output) 188 | return outputs 189 | -------------------------------------------------------------------------------- /schp/modules/residual.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.nn as nn 4 | 5 | from .bn import ABN, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE 6 | import torch.nn.functional as functional 7 | 8 | 9 | class ResidualBlock(nn.Module): 10 | """Configurable residual block 11 | 12 | Parameters 13 | ---------- 14 | in_channels : int 15 | Number of input channels. 16 | channels : list of int 17 | Number of channels in the internal feature maps. 
Can either have two or three elements: if three construct 18 | a residual block with two `3 x 3` convolutions, otherwise construct a bottleneck block with `1 x 1`, then 19 | `3 x 3` then `1 x 1` convolutions. 20 | stride : int 21 | Stride of the first `3 x 3` convolution 22 | dilation : int 23 | Dilation to apply to the `3 x 3` convolutions. 24 | groups : int 25 | Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with 26 | bottleneck blocks. 27 | norm_act : callable 28 | Function to create normalization / activation Module. 29 | dropout: callable 30 | Function to create Dropout Module. 31 | """ 32 | 33 | def __init__(self, 34 | in_channels, 35 | channels, 36 | stride=1, 37 | dilation=1, 38 | groups=1, 39 | norm_act=ABN, 40 | dropout=None): 41 | super(ResidualBlock, self).__init__() 42 | 43 | # Check parameters for inconsistencies 44 | if len(channels) != 2 and len(channels) != 3: 45 | raise ValueError("channels must contain either two or three values") 46 | if len(channels) == 2 and groups != 1: 47 | raise ValueError("groups > 1 are only valid if len(channels) == 3") 48 | 49 | is_bottleneck = len(channels) == 3 50 | need_proj_conv = stride != 1 or in_channels != channels[-1] 51 | 52 | if not is_bottleneck: 53 | bn2 = norm_act(channels[1]) 54 | bn2.activation = ACT_NONE 55 | layers = [ 56 | ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False, 57 | dilation=dilation)), 58 | ("bn1", norm_act(channels[0])), 59 | ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, 60 | dilation=dilation)), 61 | ("bn2", bn2) 62 | ] 63 | if dropout is not None: 64 | layers = layers[0:2] + [("dropout", dropout())] + layers[2:] 65 | else: 66 | bn3 = norm_act(channels[2]) 67 | bn3.activation = ACT_NONE 68 | layers = [ 69 | ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=1, padding=0, bias=False)), 70 | ("bn1", norm_act(channels[0])), 71 | ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=stride, padding=dilation, bias=False, 72 | groups=groups, dilation=dilation)), 73 | ("bn2", norm_act(channels[1])), 74 | ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False)), 75 | ("bn3", bn3) 76 | ] 77 | if dropout is not None: 78 | layers = layers[0:4] + [("dropout", dropout())] + layers[4:] 79 | self.convs = nn.Sequential(OrderedDict(layers)) 80 | 81 | if need_proj_conv: 82 | self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False) 83 | self.proj_bn = norm_act(channels[-1]) 84 | self.proj_bn.activation = ACT_NONE 85 | 86 | def forward(self, x): 87 | if hasattr(self, "proj_conv"): 88 | residual = self.proj_conv(x) 89 | residual = self.proj_bn(residual) 90 | else: 91 | residual = x 92 | x = self.convs(x) + residual 93 | 94 | if self.convs.bn1.activation == ACT_LEAKY_RELU: 95 | return functional.leaky_relu(x, negative_slope=self.convs.bn1.slope, inplace=True) 96 | elif self.convs.bn1.activation == ACT_ELU: 97 | return functional.elu(x, inplace=True) 98 | else: 99 | return x 100 | 101 | 102 | class IdentityResidualBlock(nn.Module): 103 | def __init__(self, 104 | in_channels, 105 | channels, 106 | stride=1, 107 | dilation=1, 108 | groups=1, 109 | norm_act=ABN, 110 | dropout=None): 111 | """Configurable identity-mapping residual block 112 | 113 | Parameters 114 | ---------- 115 | in_channels : int 116 | Number of input channels. 117 | channels : list of int 118 | Number of channels in the internal feature maps. 
Can either have two or three elements: if three construct 119 | a residual block with two `3 x 3` convolutions, otherwise construct a bottleneck block with `1 x 1`, then 120 | `3 x 3` then `1 x 1` convolutions. 121 | stride : int 122 | Stride of the first `3 x 3` convolution 123 | dilation : int 124 | Dilation to apply to the `3 x 3` convolutions. 125 | groups : int 126 | Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with 127 | bottleneck blocks. 128 | norm_act : callable 129 | Function to create normalization / activation Module. 130 | dropout: callable 131 | Function to create Dropout Module. 132 | """ 133 | super(IdentityResidualBlock, self).__init__() 134 | 135 | # Check parameters for inconsistencies 136 | if len(channels) != 2 and len(channels) != 3: 137 | raise ValueError("channels must contain either two or three values") 138 | if len(channels) == 2 and groups != 1: 139 | raise ValueError("groups > 1 are only valid if len(channels) == 3") 140 | 141 | is_bottleneck = len(channels) == 3 142 | need_proj_conv = stride != 1 or in_channels != channels[-1] 143 | 144 | self.bn1 = norm_act(in_channels) 145 | if not is_bottleneck: 146 | layers = [ 147 | ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False, 148 | dilation=dilation)), 149 | ("bn2", norm_act(channels[0])), 150 | ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, 151 | dilation=dilation)) 152 | ] 153 | if dropout is not None: 154 | layers = layers[0:2] + [("dropout", dropout())] + layers[2:] 155 | else: 156 | layers = [ 157 | ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=stride, padding=0, bias=False)), 158 | ("bn2", norm_act(channels[0])), 159 | ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, 160 | groups=groups, dilation=dilation)), 161 | ("bn3", norm_act(channels[1])), 162 | ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False)) 163 | ] 164 | if dropout is not None: 165 | layers = layers[0:4] + [("dropout", dropout())] + layers[4:] 166 | self.convs = nn.Sequential(OrderedDict(layers)) 167 | 168 | if need_proj_conv: 169 | self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False) 170 | 171 | def forward(self, x): 172 | if hasattr(self, "proj_conv"): 173 | bn1 = self.bn1(x) 174 | shortcut = self.proj_conv(bn1) 175 | else: 176 | shortcut = x.clone() 177 | bn1 = self.bn1(x) 178 | 179 | out = self.convs(bn1) 180 | out.add_(shortcut) 181 | 182 | return out 183 | -------------------------------------------------------------------------------- /schp/evaluate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : evaluate.py 8 | @Time : 8/4/19 3:36 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 
12 | """ 13 | 14 | import os 15 | import argparse 16 | import numpy as np 17 | import torch 18 | 19 | from torch.utils import data 20 | from tqdm import tqdm 21 | from PIL import Image as PILImage 22 | import torchvision.transforms as transforms 23 | import torch.backends.cudnn as cudnn 24 | 25 | import networks 26 | from datasets.datasets import LIPDataValSet 27 | from utils.miou import compute_mean_ioU 28 | from utils.transforms import BGR2RGB_transform 29 | from utils.transforms import transform_parsing 30 | 31 | 32 | def get_arguments(): 33 | """Parse all the arguments provided from the CLI. 34 | 35 | Returns: 36 | A list of parsed arguments. 37 | """ 38 | parser = argparse.ArgumentParser(description="Self Correction for Human Parsing") 39 | 40 | # Network Structure 41 | parser.add_argument("--arch", type=str, default='resnet101') 42 | # Data Preference 43 | parser.add_argument("--data-dir", type=str, default='./data/LIP') 44 | parser.add_argument("--batch-size", type=int, default=1) 45 | parser.add_argument("--input-size", type=str, default='473,473') 46 | parser.add_argument("--num-classes", type=int, default=20) 47 | parser.add_argument("--ignore-label", type=int, default=255) 48 | parser.add_argument("--random-mirror", action="store_true") 49 | parser.add_argument("--random-scale", action="store_true") 50 | # Evaluation Preference 51 | parser.add_argument("--log-dir", type=str, default='./log') 52 | parser.add_argument("--model-restore", type=str, default='./log/checkpoint.pth.tar') 53 | parser.add_argument("--gpu", type=str, default='0', help="choose gpu device.") 54 | parser.add_argument("--save-results", action="store_true", help="whether to save the results.") 55 | parser.add_argument("--flip", action="store_true", help="random flip during the test.") 56 | parser.add_argument("--multi-scales", type=str, default='1', help="multiple scales during the test") 57 | return parser.parse_args() 58 | 59 | 60 | def get_palette(num_cls): 61 | """ Returns the color map for visualizing the segmentation mask. 
62 | Args: 63 | num_cls: Number of classes 64 | Returns: 65 | The color map 66 | """ 67 | n = num_cls 68 | palette = [0] * (n * 3) 69 | for j in range(0, n): 70 | lab = j 71 | palette[j * 3 + 0] = 0 72 | palette[j * 3 + 1] = 0 73 | palette[j * 3 + 2] = 0 74 | i = 0 75 | while lab: 76 | palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) 77 | palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) 78 | palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) 79 | i += 1 80 | lab >>= 3 81 | return palette 82 | 83 | 84 | def multi_scale_testing(model, batch_input_im, crop_size=[473, 473], flip=True, multi_scales=[1]): 85 | flipped_idx = (15, 14, 17, 16, 19, 18) 86 | if len(batch_input_im.shape) > 4: 87 | batch_input_im = batch_input_im.squeeze() 88 | if len(batch_input_im.shape) == 3: 89 | batch_input_im = batch_input_im.unsqueeze(0) 90 | 91 | interp = torch.nn.Upsample(size=crop_size, mode='bilinear', align_corners=True) 92 | ms_outputs = [] 93 | for s in multi_scales: 94 | interp_im = torch.nn.Upsample(scale_factor=s, mode='bilinear', align_corners=True) 95 | scaled_im = interp_im(batch_input_im) 96 | parsing_output = model(scaled_im) 97 | parsing_output = parsing_output[0][-1] 98 | output = parsing_output[0] 99 | if flip: 100 | flipped_output = parsing_output[1] 101 | flipped_output[14:20, :, :] = flipped_output[flipped_idx, :, :] 102 | output += flipped_output.flip(dims=[-1]) 103 | output *= 0.5 104 | output = interp(output.unsqueeze(0)) 105 | ms_outputs.append(output[0]) 106 | ms_fused_parsing_output = torch.stack(ms_outputs) 107 | ms_fused_parsing_output = ms_fused_parsing_output.mean(0) 108 | ms_fused_parsing_output = ms_fused_parsing_output.permute(1, 2, 0) # HWC 109 | parsing = torch.argmax(ms_fused_parsing_output, dim=2) 110 | parsing = parsing.data.cpu().numpy() 111 | ms_fused_parsing_output = ms_fused_parsing_output.data.cpu().numpy() 112 | return parsing, ms_fused_parsing_output 113 | 114 | 115 | def main(): 116 | """Create the model and start the evaluation process.""" 117 | args = get_arguments() 118 | multi_scales = [float(i) for i in args.multi_scales.split(',')] 119 | gpus = [int(i) for i in args.gpu.split(',')] 120 | assert len(gpus) == 1 121 | if not args.gpu == 'None': 122 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu 123 | 124 | cudnn.benchmark = True 125 | cudnn.enabled = True 126 | 127 | h, w = map(int, args.input_size.split(',')) 128 | input_size = [h, w] 129 | 130 | model = networks.init_model(args.arch, num_classes=args.num_classes, pretrained=None) 131 | 132 | IMAGE_MEAN = model.mean 133 | IMAGE_STD = model.std 134 | INPUT_SPACE = model.input_space 135 | print('image mean: {}'.format(IMAGE_MEAN)) 136 | print('image std: {}'.format(IMAGE_STD)) 137 | print('input space:{}'.format(INPUT_SPACE)) 138 | if INPUT_SPACE == 'BGR': 139 | print('BGR Transformation') 140 | transform = transforms.Compose([ 141 | transforms.ToTensor(), 142 | transforms.Normalize(mean=IMAGE_MEAN, 143 | std=IMAGE_STD), 144 | 145 | ]) 146 | if INPUT_SPACE == 'RGB': 147 | print('RGB Transformation') 148 | transform = transforms.Compose([ 149 | transforms.ToTensor(), 150 | BGR2RGB_transform(), 151 | transforms.Normalize(mean=IMAGE_MEAN, 152 | std=IMAGE_STD), 153 | ]) 154 | 155 | # Data loader 156 | lip_test_dataset = LIPDataValSet(args.data_dir, 'val', crop_size=input_size, transform=transform, flip=args.flip) 157 | num_samples = len(lip_test_dataset) 158 | print('Totoal testing sample numbers: {}'.format(num_samples)) 159 | testloader = data.DataLoader(lip_test_dataset, batch_size=args.batch_size, 
shuffle=False, pin_memory=True) 160 | 161 | # Load model weight 162 | state_dict = torch.load(args.model_restore)['state_dict'] 163 | from collections import OrderedDict 164 | new_state_dict = OrderedDict() 165 | for k, v in state_dict.items(): 166 | name = k[7:] # remove `module.` 167 | new_state_dict[name] = v 168 | model.load_state_dict(new_state_dict) 169 | model.cuda() 170 | model.eval() 171 | 172 | sp_results_dir = os.path.join(args.log_dir, 'sp_results') 173 | if not os.path.exists(sp_results_dir): 174 | os.makedirs(sp_results_dir) 175 | 176 | palette = get_palette(20) 177 | parsing_preds = [] 178 | scales = np.zeros((num_samples, 2), dtype=np.float32) 179 | centers = np.zeros((num_samples, 2), dtype=np.int32) 180 | with torch.no_grad(): 181 | for idx, batch in enumerate(tqdm(testloader)): 182 | image, meta = batch 183 | if (len(image.shape) > 4): 184 | image = image.squeeze() 185 | im_name = meta['name'][0] 186 | c = meta['center'].numpy()[0] 187 | s = meta['scale'].numpy()[0] 188 | w = meta['width'].numpy()[0] 189 | h = meta['height'].numpy()[0] 190 | scales[idx, :] = s 191 | centers[idx, :] = c 192 | parsing, logits = multi_scale_testing(model, image.cuda(), crop_size=input_size, flip=args.flip, 193 | multi_scales=multi_scales) 194 | if args.save_results: 195 | parsing_result = transform_parsing(parsing, c, s, w, h, input_size) 196 | parsing_result_path = os.path.join(sp_results_dir, im_name + '.png') 197 | output_im = PILImage.fromarray(np.asarray(parsing_result, dtype=np.uint8)) 198 | output_im.putpalette(palette) 199 | output_im.save(parsing_result_path) 200 | 201 | parsing_preds.append(parsing) 202 | assert len(parsing_preds) == num_samples 203 | mIoU = compute_mean_ioU(parsing_preds, scales, centers, args.num_classes, args.data_dir, input_size) 204 | print(mIoU) 205 | return 206 | 207 | 208 | if __name__ == '__main__': 209 | main() 210 | -------------------------------------------------------------------------------- /schp/modules/functions.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | import torch 3 | import torch.distributed as dist 4 | import torch.autograd as autograd 5 | import torch.cuda.comm as comm 6 | from torch.autograd.function import once_differentiable 7 | from torch.utils.cpp_extension import load 8 | 9 | _src_path = path.join(path.dirname(path.abspath(__file__)), "src") 10 | 11 | if torch.cuda.is_available(): 12 | _backend = load(name="inplace_abn", 13 | extra_cflags=["-O3"], 14 | sources=[path.join(_src_path, f) for f in [ 15 | "inplace_abn.cpp", 16 | "inplace_abn_cpu.cpp", 17 | "inplace_abn_cuda.cu", 18 | "inplace_abn_cuda_half.cu" 19 | ]], 20 | extra_cuda_cflags=["--expt-extended-lambda"]) 21 | else: 22 | _backend = load(name="inplace_abn", 23 | extra_cflags=["-O3"], 24 | sources=[path.join(_src_path, f) for f in [ 25 | "inplace_abn_cpu_only.cpp", 26 | "inplace_abn_cpu.cpp" 27 | ]]) 28 | 29 | # Activation names 30 | ACT_RELU = "relu" 31 | ACT_LEAKY_RELU = "leaky_relu" 32 | ACT_ELU = "elu" 33 | ACT_NONE = "none" 34 | 35 | 36 | def _check(fn, *args, **kwargs): 37 | success = fn(*args, **kwargs) 38 | if not success: 39 | raise RuntimeError("CUDA Error encountered in {}".format(fn)) 40 | 41 | 42 | def _broadcast_shape(x): 43 | out_size = [] 44 | for i, s in enumerate(x.size()): 45 | if i != 1: 46 | out_size.append(1) 47 | else: 48 | out_size.append(s) 49 | return out_size 50 | 51 | 52 | def _reduce(x): 53 | if len(x.size()) == 2: 54 | return x.sum(dim=0) 55 | else: 56 | n, c = x.size()[0:2] 57 | 
return x.contiguous().view((n, c, -1)).sum(2).sum(0) 58 | 59 | 60 | def _count_samples(x): 61 | count = 1 62 | for i, s in enumerate(x.size()): 63 | if i != 1: 64 | count *= s 65 | return count 66 | 67 | 68 | def _act_forward(ctx, x): 69 | if ctx.activation == ACT_LEAKY_RELU: 70 | _backend.leaky_relu_forward(x, ctx.slope) 71 | elif ctx.activation == ACT_ELU: 72 | _backend.elu_forward(x) 73 | elif ctx.activation == ACT_NONE: 74 | pass 75 | 76 | 77 | def _act_backward(ctx, x, dx): 78 | if ctx.activation == ACT_LEAKY_RELU: 79 | _backend.leaky_relu_backward(x, dx, ctx.slope) 80 | elif ctx.activation == ACT_ELU: 81 | _backend.elu_backward(x, dx) 82 | elif ctx.activation == ACT_NONE: 83 | pass 84 | 85 | 86 | class InPlaceABN(autograd.Function): 87 | @staticmethod 88 | def forward(ctx, x, weight, bias, running_mean, running_var, 89 | training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01): 90 | # Save context 91 | ctx.training = training 92 | ctx.momentum = momentum 93 | ctx.eps = eps 94 | ctx.activation = activation 95 | ctx.slope = slope 96 | ctx.affine = weight is not None and bias is not None 97 | 98 | # Prepare inputs 99 | count = _count_samples(x) 100 | x = x.contiguous() 101 | weight = weight.contiguous() if ctx.affine else x.new_empty(0) 102 | bias = bias.contiguous() if ctx.affine else x.new_empty(0) 103 | 104 | if ctx.training: 105 | mean, var = _backend.mean_var(x) 106 | 107 | # Update running stats 108 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) 109 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1)) 110 | 111 | # Mark in-place modified tensors 112 | ctx.mark_dirty(x, running_mean, running_var) 113 | else: 114 | mean, var = running_mean.contiguous(), running_var.contiguous() 115 | ctx.mark_dirty(x) 116 | 117 | # BN forward + activation 118 | _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) 119 | _act_forward(ctx, x) 120 | 121 | # Output 122 | ctx.var = var 123 | ctx.save_for_backward(x, var, weight, bias) 124 | ctx.mark_non_differentiable(running_mean, running_var) 125 | return x, running_mean, running_var 126 | 127 | @staticmethod 128 | @once_differentiable 129 | def backward(ctx, dz, _drunning_mean, _drunning_var): 130 | z, var, weight, bias = ctx.saved_tensors 131 | dz = dz.contiguous() 132 | 133 | # Undo activation 134 | _act_backward(ctx, z, dz) 135 | 136 | if ctx.training: 137 | edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) 138 | else: 139 | # TODO: implement simplified CUDA backward for inference mode 140 | edz = dz.new_zeros(dz.size(1)) 141 | eydz = dz.new_zeros(dz.size(1)) 142 | 143 | dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) 144 | # dweight = eydz * weight.sign() if ctx.affine else None 145 | dweight = eydz if ctx.affine else None 146 | if dweight is not None: 147 | dweight[weight < 0] *= -1 148 | dbias = edz if ctx.affine else None 149 | 150 | return dx, dweight, dbias, None, None, None, None, None, None, None 151 | 152 | 153 | class InPlaceABNSync(autograd.Function): 154 | @classmethod 155 | def forward(cls, ctx, x, weight, bias, running_mean, running_var, 156 | training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01, equal_batches=True): 157 | # Save context 158 | ctx.training = training 159 | ctx.momentum = momentum 160 | ctx.eps = eps 161 | ctx.activation = activation 162 | ctx.slope = slope 163 | ctx.affine = weight is not None and bias is not None 164 | 165 | # Prepare inputs 166 | 
ctx.world_size = dist.get_world_size() if dist.is_initialized() else 1 167 | 168 | # count = _count_samples(x) 169 | batch_size = x.new_tensor([x.shape[0]], dtype=torch.long) 170 | 171 | x = x.contiguous() 172 | weight = weight.contiguous() if ctx.affine else x.new_empty(0) 173 | bias = bias.contiguous() if ctx.affine else x.new_empty(0) 174 | 175 | if ctx.training: 176 | mean, var = _backend.mean_var(x) 177 | if ctx.world_size > 1: 178 | # get global batch size 179 | if equal_batches: 180 | batch_size *= ctx.world_size 181 | else: 182 | dist.all_reduce(batch_size, dist.ReduceOp.SUM) 183 | 184 | ctx.factor = x.shape[0] / float(batch_size.item()) 185 | 186 | mean_all = mean.clone() * ctx.factor 187 | dist.all_reduce(mean_all, dist.ReduceOp.SUM) 188 | 189 | var_all = (var + (mean - mean_all) ** 2) * ctx.factor 190 | dist.all_reduce(var_all, dist.ReduceOp.SUM) 191 | 192 | mean = mean_all 193 | var = var_all 194 | 195 | # Update running stats 196 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) 197 | count = batch_size.item() * x.view(x.shape[0], x.shape[1], -1).shape[-1] 198 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * (float(count) / (count - 1))) 199 | 200 | # Mark in-place modified tensors 201 | ctx.mark_dirty(x, running_mean, running_var) 202 | else: 203 | mean, var = running_mean.contiguous(), running_var.contiguous() 204 | ctx.mark_dirty(x) 205 | 206 | # BN forward + activation 207 | _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) 208 | _act_forward(ctx, x) 209 | 210 | # Output 211 | ctx.var = var 212 | ctx.save_for_backward(x, var, weight, bias) 213 | ctx.mark_non_differentiable(running_mean, running_var) 214 | return x, running_mean, running_var 215 | 216 | @staticmethod 217 | @once_differentiable 218 | def backward(ctx, dz, _drunning_mean, _drunning_var): 219 | z, var, weight, bias = ctx.saved_tensors 220 | dz = dz.contiguous() 221 | 222 | # Undo activation 223 | _act_backward(ctx, z, dz) 224 | 225 | if ctx.training: 226 | edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) 227 | edz_local = edz.clone() 228 | eydz_local = eydz.clone() 229 | 230 | if ctx.world_size > 1: 231 | edz *= ctx.factor 232 | dist.all_reduce(edz, dist.ReduceOp.SUM) 233 | 234 | eydz *= ctx.factor 235 | dist.all_reduce(eydz, dist.ReduceOp.SUM) 236 | else: 237 | edz_local = edz = dz.new_zeros(dz.size(1)) 238 | eydz_local = eydz = dz.new_zeros(dz.size(1)) 239 | 240 | dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) 241 | # dweight = eydz_local * weight.sign() if ctx.affine else None 242 | dweight = eydz_local if ctx.affine else None 243 | if dweight is not None: 244 | dweight[weight < 0] *= -1 245 | dbias = edz_local if ctx.affine else None 246 | 247 | return dx, dweight, dbias, None, None, None, None, None, None, None 248 | 249 | 250 | inplace_abn = InPlaceABN.apply 251 | inplace_abn_sync = InPlaceABNSync.apply 252 | 253 | __all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"] 254 | -------------------------------------------------------------------------------- /schp/modules/src/inplace_abn_cuda_half.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include 6 | 7 | #include "utils/checks.h" 8 | #include "utils/cuda.cuh" 9 | #include "inplace_abn.h" 10 | 11 | #include 12 | 13 | // Operations for reduce 14 | struct SumOpH { 15 | __device__ SumOpH(const half *t, int c, int s) 16 | : tensor(t), 
chn(c), sp(s) {} 17 | __device__ __forceinline__ float operator()(int batch, int plane, int n) { 18 | return __half2float(tensor[(batch * chn + plane) * sp + n]); 19 | } 20 | const half *tensor; 21 | const int chn; 22 | const int sp; 23 | }; 24 | 25 | struct VarOpH { 26 | __device__ VarOpH(float m, const half *t, int c, int s) 27 | : mean(m), tensor(t), chn(c), sp(s) {} 28 | __device__ __forceinline__ float operator()(int batch, int plane, int n) { 29 | const auto t = __half2float(tensor[(batch * chn + plane) * sp + n]); 30 | return (t - mean) * (t - mean); 31 | } 32 | const float mean; 33 | const half *tensor; 34 | const int chn; 35 | const int sp; 36 | }; 37 | 38 | struct GradOpH { 39 | __device__ GradOpH(float _weight, float _bias, const half *_z, const half *_dz, int c, int s) 40 | : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {} 41 | __device__ __forceinline__ Pair operator()(int batch, int plane, int n) { 42 | float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - bias) / weight; 43 | float _dz = __half2float(dz[(batch * chn + plane) * sp + n]); 44 | return Pair(_dz, _y * _dz); 45 | } 46 | const float weight; 47 | const float bias; 48 | const half *z; 49 | const half *dz; 50 | const int chn; 51 | const int sp; 52 | }; 53 | 54 | /*********** 55 | * mean_var 56 | ***********/ 57 | 58 | __global__ void mean_var_kernel_h(const half *x, float *mean, float *var, int num, int chn, int sp) { 59 | int plane = blockIdx.x; 60 | float norm = 1.f / static_cast(num * sp); 61 | 62 | float _mean = reduce(SumOpH(x, chn, sp), plane, num, sp) * norm; 63 | __syncthreads(); 64 | float _var = reduce(VarOpH(_mean, x, chn, sp), plane, num, sp) * norm; 65 | 66 | if (threadIdx.x == 0) { 67 | mean[plane] = _mean; 68 | var[plane] = _var; 69 | } 70 | } 71 | 72 | std::vector mean_var_cuda_h(at::Tensor x) { 73 | CHECK_CUDA_INPUT(x); 74 | 75 | // Extract dimensions 76 | int64_t num, chn, sp; 77 | get_dims(x, num, chn, sp); 78 | 79 | // Prepare output tensors 80 | auto mean = at::empty({chn},x.options().dtype(at::kFloat)); 81 | auto var = at::empty({chn},x.options().dtype(at::kFloat)); 82 | 83 | // Run kernel 84 | dim3 blocks(chn); 85 | dim3 threads(getNumThreads(sp)); 86 | auto stream = at::cuda::getCurrentCUDAStream(); 87 | mean_var_kernel_h<<>>( 88 | reinterpret_cast(x.data()), 89 | mean.data(), 90 | var.data(), 91 | num, chn, sp); 92 | 93 | return {mean, var}; 94 | } 95 | 96 | /********** 97 | * forward 98 | **********/ 99 | 100 | __global__ void forward_kernel_h(half *x, const float *mean, const float *var, const float *weight, const float *bias, 101 | bool affine, float eps, int num, int chn, int sp) { 102 | int plane = blockIdx.x; 103 | 104 | const float _mean = mean[plane]; 105 | const float _var = var[plane]; 106 | const float _weight = affine ? abs(weight[plane]) + eps : 1.f; 107 | const float _bias = affine ? 
bias[plane] : 0.f; 108 | 109 | const float mul = rsqrt(_var + eps) * _weight; 110 | 111 | for (int batch = 0; batch < num; ++batch) { 112 | for (int n = threadIdx.x; n < sp; n += blockDim.x) { 113 | half *x_ptr = x + (batch * chn + plane) * sp + n; 114 | float _x = __half2float(*x_ptr); 115 | float _y = (_x - _mean) * mul + _bias; 116 | 117 | *x_ptr = __float2half(_y); 118 | } 119 | } 120 | } 121 | 122 | at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 123 | bool affine, float eps) { 124 | CHECK_CUDA_INPUT(x); 125 | CHECK_CUDA_INPUT(mean); 126 | CHECK_CUDA_INPUT(var); 127 | CHECK_CUDA_INPUT(weight); 128 | CHECK_CUDA_INPUT(bias); 129 | 130 | // Extract dimensions 131 | int64_t num, chn, sp; 132 | get_dims(x, num, chn, sp); 133 | 134 | // Run kernel 135 | dim3 blocks(chn); 136 | dim3 threads(getNumThreads(sp)); 137 | auto stream = at::cuda::getCurrentCUDAStream(); 138 | forward_kernel_h<<>>( 139 | reinterpret_cast(x.data()), 140 | mean.data(), 141 | var.data(), 142 | weight.data(), 143 | bias.data(), 144 | affine, eps, num, chn, sp); 145 | 146 | return x; 147 | } 148 | 149 | __global__ void edz_eydz_kernel_h(const half *z, const half *dz, const float *weight, const float *bias, 150 | float *edz, float *eydz, bool affine, float eps, int num, int chn, int sp) { 151 | int plane = blockIdx.x; 152 | 153 | float _weight = affine ? abs(weight[plane]) + eps : 1.f; 154 | float _bias = affine ? bias[plane] : 0.f; 155 | 156 | Pair res = reduce, GradOpH>(GradOpH(_weight, _bias, z, dz, chn, sp), plane, num, sp); 157 | __syncthreads(); 158 | 159 | if (threadIdx.x == 0) { 160 | edz[plane] = res.v1; 161 | eydz[plane] = res.v2; 162 | } 163 | } 164 | 165 | std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 166 | bool affine, float eps) { 167 | CHECK_CUDA_INPUT(z); 168 | CHECK_CUDA_INPUT(dz); 169 | CHECK_CUDA_INPUT(weight); 170 | CHECK_CUDA_INPUT(bias); 171 | 172 | // Extract dimensions 173 | int64_t num, chn, sp; 174 | get_dims(z, num, chn, sp); 175 | 176 | auto edz = at::empty({chn},z.options().dtype(at::kFloat)); 177 | auto eydz = at::empty({chn},z.options().dtype(at::kFloat)); 178 | 179 | // Run kernel 180 | dim3 blocks(chn); 181 | dim3 threads(getNumThreads(sp)); 182 | auto stream = at::cuda::getCurrentCUDAStream(); 183 | edz_eydz_kernel_h<<>>( 184 | reinterpret_cast(z.data()), 185 | reinterpret_cast(dz.data()), 186 | weight.data(), 187 | bias.data(), 188 | edz.data(), 189 | eydz.data(), 190 | affine, eps, num, chn, sp); 191 | 192 | return {edz, eydz}; 193 | } 194 | 195 | __global__ void backward_kernel_h(const half *z, const half *dz, const float *var, const float *weight, const float *bias, const float *edz, 196 | const float *eydz, half *dx, bool affine, float eps, int num, int chn, int sp) { 197 | int plane = blockIdx.x; 198 | 199 | float _weight = affine ? abs(weight[plane]) + eps : 1.f; 200 | float _bias = affine ? 
bias[plane] : 0.f; 201 | float _var = var[plane]; 202 | float _edz = edz[plane]; 203 | float _eydz = eydz[plane]; 204 | 205 | float _mul = _weight * rsqrt(_var + eps); 206 | float count = float(num * sp); 207 | 208 | for (int batch = 0; batch < num; ++batch) { 209 | for (int n = threadIdx.x; n < sp; n += blockDim.x) { 210 | float _dz = __half2float(dz[(batch * chn + plane) * sp + n]); 211 | float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - _bias) / _weight; 212 | 213 | dx[(batch * chn + plane) * sp + n] = __float2half((_dz - _edz / count - _y * _eydz / count) * _mul); 214 | } 215 | } 216 | } 217 | 218 | at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 219 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) { 220 | CHECK_CUDA_INPUT(z); 221 | CHECK_CUDA_INPUT(dz); 222 | CHECK_CUDA_INPUT(var); 223 | CHECK_CUDA_INPUT(weight); 224 | CHECK_CUDA_INPUT(bias); 225 | CHECK_CUDA_INPUT(edz); 226 | CHECK_CUDA_INPUT(eydz); 227 | 228 | // Extract dimensions 229 | int64_t num, chn, sp; 230 | get_dims(z, num, chn, sp); 231 | 232 | auto dx = at::zeros_like(z); 233 | 234 | // Run kernel 235 | dim3 blocks(chn); 236 | dim3 threads(getNumThreads(sp)); 237 | auto stream = at::cuda::getCurrentCUDAStream(); 238 | backward_kernel_h<<>>( 239 | reinterpret_cast(z.data()), 240 | reinterpret_cast(dz.data()), 241 | var.data(), 242 | weight.data(), 243 | bias.data(), 244 | edz.data(), 245 | eydz.data(), 246 | reinterpret_cast(dx.data()), 247 | affine, eps, num, chn, sp); 248 | 249 | return dx; 250 | } 251 | 252 | __global__ void leaky_relu_backward_impl_h(half *z, half *dz, float slope, int64_t count) { 253 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x){ 254 | float _z = __half2float(z[i]); 255 | if (_z < 0) { 256 | dz[i] = __float2half(__half2float(dz[i]) * slope); 257 | z[i] = __float2half(_z / slope); 258 | } 259 | } 260 | } 261 | 262 | void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope) { 263 | CHECK_CUDA_INPUT(z); 264 | CHECK_CUDA_INPUT(dz); 265 | 266 | int64_t count = z.numel(); 267 | dim3 threads(getNumThreads(count)); 268 | dim3 blocks = (count + threads.x - 1) / threads.x; 269 | auto stream = at::cuda::getCurrentCUDAStream(); 270 | leaky_relu_backward_impl_h<<>>( 271 | reinterpret_cast(z.data()), 272 | reinterpret_cast(dz.data()), 273 | slope, count); 274 | } 275 | 276 | -------------------------------------------------------------------------------- /schp/train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : train.py 8 | @Time : 8/4/19 3:36 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 
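A minimal usage sketch for the in-place ABN modules built on top of these kernels, assuming the CUDA extension under schp/modules/src compiled successfully, a GPU is available, and the code is run from the schp/ directory so that "from modules import ..." resolves; batch and channel sizes are arbitrary:

import torch
import torch.nn as nn
from modules import InPlaceABN

conv = nn.Conv2d(3, 64, 3, padding=1).cuda()
bn = InPlaceABN(64, activation='leaky_relu', slope=0.01).cuda()   # fused BN + activation

x = torch.randn(4, 3, 32, 32, device='cuda')
y = bn(conv(x))        # overwrites the conv output in place instead of allocating a new tensor
y.mean().backward()    # gradients flow through the fused edz/eydz backward shown above

# InPlaceABNSync exposes the same interface but all-reduces mean/var across processes when
# torch.distributed is initialized; float16 inputs are the case served by the *_h kernels above.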
12 | """ 13 | 14 | import os 15 | import json 16 | import timeit 17 | import argparse 18 | 19 | import torch 20 | import torch.optim as optim 21 | import torchvision.transforms as transforms 22 | import torch.backends.cudnn as cudnn 23 | from torch.utils import data 24 | 25 | import networks 26 | import utils.schp as schp 27 | from datasets.datasets import LIPDataSet 28 | from datasets.target_generation import generate_edge_tensor 29 | from utils.transforms import BGR2RGB_transform 30 | from utils.criterion import CriterionAll 31 | from utils.encoding import DataParallelModel, DataParallelCriterion 32 | from utils.warmup_scheduler import SGDRScheduler 33 | 34 | 35 | def get_arguments(): 36 | """Parse all the arguments provided from the CLI. 37 | Returns: 38 | A list of parsed arguments. 39 | """ 40 | parser = argparse.ArgumentParser(description="Self Correction for Human Parsing") 41 | 42 | # Network Structure 43 | parser.add_argument("--arch", type=str, default='resnet101') 44 | # Data Preference 45 | parser.add_argument("--data-dir", type=str, default='./data/LIP') 46 | parser.add_argument("--batch-size", type=int, default=16) 47 | parser.add_argument("--input-size", type=str, default='473,473') 48 | parser.add_argument("--num-classes", type=int, default=20) 49 | parser.add_argument("--ignore-label", type=int, default=255) 50 | parser.add_argument("--random-mirror", action="store_true") 51 | parser.add_argument("--random-scale", action="store_true") 52 | # Training Strategy 53 | parser.add_argument("--learning-rate", type=float, default=7e-3) 54 | parser.add_argument("--momentum", type=float, default=0.9) 55 | parser.add_argument("--weight-decay", type=float, default=5e-4) 56 | parser.add_argument("--gpu", type=str, default='0,1,2') 57 | parser.add_argument("--start-epoch", type=int, default=0) 58 | parser.add_argument("--epochs", type=int, default=150) 59 | parser.add_argument("--eval-epochs", type=int, default=10) 60 | parser.add_argument("--imagenet-pretrain", type=str, default='./pretrain_model/resnet101-imagenet.pth') 61 | parser.add_argument("--log-dir", type=str, default='./log') 62 | parser.add_argument("--model-restore", type=str, default='./log/checkpoint.pth.tar') 63 | parser.add_argument("--schp-start", type=int, default=100, help='schp start epoch') 64 | parser.add_argument("--cycle-epochs", type=int, default=10, help='schp cyclical epoch') 65 | parser.add_argument("--schp-restore", type=str, default='./log/schp_checkpoint.pth.tar') 66 | parser.add_argument("--lambda-s", type=float, default=1, help='segmentation loss weight') 67 | parser.add_argument("--lambda-e", type=float, default=1, help='edge loss weight') 68 | parser.add_argument("--lambda-c", type=float, default=0.1, help='segmentation-edge consistency loss weight') 69 | return parser.parse_args() 70 | 71 | 72 | def main(): 73 | args = get_arguments() 74 | print(args) 75 | 76 | start_epoch = 0 77 | cycle_n = 0 78 | 79 | if not os.path.exists(args.log_dir): 80 | os.makedirs(args.log_dir) 81 | with open(os.path.join(args.log_dir, 'args.json'), 'w') as opt_file: 82 | json.dump(vars(args), opt_file) 83 | 84 | gpus = [int(i) for i in args.gpu.split(',')] 85 | if not args.gpu == 'None': 86 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu 87 | 88 | input_size = list(map(int, args.input_size.split(','))) 89 | 90 | cudnn.enabled = True 91 | cudnn.benchmark = True 92 | 93 | # Model Initialization 94 | AugmentCE2P = networks.init_model(args.arch, num_classes=args.num_classes, pretrained=args.imagenet_pretrain) 95 | model = 
DataParallelModel(AugmentCE2P) 96 | model.cuda() 97 | 98 | IMAGE_MEAN = AugmentCE2P.mean 99 | IMAGE_STD = AugmentCE2P.std 100 | INPUT_SPACE = AugmentCE2P.input_space 101 | print('image mean: {}'.format(IMAGE_MEAN)) 102 | print('image std: {}'.format(IMAGE_STD)) 103 | print('input space:{}'.format(INPUT_SPACE)) 104 | 105 | restore_from = args.model_restore 106 | if os.path.exists(restore_from): 107 | print('Resume training from {}'.format(restore_from)) 108 | checkpoint = torch.load(restore_from) 109 | model.load_state_dict(checkpoint['state_dict']) 110 | start_epoch = checkpoint['epoch'] 111 | 112 | SCHP_AugmentCE2P = networks.init_model(args.arch, num_classes=args.num_classes, pretrained=args.imagenet_pretrain) 113 | schp_model = DataParallelModel(SCHP_AugmentCE2P) 114 | schp_model.cuda() 115 | 116 | if os.path.exists(args.schp_restore): 117 | print('Resuming schp checkpoint from {}'.format(args.schp_restore)) 118 | schp_checkpoint = torch.load(args.schp_restore) 119 | schp_model_state_dict = schp_checkpoint['state_dict'] 120 | cycle_n = schp_checkpoint['cycle_n'] 121 | schp_model.load_state_dict(schp_model_state_dict) 122 | 123 | # Loss Function 124 | criterion = CriterionAll(lambda_1=args.lambda_s, lambda_2=args.lambda_e, lambda_3=args.lambda_c, 125 | num_classes=args.num_classes) 126 | criterion = DataParallelCriterion(criterion) 127 | criterion.cuda() 128 | 129 | # Data Loader 130 | if INPUT_SPACE == 'BGR': 131 | print('BGR Transformation') 132 | transform = transforms.Compose([ 133 | transforms.ToTensor(), 134 | transforms.Normalize(mean=IMAGE_MEAN, 135 | std=IMAGE_STD), 136 | ]) 137 | 138 | elif INPUT_SPACE == 'RGB': 139 | print('RGB Transformation') 140 | transform = transforms.Compose([ 141 | transforms.ToTensor(), 142 | BGR2RGB_transform(), 143 | transforms.Normalize(mean=IMAGE_MEAN, 144 | std=IMAGE_STD), 145 | ]) 146 | 147 | train_dataset = LIPDataSet(args.data_dir, 'train', crop_size=input_size, transform=transform) 148 | train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size * len(gpus), 149 | num_workers=16, shuffle=True, pin_memory=True, drop_last=True) 150 | print('Total training samples: {}'.format(len(train_dataset))) 151 | 152 | # Optimizer Initialization 153 | optimizer = optim.SGD(model.parameters(), lr=args.learning_rate, momentum=args.momentum, 154 | weight_decay=args.weight_decay) 155 | 156 | lr_scheduler = SGDRScheduler(optimizer, total_epoch=args.epochs, 157 | eta_min=args.learning_rate / 100, warmup_epoch=10, 158 | start_cyclical=args.schp_start, cyclical_base_lr=args.learning_rate / 2, 159 | cyclical_epoch=args.cycle_epochs) 160 | 161 | total_iters = args.epochs * len(train_loader) 162 | start = timeit.default_timer() 163 | for epoch in range(start_epoch, args.epochs): 164 | lr_scheduler.step(epoch=epoch) 165 | lr = lr_scheduler.get_lr()[0] 166 | 167 | model.train() 168 | for i_iter, batch in enumerate(train_loader): 169 | i_iter += len(train_loader) * epoch 170 | 171 | images, labels, _ = batch 172 | labels = labels.cuda(non_blocking=True) 173 | 174 | edges = generate_edge_tensor(labels) 175 | labels = labels.type(torch.cuda.LongTensor) 176 | edges = edges.type(torch.cuda.LongTensor) 177 | 178 | preds = model(images) 179 | 180 | # Online Self Correction Cycle with Label Refinement 181 | if cycle_n >= 1: 182 | with torch.no_grad(): 183 | soft_preds = schp_model(images) 184 | soft_parsing = [] 185 | soft_edge = [] 186 | for soft_pred in soft_preds: 187 | soft_parsing.append(soft_pred[0][-1]) 188 | soft_edge.append(soft_pred[1][-1]) 189 | 
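# Note: soft_pred[0][-1] takes the last parsing output of the aggregated schp_model and
# soft_pred[1][-1] the last edge output (one soft_pred per GPU under DataParallelModel).
# Once concatenated below, they serve as refined soft labels that CriterionAll blends with the
# hard annotations; the blend is driven by cycle_n, so refinement only kicks in after the first
# self-correction cycle has produced an aggregated model.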
soft_preds = torch.cat(soft_parsing, dim=0) 190 | soft_edges = torch.cat(soft_edge, dim=0) 191 | else: 192 | soft_preds = None 193 | soft_edges = None 194 | 195 | loss = criterion(preds, [labels, edges, soft_preds, soft_edges], cycle_n) 196 | 197 | optimizer.zero_grad() 198 | loss.backward() 199 | optimizer.step() 200 | 201 | if i_iter % 100 == 0: 202 | print('iter = {} of {} completed, lr = {}, loss = {}'.format(i_iter, total_iters, lr, 203 | loss.data.cpu().numpy())) 204 | if (epoch + 1) % (args.eval_epochs) == 0: 205 | schp.save_schp_checkpoint({ 206 | 'epoch': epoch + 1, 207 | 'state_dict': model.state_dict(), 208 | }, False, args.log_dir, filename='checkpoint_{}.pth.tar'.format(epoch + 1)) 209 | 210 | # Self Correction Cycle with Model Aggregation 211 | if (epoch + 1) >= args.schp_start and (epoch + 1 - args.schp_start) % args.cycle_epochs == 0: 212 | print('Self-correction cycle number {}'.format(cycle_n)) 213 | schp.moving_average(schp_model, model, 1.0 / (cycle_n + 1)) 214 | cycle_n += 1 215 | schp.bn_re_estimate(train_loader, schp_model) 216 | schp.save_schp_checkpoint({ 217 | 'state_dict': schp_model.state_dict(), 218 | 'cycle_n': cycle_n, 219 | }, False, args.log_dir, filename='schp_{}_checkpoint.pth.tar'.format(cycle_n)) 220 | 221 | torch.cuda.empty_cache() 222 | end = timeit.default_timer() 223 | print('epoch = {} of {} completed using {} s'.format(epoch, args.epochs, 224 | (end - start) / (epoch - start_epoch + 1))) 225 | 226 | end = timeit.default_timer() 227 | print('Training Finished in {} seconds'.format(end - start)) 228 | 229 | 230 | if __name__ == '__main__': 231 | main() 232 | -------------------------------------------------------------------------------- /schp/networks/context_encoding/ocnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : ocnet.py 8 | @Time : 8/4/19 3:36 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 
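The self-correction cycle in train.py above folds the online model into schp_model every cycle_epochs once schp_start is reached, then refreshes the BatchNorm statistics with schp.bn_re_estimate. Because alpha is 1 / (cycle_n + 1), the aggregation is a running mean of the cycle snapshots. An illustrative sketch of that update follows; the real implementation lives in schp/utils/schp.py and may differ in details:

import torch

def moving_average_sketch(schp_model, model, alpha):
    # Parameter-wise update: w_schp <- (1 - alpha) * w_schp + alpha * w
    with torch.no_grad():
        for p_schp, p in zip(schp_model.parameters(), model.parameters()):
            p_schp.mul_(1.0 - alpha).add_(alpha * p)

# After k aggregations schp_model holds the plain average of the k snapshots seen so far,
# which is the cyclical model aggregation SCHP uses to build its self-correction targets.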
12 | """ 13 | 14 | import functools 15 | 16 | import torch 17 | import torch.nn as nn 18 | from torch.autograd import Variable 19 | from torch.nn import functional as F 20 | 21 | from modules import InPlaceABNSync 22 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') 23 | 24 | 25 | class _SelfAttentionBlock(nn.Module): 26 | ''' 27 | The basic implementation for self-attention block/non-local block 28 | Input: 29 | N X C X H X W 30 | Parameters: 31 | in_channels : the dimension of the input feature map 32 | key_channels : the dimension after the key/query transform 33 | value_channels : the dimension after the value transform 34 | scale : choose the scale to downsample the input feature maps (save memory cost) 35 | Return: 36 | N X C X H X W 37 | position-aware context features.(w/o concate or add with the input) 38 | ''' 39 | 40 | def __init__(self, in_channels, key_channels, value_channels, out_channels=None, scale=1): 41 | super(_SelfAttentionBlock, self).__init__() 42 | self.scale = scale 43 | self.in_channels = in_channels 44 | self.out_channels = out_channels 45 | self.key_channels = key_channels 46 | self.value_channels = value_channels 47 | if out_channels == None: 48 | self.out_channels = in_channels 49 | self.pool = nn.MaxPool2d(kernel_size=(scale, scale)) 50 | self.f_key = nn.Sequential( 51 | nn.Conv2d(in_channels=self.in_channels, out_channels=self.key_channels, 52 | kernel_size=1, stride=1, padding=0), 53 | InPlaceABNSync(self.key_channels), 54 | ) 55 | self.f_query = self.f_key 56 | self.f_value = nn.Conv2d(in_channels=self.in_channels, out_channels=self.value_channels, 57 | kernel_size=1, stride=1, padding=0) 58 | self.W = nn.Conv2d(in_channels=self.value_channels, out_channels=self.out_channels, 59 | kernel_size=1, stride=1, padding=0) 60 | nn.init.constant(self.W.weight, 0) 61 | nn.init.constant(self.W.bias, 0) 62 | 63 | def forward(self, x): 64 | batch_size, h, w = x.size(0), x.size(2), x.size(3) 65 | if self.scale > 1: 66 | x = self.pool(x) 67 | 68 | value = self.f_value(x).view(batch_size, self.value_channels, -1) 69 | value = value.permute(0, 2, 1) 70 | query = self.f_query(x).view(batch_size, self.key_channels, -1) 71 | query = query.permute(0, 2, 1) 72 | key = self.f_key(x).view(batch_size, self.key_channels, -1) 73 | 74 | sim_map = torch.matmul(query, key) 75 | sim_map = (self.key_channels ** -.5) * sim_map 76 | sim_map = F.softmax(sim_map, dim=-1) 77 | 78 | context = torch.matmul(sim_map, value) 79 | context = context.permute(0, 2, 1).contiguous() 80 | context = context.view(batch_size, self.value_channels, *x.size()[2:]) 81 | context = self.W(context) 82 | if self.scale > 1: 83 | context = F.upsample(input=context, size=(h, w), mode='bilinear', align_corners=True) 84 | return context 85 | 86 | 87 | class SelfAttentionBlock2D(_SelfAttentionBlock): 88 | def __init__(self, in_channels, key_channels, value_channels, out_channels=None, scale=1): 89 | super(SelfAttentionBlock2D, self).__init__(in_channels, 90 | key_channels, 91 | value_channels, 92 | out_channels, 93 | scale) 94 | 95 | 96 | class BaseOC_Module(nn.Module): 97 | """ 98 | Implementation of the BaseOC module 99 | Parameters: 100 | in_features / out_features: the channels of the input / output feature maps. 101 | dropout: we choose 0.05 as the default value. 102 | size: you can apply multiple sizes. Here we only use one size. 103 | Return: 104 | features fused with Object context information. 
105 | """ 106 | 107 | def __init__(self, in_channels, out_channels, key_channels, value_channels, dropout, sizes=([1])): 108 | super(BaseOC_Module, self).__init__() 109 | self.stages = [] 110 | self.stages = nn.ModuleList( 111 | [self._make_stage(in_channels, out_channels, key_channels, value_channels, size) for size in sizes]) 112 | self.conv_bn_dropout = nn.Sequential( 113 | nn.Conv2d(2 * in_channels, out_channels, kernel_size=1, padding=0), 114 | InPlaceABNSync(out_channels), 115 | nn.Dropout2d(dropout) 116 | ) 117 | 118 | def _make_stage(self, in_channels, output_channels, key_channels, value_channels, size): 119 | return SelfAttentionBlock2D(in_channels, 120 | key_channels, 121 | value_channels, 122 | output_channels, 123 | size) 124 | 125 | def forward(self, feats): 126 | priors = [stage(feats) for stage in self.stages] 127 | context = priors[0] 128 | for i in range(1, len(priors)): 129 | context += priors[i] 130 | output = self.conv_bn_dropout(torch.cat([context, feats], 1)) 131 | return output 132 | 133 | 134 | class BaseOC_Context_Module(nn.Module): 135 | """ 136 | Output only the context features. 137 | Parameters: 138 | in_features / out_features: the channels of the input / output feature maps. 139 | dropout: specify the dropout ratio 140 | fusion: We provide two different fusion method, "concat" or "add" 141 | size: we find that directly learn the attention weights on even 1/8 feature maps is hard. 142 | Return: 143 | features after "concat" or "add" 144 | """ 145 | 146 | def __init__(self, in_channels, out_channels, key_channels, value_channels, dropout, sizes=([1])): 147 | super(BaseOC_Context_Module, self).__init__() 148 | self.stages = [] 149 | self.stages = nn.ModuleList( 150 | [self._make_stage(in_channels, out_channels, key_channels, value_channels, size) for size in sizes]) 151 | self.conv_bn_dropout = nn.Sequential( 152 | nn.Conv2d(in_channels, out_channels, kernel_size=1, padding=0), 153 | InPlaceABNSync(out_channels), 154 | ) 155 | 156 | def _make_stage(self, in_channels, output_channels, key_channels, value_channels, size): 157 | return SelfAttentionBlock2D(in_channels, 158 | key_channels, 159 | value_channels, 160 | output_channels, 161 | size) 162 | 163 | def forward(self, feats): 164 | priors = [stage(feats) for stage in self.stages] 165 | context = priors[0] 166 | for i in range(1, len(priors)): 167 | context += priors[i] 168 | output = self.conv_bn_dropout(context) 169 | return output 170 | 171 | 172 | class ASP_OC_Module(nn.Module): 173 | def __init__(self, features, out_features=256, dilations=(12, 24, 36)): 174 | super(ASP_OC_Module, self).__init__() 175 | self.context = nn.Sequential(nn.Conv2d(features, out_features, kernel_size=3, padding=1, dilation=1, bias=True), 176 | InPlaceABNSync(out_features), 177 | BaseOC_Context_Module(in_channels=out_features, out_channels=out_features, 178 | key_channels=out_features // 2, value_channels=out_features, 179 | dropout=0, sizes=([2]))) 180 | self.conv2 = nn.Sequential(nn.Conv2d(features, out_features, kernel_size=1, padding=0, dilation=1, bias=False), 181 | InPlaceABNSync(out_features)) 182 | self.conv3 = nn.Sequential( 183 | nn.Conv2d(features, out_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False), 184 | InPlaceABNSync(out_features)) 185 | self.conv4 = nn.Sequential( 186 | nn.Conv2d(features, out_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False), 187 | InPlaceABNSync(out_features)) 188 | self.conv5 = nn.Sequential( 189 | nn.Conv2d(features, 
out_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False), 190 | InPlaceABNSync(out_features)) 191 | 192 | self.conv_bn_dropout = nn.Sequential( 193 | nn.Conv2d(out_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False), 194 | InPlaceABNSync(out_features), 195 | nn.Dropout2d(0.1) 196 | ) 197 | 198 | def _cat_each(self, feat1, feat2, feat3, feat4, feat5): 199 | assert (len(feat1) == len(feat2)) 200 | z = [] 201 | for i in range(len(feat1)): 202 | z.append(torch.cat((feat1[i], feat2[i], feat3[i], feat4[i], feat5[i]), 1)) 203 | return z 204 | 205 | def forward(self, x): 206 | if isinstance(x, Variable): 207 | _, _, h, w = x.size() 208 | elif isinstance(x, tuple) or isinstance(x, list): 209 | _, _, h, w = x[0].size() 210 | else: 211 | raise RuntimeError('unknown input type') 212 | 213 | feat1 = self.context(x) 214 | feat2 = self.conv2(x) 215 | feat3 = self.conv3(x) 216 | feat4 = self.conv4(x) 217 | feat5 = self.conv5(x) 218 | 219 | if isinstance(x, Variable): 220 | out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1) 221 | elif isinstance(x, tuple) or isinstance(x, list): 222 | out = self._cat_each(feat1, feat2, feat3, feat4, feat5) 223 | else: 224 | raise RuntimeError('unknown input type') 225 | output = self.conv_bn_dropout(out) 226 | return output 227 | -------------------------------------------------------------------------------- /schp/utils/lovasz_softmax.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : lovasz_softmax.py 8 | @Time : 8/30/19 7:12 PM 9 | @Desc : Lovasz-Softmax and Jaccard hinge loss in PyTorch 10 | Maxim Berman 2018 ESAT-PSI KU Leuven (MIT License) 11 | @License : This source code is licensed under the license found in the 12 | LICENSE file in the root directory of this source tree. 13 | """ 14 | 15 | from __future__ import print_function, division 16 | 17 | import torch 18 | from torch.autograd import Variable 19 | import torch.nn.functional as F 20 | import numpy as np 21 | from torch import nn 22 | 23 | try: 24 | from itertools import ifilterfalse 25 | except ImportError: # py3k 26 | from itertools import filterfalse as ifilterfalse 27 | 28 | 29 | def lovasz_grad(gt_sorted): 30 | """ 31 | Computes gradient of the Lovasz extension w.r.t sorted errors 32 | See Alg. 1 in paper 33 | """ 34 | p = len(gt_sorted) 35 | gts = gt_sorted.sum() 36 | intersection = gts - gt_sorted.float().cumsum(0) 37 | union = gts + (1 - gt_sorted).float().cumsum(0) 38 | jaccard = 1. 
- intersection / union 39 | if p > 1: # cover 1-pixel case 40 | jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] 41 | return jaccard 42 | 43 | 44 | def iou_binary(preds, labels, EMPTY=1., ignore=None, per_image=True): 45 | """ 46 | IoU for foreground class 47 | binary: 1 foreground, 0 background 48 | """ 49 | if not per_image: 50 | preds, labels = (preds,), (labels,) 51 | ious = [] 52 | for pred, label in zip(preds, labels): 53 | intersection = ((label == 1) & (pred == 1)).sum() 54 | union = ((label == 1) | ((pred == 1) & (label != ignore))).sum() 55 | if not union: 56 | iou = EMPTY 57 | else: 58 | iou = float(intersection) / float(union) 59 | ious.append(iou) 60 | iou = mean(ious) # mean accross images if per_image 61 | return 100 * iou 62 | 63 | 64 | def iou(preds, labels, C, EMPTY=1., ignore=None, per_image=False): 65 | """ 66 | Array of IoU for each (non ignored) class 67 | """ 68 | if not per_image: 69 | preds, labels = (preds,), (labels,) 70 | ious = [] 71 | for pred, label in zip(preds, labels): 72 | iou = [] 73 | for i in range(C): 74 | if i != ignore: # The ignored label is sometimes among predicted classes (ENet - CityScapes) 75 | intersection = ((label == i) & (pred == i)).sum() 76 | union = ((label == i) | ((pred == i) & (label != ignore))).sum() 77 | if not union: 78 | iou.append(EMPTY) 79 | else: 80 | iou.append(float(intersection) / float(union)) 81 | ious.append(iou) 82 | ious = [mean(iou) for iou in zip(*ious)] # mean accross images if per_image 83 | return 100 * np.array(ious) 84 | 85 | 86 | # --------------------------- BINARY LOSSES --------------------------- 87 | 88 | 89 | def lovasz_hinge(logits, labels, per_image=True, ignore=None): 90 | """ 91 | Binary Lovasz hinge loss 92 | logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) 93 | labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) 94 | per_image: compute the loss per image instead of per batch 95 | ignore: void class id 96 | """ 97 | if per_image: 98 | loss = mean(lovasz_hinge_flat(*flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore)) 99 | for log, lab in zip(logits, labels)) 100 | else: 101 | loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore)) 102 | return loss 103 | 104 | 105 | def lovasz_hinge_flat(logits, labels): 106 | """ 107 | Binary Lovasz hinge loss 108 | logits: [P] Variable, logits at each prediction (between -\infty and +\infty) 109 | labels: [P] Tensor, binary ground truth labels (0 or 1) 110 | ignore: label to ignore 111 | """ 112 | if len(labels) == 0: 113 | # only void pixels, the gradients should be 0 114 | return logits.sum() * 0. 115 | signs = 2. * labels.float() - 1. 116 | errors = (1. 
- logits * Variable(signs)) 117 | errors_sorted, perm = torch.sort(errors, dim=0, descending=True) 118 | perm = perm.data 119 | gt_sorted = labels[perm] 120 | grad = lovasz_grad(gt_sorted) 121 | loss = torch.dot(F.relu(errors_sorted), Variable(grad)) 122 | return loss 123 | 124 | 125 | def flatten_binary_scores(scores, labels, ignore=None): 126 | """ 127 | Flattens predictions in the batch (binary case) 128 | Remove labels equal to 'ignore' 129 | """ 130 | scores = scores.view(-1) 131 | labels = labels.view(-1) 132 | if ignore is None: 133 | return scores, labels 134 | valid = (labels != ignore) 135 | vscores = scores[valid] 136 | vlabels = labels[valid] 137 | return vscores, vlabels 138 | 139 | 140 | class StableBCELoss(torch.nn.modules.Module): 141 | def __init__(self): 142 | super(StableBCELoss, self).__init__() 143 | 144 | def forward(self, input, target): 145 | neg_abs = - input.abs() 146 | loss = input.clamp(min=0) - input * target + (1 + neg_abs.exp()).log() 147 | return loss.mean() 148 | 149 | 150 | def binary_xloss(logits, labels, ignore=None): 151 | """ 152 | Binary Cross entropy loss 153 | logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) 154 | labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) 155 | ignore: void class id 156 | """ 157 | logits, labels = flatten_binary_scores(logits, labels, ignore) 158 | loss = StableBCELoss()(logits, Variable(labels.float())) 159 | return loss 160 | 161 | 162 | # --------------------------- MULTICLASS LOSSES --------------------------- 163 | 164 | 165 | def lovasz_softmax(probas, labels, classes='present', per_image=False, ignore=255, weighted=None): 166 | """ 167 | Multi-class Lovasz-Softmax loss 168 | probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1). 169 | Interpreted as binary (sigmoid) output with outputs of size [B, H, W]. 170 | labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) 171 | classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. 172 | per_image: compute the loss per image instead of per batch 173 | ignore: void class labels 174 | """ 175 | if per_image: 176 | loss = mean(lovasz_softmax_flat(*flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore), classes=classes, weighted=weighted) 177 | for prob, lab in zip(probas, labels)) 178 | else: 179 | loss = lovasz_softmax_flat(*flatten_probas(probas, labels, ignore), classes=classes, weighted=weighted ) 180 | return loss 181 | 182 | 183 | def lovasz_softmax_flat(probas, labels, classes='present', weighted=None): 184 | """ 185 | Multi-class Lovasz-Softmax loss 186 | probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1) 187 | labels: [P] Tensor, ground truth labels (between 0 and C - 1) 188 | classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. 189 | """ 190 | if probas.numel() == 0: 191 | # only void pixels, the gradients should be 0 192 | return probas * 0. 
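# Aside: a small worked example for lovasz_grad (defined near the top of this file), the vector
# that the dot products below multiply against the sorted errors. Assumes the snippet is run from
# the schp/ directory; the ground-truth vector is arbitrary and ordered by descending error.
import torch
from utils.lovasz_softmax import lovasz_grad

gt_sorted = torch.tensor([1, 1, 0, 1, 0])
print(lovasz_grad(gt_sorted))
# tensor([0.3333, 0.3333, 0.0833, 0.2500, 0.0000])
# Each entry is the increase in Jaccard loss caused by one more of the sorted predictions being
# wrong; the increments are non-negative and sum to 1 here.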
193 | C = probas.size(1) 194 | losses = [] 195 | class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes 196 | for c in class_to_sum: 197 | fg = (labels == c).float() # foreground for class c 198 | if (classes is 'present' and fg.sum() == 0): 199 | continue 200 | if C == 1: 201 | if len(classes) > 1: 202 | raise ValueError('Sigmoid output possible only with 1 class') 203 | class_pred = probas[:, 0] 204 | else: 205 | class_pred = probas[:, c] 206 | errors = (Variable(fg) - class_pred).abs() 207 | errors_sorted, perm = torch.sort(errors, 0, descending=True) 208 | perm = perm.data 209 | fg_sorted = fg[perm] 210 | if weighted is not None: 211 | losses.append(weighted[c]*torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted)))) 212 | else: 213 | losses.append(torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted)))) 214 | return mean(losses) 215 | 216 | 217 | def flatten_probas(probas, labels, ignore=None): 218 | """ 219 | Flattens predictions in the batch 220 | """ 221 | if probas.dim() == 3: 222 | # assumes output of a sigmoid layer 223 | B, H, W = probas.size() 224 | probas = probas.view(B, 1, H, W) 225 | B, C, H, W = probas.size() 226 | probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C 227 | labels = labels.view(-1) 228 | if ignore is None: 229 | return probas, labels 230 | valid = (labels != ignore) 231 | vprobas = probas[valid.nonzero().squeeze()] 232 | vlabels = labels[valid] 233 | return vprobas, vlabels 234 | 235 | 236 | def xloss(logits, labels, ignore=None): 237 | """ 238 | Cross entropy loss 239 | """ 240 | return F.cross_entropy(logits, Variable(labels), ignore_index=255) 241 | 242 | 243 | # --------------------------- HELPER FUNCTIONS --------------------------- 244 | def isnan(x): 245 | return x != x 246 | 247 | 248 | def mean(l, ignore_nan=False, empty=0): 249 | """ 250 | nanmean compatible with generators. 251 | """ 252 | l = iter(l) 253 | if ignore_nan: 254 | l = ifilterfalse(isnan, l) 255 | try: 256 | n = 1 257 | acc = next(l) 258 | except StopIteration: 259 | if empty == 'raise': 260 | raise ValueError('Empty mean') 261 | return empty 262 | for n, v in enumerate(l, 2): 263 | acc += v 264 | if n == 1: 265 | return acc 266 | return acc / n 267 | 268 | # --------------------------- Class --------------------------- 269 | class LovaszSoftmax(nn.Module): 270 | def __init__(self, per_image=False, ignore_index=255, weighted=None): 271 | super(LovaszSoftmax, self).__init__() 272 | self.lovasz_softmax = lovasz_softmax 273 | self.per_image = per_image 274 | self.ignore_index=ignore_index 275 | self.weighted = weighted 276 | 277 | def forward(self, pred, label): 278 | pred = F.softmax(pred, dim=1) 279 | return self.lovasz_softmax(pred, label, per_image=self.per_image, ignore=self.ignore_index, weighted=self.weighted) --------------------------------------------------------------------------------
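Finally, a minimal sketch of calling the LovaszSoftmax wrapper defined above directly on random logits. The 20 classes match the LIP default used elsewhere in this repository, the void value matches the wrapper's default ignore_index, and the snippet assumes it is run from the schp/ directory:

import torch
from utils.lovasz_softmax import LovaszSoftmax

criterion = LovaszSoftmax(per_image=False, ignore_index=255)
logits = torch.randn(2, 20, 64, 64)            # [B, C, H, W] raw network outputs
labels = torch.randint(0, 20, (2, 64, 64))     # [B, H, W] integer class labels
labels[0, :4, :4] = 255                        # void pixels are filtered out by flatten_probas
loss = criterion(logits, labels)               # softmax followed by the multi-class Lovasz loss
print(loss.item())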