├── schp ├── utils │ ├── __init__.py │ ├── consistency_loss.py │ ├── kl_loss.py │ ├── schp.py │ ├── warmup_scheduler.py │ ├── soft_dice_loss.py │ ├── transforms.py │ ├── miou.py │ ├── criterion.py │ ├── encoding.py │ └── lovasz_softmax.py ├── datasets │ ├── __init__.py │ ├── target_generation.py │ ├── simple_extractor_dataset.py │ └── datasets.py ├── requirements.txt ├── .gitignore ├── modules │ ├── __init__.py │ ├── src │ │ ├── checks.h │ │ ├── utils │ │ │ ├── checks.h │ │ │ ├── common.h │ │ │ └── cuda.cuh │ │ ├── inplace_abn.h │ │ ├── inplace_abn_cpu.cpp │ │ ├── inplace_abn.cpp │ │ ├── inplace_abn_cpu_only.cpp │ │ └── inplace_abn_cuda_half.cu │ ├── misc.py │ ├── dense.py │ ├── deeplab.py │ ├── bn.py │ ├── residual.py │ └── functions.py ├── networks │ ├── __init__.py │ ├── context_encoding │ │ ├── psp.py │ │ ├── aspp.py │ │ └── ocnet.py │ └── backbone │ │ ├── resnext.py │ │ ├── mobilenetv2.py │ │ └── resnet.py ├── LICENSE ├── environment.yaml ├── simple_extractor.py ├── README.md ├── evaluate.py └── train.py ├── requirements.txt ├── assets ├── demo2.jpg ├── demo3.jpg ├── demo2atr.png ├── demo2lip.png ├── demo3atr.png ├── demo3lip.png ├── atrexample.png ├── demo2pascal.png ├── demo3pascal.png ├── lipexample.png └── pascalexample.png ├── __init__.py ├── pyproject.toml ├── .github └── workflows │ └── publish.yml ├── HumanParserPascalCustomNode.py ├── HumanParserATRCustomNode.py ├── HumanParserLIPCustomNode.py ├── .gitignore ├── utils.py └── README.md /schp/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /schp/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /schp/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python==4.4.0.46 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ninja 2 | torch 3 | opencv-python 4 | numpy 5 | -------------------------------------------------------------------------------- /schp/.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__ 2 | 3 | data/ 4 | log/ 5 | pretrain_model/ 6 | -------------------------------------------------------------------------------- /assets/demo2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/demo2.jpg -------------------------------------------------------------------------------- /assets/demo3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/demo3.jpg -------------------------------------------------------------------------------- /assets/demo2atr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/demo2atr.png -------------------------------------------------------------------------------- /assets/demo2lip.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/demo2lip.png -------------------------------------------------------------------------------- /assets/demo3atr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/demo3atr.png -------------------------------------------------------------------------------- /assets/demo3lip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/demo3lip.png -------------------------------------------------------------------------------- /assets/atrexample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/atrexample.png -------------------------------------------------------------------------------- /assets/demo2pascal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/demo2pascal.png -------------------------------------------------------------------------------- /assets/demo3pascal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/demo3pascal.png -------------------------------------------------------------------------------- /assets/lipexample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/lipexample.png -------------------------------------------------------------------------------- /assets/pascalexample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cozymantis/human-parser-comfyui-node/HEAD/assets/pascalexample.png -------------------------------------------------------------------------------- /schp/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .bn import ABN, InPlaceABN, InPlaceABNSync 2 | from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE 3 | from .misc import GlobalAvgPool2d, SingleGPU 4 | from .residual import IdentityResidualBlock 5 | from .dense import DenseModule 6 | -------------------------------------------------------------------------------- /schp/networks/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .AugmentCE2P import resnet101 4 | 5 | __factory = { 6 | 'resnet101': resnet101, 7 | } 8 | 9 | 10 | def init_model(name, *args, **kwargs): 11 | if name not in __factory.keys(): 12 | raise KeyError("Unknown model arch: {}".format(name)) 13 | return __factory[name](*args, **kwargs) -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from .HumanParserLIPCustomNode import HumanParserLIPCustomNode 2 | from .HumanParserATRCustomNode import HumanParserATRCustomNode 3 | from .HumanParserPascalCustomNode import HumanParserPascalCustomNode 4 | 5 | NODE_CLASS_MAPPINGS = { 6 | "Cozy Human Parser LIP" : HumanParserLIPCustomNode, 7 | "Cozy Human Parser 
ATR" : HumanParserATRCustomNode, 8 | "Cozy Human Parser Pascal" : HumanParserPascalCustomNode, 9 | } 10 | 11 | __all__ = ['NODE_CLASS_MAPPINGS'] -------------------------------------------------------------------------------- /schp/modules/src/checks.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | // Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT 6 | #ifndef AT_CHECK 7 | #define AT_CHECK AT_ASSERT 8 | #endif 9 | 10 | #define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor") 11 | #define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor") 12 | #define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous") 13 | 14 | #define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 15 | #define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x) -------------------------------------------------------------------------------- /schp/modules/src/utils/checks.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | // Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT 6 | #ifndef AT_CHECK 7 | #define AT_CHECK AT_ASSERT 8 | #endif 9 | 10 | #define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor") 11 | #define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor") 12 | #define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous") 13 | 14 | #define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 15 | #define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x) -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "human-parser-comfyui-node" 3 | description = "A ComfyUI node to automatically extract masks for body regions and clothing/fashion items. Made with 💚 by the CozyMantis squad." 
4 | version = "1.0.0" 5 | license = { file = "LICENSE.md" } 6 | dependencies = ["ninja", "torch", "opencv-python", "numpy"] 7 | 8 | [project.urls] 9 | Repository = "https://github.com/cozymantis/human-parser-comfyui-node" 10 | # Used by Comfy Registry https://comfyregistry.org 11 | 12 | [tool.comfy] 13 | PublisherId = "cozymantis" 14 | DisplayName = "Human Body and Clothes Parser - Segmentation ComfyUI Node" 15 | Icon = "" 16 | -------------------------------------------------------------------------------- /schp/modules/misc.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.distributed as dist 4 | 5 | class GlobalAvgPool2d(nn.Module): 6 | def __init__(self): 7 | """Global average pooling over the input's spatial dimensions""" 8 | super(GlobalAvgPool2d, self).__init__() 9 | 10 | def forward(self, inputs): 11 | in_size = inputs.size() 12 | return inputs.view((in_size[0], in_size[1], -1)).mean(dim=2) 13 | 14 | class SingleGPU(nn.Module): 15 | def __init__(self, module): 16 | super(SingleGPU, self).__init__() 17 | self.module=module 18 | 19 | def forward(self, input): 20 | return self.module(input.cuda(non_blocking=True)) 21 | 22 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish to Comfy registry 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - "pyproject.toml" 9 | 10 | permissions: 11 | issues: write 12 | 13 | jobs: 14 | publish-node: 15 | name: Publish Custom Node to registry 16 | runs-on: ubuntu-latest 17 | if: ${{ github.repository_owner == 'cozymantis' }} 18 | steps: 19 | - name: Check out code 20 | uses: actions/checkout@v4 21 | - name: Publish Custom Node 22 | uses: Comfy-Org/publish-node-action@v1 23 | with: 24 | ## Add your own personal access token to your Github Repository secrets and reference it here. 25 | personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }} -------------------------------------------------------------------------------- /schp/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Peike Li 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /schp/utils/consistency_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : kl_loss.py 8 | @Time : 7/23/19 4:02 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 12 | """ 13 | import torch 14 | import torch.nn.functional as F 15 | from torch import nn 16 | from datasets.target_generation import generate_edge_tensor 17 | 18 | 19 | class ConsistencyLoss(nn.Module): 20 | def __init__(self, ignore_index=255): 21 | super(ConsistencyLoss, self).__init__() 22 | self.ignore_index=ignore_index 23 | 24 | def forward(self, parsing, edge, label): 25 | parsing_pre = torch.argmax(parsing, dim=1) 26 | parsing_pre[label==self.ignore_index]=self.ignore_index 27 | generated_edge = generate_edge_tensor(parsing_pre) 28 | edge_pre = torch.argmax(edge, dim=1) 29 | v_generate_edge = generated_edge[label!=255] 30 | v_edge_pre = edge_pre[label!=255] 31 | v_edge_pre = v_edge_pre.type(torch.cuda.FloatTensor) 32 | positive_union = (v_generate_edge==1)&(v_edge_pre==1) # only the positive values count 33 | return F.smooth_l1_loss(v_generate_edge[positive_union].squeeze(0), v_edge_pre[positive_union].squeeze(0)) 34 | -------------------------------------------------------------------------------- /schp/datasets/target_generation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | 4 | 5 | def generate_edge_tensor(label, edge_width=3): 6 | label = label.type(torch.cuda.FloatTensor) 7 | if len(label.shape) == 2: 8 | label = label.unsqueeze(0) 9 | n, h, w = label.shape 10 | edge = torch.zeros(label.shape, dtype=torch.float).cuda() 11 | # right 12 | edge_right = edge[:, 1:h, :] 13 | edge_right[(label[:, 1:h, :] != label[:, :h - 1, :]) & (label[:, 1:h, :] != 255) 14 | & (label[:, :h - 1, :] != 255)] = 1 15 | 16 | # up 17 | edge_up = edge[:, :, :w - 1] 18 | edge_up[(label[:, :, :w - 1] != label[:, :, 1:w]) 19 | & (label[:, :, :w - 1] != 255) 20 | & (label[:, :, 1:w] != 255)] = 1 21 | 22 | # upright 23 | edge_upright = edge[:, :h - 1, :w - 1] 24 | edge_upright[(label[:, :h - 1, :w - 1] != label[:, 1:h, 1:w]) 25 | & (label[:, :h - 1, :w - 1] != 255) 26 | & (label[:, 1:h, 1:w] != 255)] = 1 27 | 28 | # bottomright 29 | edge_bottomright = edge[:, :h - 1, 1:w] 30 | edge_bottomright[(label[:, :h - 1, 1:w] != label[:, 1:h, :w - 1]) 31 | & (label[:, :h - 1, 1:w] != 255) 32 | & (label[:, 1:h, :w - 1] != 255)] = 1 33 | 34 | kernel = torch.ones((1, 1, edge_width, edge_width), dtype=torch.float).cuda() 35 | with torch.no_grad(): 36 | edge = edge.unsqueeze(1) 37 | edge = F.conv2d(edge, kernel, stride=1, padding=1) 38 | edge[edge!=0] = 1 39 | edge = edge.squeeze() 40 | return edge 41 | -------------------------------------------------------------------------------- /schp/modules/dense.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .bn import ABN 7 | 8 | 9 | class DenseModule(nn.Module): 10 | def __init__(self, in_channels, growth, layers, bottleneck_factor=4, norm_act=ABN, dilation=1): 11 | super(DenseModule, self).__init__() 12 | self.in_channels = 
in_channels 13 | self.growth = growth 14 | self.layers = layers 15 | 16 | self.convs1 = nn.ModuleList() 17 | self.convs3 = nn.ModuleList() 18 | for i in range(self.layers): 19 | self.convs1.append(nn.Sequential(OrderedDict([ 20 | ("bn", norm_act(in_channels)), 21 | ("conv", nn.Conv2d(in_channels, self.growth * bottleneck_factor, 1, bias=False)) 22 | ]))) 23 | self.convs3.append(nn.Sequential(OrderedDict([ 24 | ("bn", norm_act(self.growth * bottleneck_factor)), 25 | ("conv", nn.Conv2d(self.growth * bottleneck_factor, self.growth, 3, padding=dilation, bias=False, 26 | dilation=dilation)) 27 | ]))) 28 | in_channels += self.growth 29 | 30 | @property 31 | def out_channels(self): 32 | return self.in_channels + self.growth * self.layers 33 | 34 | def forward(self, x): 35 | inputs = [x] 36 | for i in range(self.layers): 37 | x = torch.cat(inputs, dim=1) 38 | x = self.convs1[i](x) 39 | x = self.convs3[i](x) 40 | inputs += [x] 41 | 42 | return torch.cat(inputs, dim=1) 43 | -------------------------------------------------------------------------------- /schp/environment.yaml: -------------------------------------------------------------------------------- 1 | name: schp 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=main 7 | - blas=1.0=mkl 8 | - ca-certificates=2020.12.8=h06a4308_0 9 | - certifi=2020.12.5=py38h06a4308_0 10 | - cudatoolkit=10.1.243=h6bb024c_0 11 | - freetype=2.10.4=h5ab3b9f_0 12 | - intel-openmp=2020.2=254 13 | - jpeg=9b=h024ee3a_2 14 | - lcms2=2.11=h396b838_0 15 | - ld_impl_linux-64=2.33.1=h53a641e_7 16 | - libedit=3.1.20191231=h14c3975_1 17 | - libffi=3.3=he6710b0_2 18 | - libgcc-ng=9.1.0=hdf63c60_0 19 | - libpng=1.6.37=hbc83047_0 20 | - libstdcxx-ng=9.1.0=hdf63c60_0 21 | - libtiff=4.1.0=h2733197_1 22 | - lz4-c=1.9.2=heb0550a_3 23 | - mkl=2020.2=256 24 | - mkl-service=2.3.0=py38he904b0f_0 25 | - mkl_fft=1.2.0=py38h23d657b_0 26 | - mkl_random=1.1.1=py38h0573a6f_0 27 | - ncurses=6.2=he6710b0_1 28 | - ninja=1.10.2=py38hff7bd54_0 29 | - numpy=1.19.2=py38h54aff64_0 30 | - numpy-base=1.19.2=py38hfa32c7d_0 31 | - olefile=0.46=py_0 32 | - openssl=1.1.1i=h27cfd23_0 33 | - pillow=8.0.1=py38he98fc37_0 34 | - pip=20.3.3=py38h06a4308_0 35 | - python=3.8.5=h7579374_1 36 | - readline=8.0=h7b6447c_0 37 | - setuptools=51.0.0=py38h06a4308_2 38 | - six=1.15.0=py38h06a4308_0 39 | - sqlite=3.33.0=h62c20be_0 40 | - tk=8.6.10=hbc83047_0 41 | - tqdm=4.55.0=pyhd3eb1b0_0 42 | - wheel=0.36.2=pyhd3eb1b0_0 43 | - xz=5.2.5=h7b6447c_0 44 | - zlib=1.2.11=h7b6447c_3 45 | - zstd=1.4.5=h9ceee32_0 46 | - pytorch=1.5.1=py3.8_cuda10.1.243_cudnn7.6.3_0 47 | - torchvision=0.6.1=py38_cu101 48 | prefix: /home/peike/opt/anaconda3/envs/schp 49 | 50 | -------------------------------------------------------------------------------- /schp/modules/src/utils/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | /* 6 | * Functions to share code between CPU and GPU 7 | */ 8 | 9 | #ifdef __CUDACC__ 10 | // CUDA versions 11 | 12 | #define HOST_DEVICE __host__ __device__ 13 | #define INLINE_HOST_DEVICE __host__ __device__ inline 14 | #define FLOOR(x) floor(x) 15 | 16 | #if __CUDA_ARCH__ >= 600 17 | // Recent compute capabilities have block-level atomicAdd for all data types, so we use that 18 | #define ACCUM(x,y) atomicAdd_block(&(x),(y)) 19 | #else 20 | // Older architectures don't have block-level atomicAdd, nor atomicAdd for doubles, so we defer to atomicAdd for float 21 | // and use the known atomicCAS-based 
implementation for double 22 | template 23 | __device__ inline data_t atomic_add(data_t *address, data_t val) { 24 | return atomicAdd(address, val); 25 | } 26 | 27 | template<> 28 | __device__ inline double atomic_add(double *address, double val) { 29 | unsigned long long int* address_as_ull = (unsigned long long int*)address; 30 | unsigned long long int old = *address_as_ull, assumed; 31 | do { 32 | assumed = old; 33 | old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); 34 | } while (assumed != old); 35 | return __longlong_as_double(old); 36 | } 37 | 38 | #define ACCUM(x,y) atomic_add(&(x),(y)) 39 | #endif // #if __CUDA_ARCH__ >= 600 40 | 41 | #else 42 | // CPU versions 43 | 44 | #define HOST_DEVICE 45 | #define INLINE_HOST_DEVICE inline 46 | #define FLOOR(x) std::floor(x) 47 | #define ACCUM(x,y) (x) += (y) 48 | 49 | #endif // #ifdef __CUDACC__ -------------------------------------------------------------------------------- /schp/utils/kl_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : kl_loss.py 8 | @Time : 7/23/19 4:02 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 12 | """ 13 | import torch 14 | import torch.nn.functional as F 15 | from torch import nn 16 | 17 | 18 | def flatten_probas(input, target, labels, ignore=255): 19 | """ 20 | Flattens predictions in the batch. 21 | """ 22 | B, C, H, W = input.size() 23 | input = input.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C 24 | target = target.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C 25 | labels = labels.view(-1) 26 | if ignore is None: 27 | return input, target 28 | valid = (labels != ignore) 29 | vinput = input[valid.nonzero().squeeze()] 30 | vtarget = target[valid.nonzero().squeeze()] 31 | return vinput, vtarget 32 | 33 | 34 | class KLDivergenceLoss(nn.Module): 35 | def __init__(self, ignore_index=255, T=1): 36 | super(KLDivergenceLoss, self).__init__() 37 | self.ignore_index=ignore_index 38 | self.T = T 39 | 40 | def forward(self, input, target, label): 41 | log_input_prob = F.log_softmax(input / self.T, dim=1) 42 | target_porb = F.softmax(target / self.T, dim=1) 43 | loss = F.kl_div(*flatten_probas(log_input_prob, target_porb, label, ignore=self.ignore_index)) 44 | return self.T*self.T*loss # balanced 45 | -------------------------------------------------------------------------------- /schp/networks/context_encoding/psp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : psp.py 8 | @Time : 8/4/19 3:36 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 12 | """ 13 | 14 | import torch 15 | import torch.nn as nn 16 | from torch.nn import functional as F 17 | 18 | from modules import InPlaceABNSync 19 | 20 | 21 | class PSPModule(nn.Module): 22 | """ 23 | Reference: 24 | Zhao, Hengshuang, et al. 
*"Pyramid scene parsing network."* 25 | """ 26 | def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)): 27 | super(PSPModule, self).__init__() 28 | 29 | self.stages = [] 30 | self.stages = nn.ModuleList([self._make_stage(features, out_features, size) for size in sizes]) 31 | self.bottleneck = nn.Sequential( 32 | nn.Conv2d(features + len(sizes) * out_features, out_features, kernel_size=3, padding=1, dilation=1, 33 | bias=False), 34 | InPlaceABNSync(out_features), 35 | ) 36 | 37 | def _make_stage(self, features, out_features, size): 38 | prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) 39 | conv = nn.Conv2d(features, out_features, kernel_size=1, bias=False) 40 | bn = InPlaceABNSync(out_features) 41 | return nn.Sequential(prior, conv, bn) 42 | 43 | def forward(self, feats): 44 | h, w = feats.size(2), feats.size(3) 45 | priors = [F.interpolate(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) for stage in 46 | self.stages] + [feats] 47 | bottle = self.bottleneck(torch.cat(priors, 1)) 48 | return bottle -------------------------------------------------------------------------------- /schp/modules/src/utils/cuda.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* 4 | * General settings and functions 5 | */ 6 | const int WARP_SIZE = 32; 7 | const int MAX_BLOCK_SIZE = 1024; 8 | 9 | static int getNumThreads(int nElem) { 10 | int threadSizes[6] = {32, 64, 128, 256, 512, MAX_BLOCK_SIZE}; 11 | for (int i = 0; i < 6; ++i) { 12 | if (nElem <= threadSizes[i]) { 13 | return threadSizes[i]; 14 | } 15 | } 16 | return MAX_BLOCK_SIZE; 17 | } 18 | 19 | /* 20 | * Reduction utilities 21 | */ 22 | template 23 | __device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, 24 | unsigned int mask = 0xffffffff) { 25 | #if CUDART_VERSION >= 9000 26 | return __shfl_xor_sync(mask, value, laneMask, width); 27 | #else 28 | return __shfl_xor(value, laneMask, width); 29 | #endif 30 | } 31 | 32 | __device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); } 33 | 34 | template 35 | struct Pair { 36 | T v1, v2; 37 | __device__ Pair() {} 38 | __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {} 39 | __device__ Pair(T v) : v1(v), v2(v) {} 40 | __device__ Pair(int v) : v1(v), v2(v) {} 41 | __device__ Pair &operator+=(const Pair &a) { 42 | v1 += a.v1; 43 | v2 += a.v2; 44 | return *this; 45 | } 46 | }; 47 | 48 | template 49 | static __device__ __forceinline__ T warpSum(T val) { 50 | #if __CUDA_ARCH__ >= 300 51 | for (int i = 0; i < getMSB(WARP_SIZE); ++i) { 52 | val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE); 53 | } 54 | #else 55 | __shared__ T values[MAX_BLOCK_SIZE]; 56 | values[threadIdx.x] = val; 57 | __threadfence_block(); 58 | const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE; 59 | for (int i = 1; i < WARP_SIZE; i++) { 60 | val += values[base + ((i + threadIdx.x) % WARP_SIZE)]; 61 | } 62 | #endif 63 | return val; 64 | } 65 | 66 | template 67 | static __device__ __forceinline__ Pair warpSum(Pair value) { 68 | value.v1 = warpSum(value.v1); 69 | value.v2 = warpSum(value.v2); 70 | return value; 71 | } -------------------------------------------------------------------------------- /HumanParserPascalCustomNode.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from PIL import Image 4 | 5 | from .utils import generate 6 | ['Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs'] 7 | 
class HumanParserPascalCustomNode: 8 | @classmethod 9 | def INPUT_TYPES(cls): 10 | return { 11 | "required": { 12 | "image" : ("IMAGE", {}), 13 | "background": ("BOOLEAN", {"default": False}), 14 | "head": ("BOOLEAN", {"default": False}), 15 | "torso": ("BOOLEAN", {"default": False}), 16 | "upper_arms": ("BOOLEAN", {"default": False}), 17 | "lower_arms": ("BOOLEAN", {"default": False}), 18 | "upper_legs": ("BOOLEAN", {"default": False}), 19 | "lower_legs": ("BOOLEAN", {"default": False}), 20 | }, 21 | } 22 | 23 | RETURN_TYPES = ("MASK", "IMAGE") 24 | RETURN_NAMES = ("mask", "map") 25 | FUNCTION = "run" 26 | CATEGORY = "CozyMantis" 27 | 28 | def run(self, image, background, head, torso, upper_arms, lower_arms, upper_legs, lower_legs): 29 | if torch.cuda.is_available(): 30 | device = 'cuda' 31 | else: 32 | device = 'cpu' 33 | 34 | output_img = generate(image[0], 'pascal', device) 35 | 36 | mask_components = [] 37 | 38 | if background: 39 | mask_components.append(0) 40 | if head: 41 | mask_components.append(1) 42 | if torso: 43 | mask_components.append(2) 44 | if upper_arms: 45 | mask_components.append(3) 46 | if lower_arms: 47 | mask_components.append(4) 48 | if upper_legs: 49 | mask_components.append(5) 50 | if lower_legs: 51 | mask_components.append(6) 52 | 53 | mask = np.isin(output_img, mask_components).astype(np.uint8) 54 | mask_image = Image.fromarray(mask * 255) 55 | mask_image = mask_image.convert("RGB") 56 | mask_image = torch.from_numpy(np.array(mask_image).astype(np.float32) / 255.0).unsqueeze(0) 57 | 58 | output_img = output_img.convert('RGB') 59 | output_img = torch.from_numpy(np.array(output_img).astype(np.float32) / 255.0).unsqueeze(0) 60 | return (mask_image[:, :, :, 0], output_img,) 61 | -------------------------------------------------------------------------------- /schp/datasets/simple_extractor_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : dataset.py 8 | @Time : 8/30/19 9:12 PM 9 | @Desc : Dataset Definition 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 
12 | """ 13 | 14 | import os 15 | import cv2 16 | import numpy as np 17 | 18 | from torch.utils import data 19 | from utils.transforms import get_affine_transform 20 | 21 | 22 | class SimpleFolderDataset(data.Dataset): 23 | def __init__(self, root, input_size=[512, 512], transform=None): 24 | self.root = root 25 | self.input_size = input_size 26 | self.transform = transform 27 | self.aspect_ratio = input_size[1] * 1.0 / input_size[0] 28 | self.input_size = np.asarray(input_size) 29 | 30 | self.file_list = os.listdir(self.root) 31 | 32 | def __len__(self): 33 | return len(self.file_list) 34 | 35 | def _box2cs(self, box): 36 | x, y, w, h = box[:4] 37 | return self._xywh2cs(x, y, w, h) 38 | 39 | def _xywh2cs(self, x, y, w, h): 40 | center = np.zeros((2), dtype=np.float32) 41 | center[0] = x + w * 0.5 42 | center[1] = y + h * 0.5 43 | if w > self.aspect_ratio * h: 44 | h = w * 1.0 / self.aspect_ratio 45 | elif w < self.aspect_ratio * h: 46 | w = h * self.aspect_ratio 47 | scale = np.array([w, h], dtype=np.float32) 48 | return center, scale 49 | 50 | def __getitem__(self, index): 51 | img_name = self.file_list[index] 52 | img_path = os.path.join(self.root, img_name) 53 | img = cv2.imread(img_path, cv2.IMREAD_COLOR) 54 | h, w, _ = img.shape 55 | 56 | # Get person center and scale 57 | person_center, s = self._box2cs([0, 0, w - 1, h - 1]) 58 | r = 0 59 | trans = get_affine_transform(person_center, s, r, self.input_size) 60 | input = cv2.warpAffine( 61 | img, 62 | trans, 63 | (int(self.input_size[1]), int(self.input_size[0])), 64 | flags=cv2.INTER_LINEAR, 65 | borderMode=cv2.BORDER_CONSTANT, 66 | borderValue=(0, 0, 0)) 67 | 68 | input = self.transform(input) 69 | meta = { 70 | 'name': img_name, 71 | 'center': person_center, 72 | 'height': h, 73 | 'width': w, 74 | 'scale': s, 75 | 'rotation': r 76 | } 77 | 78 | return input, meta 79 | -------------------------------------------------------------------------------- /schp/utils/schp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : schp.py 8 | @Time : 4/8/19 2:11 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 
12 | """ 13 | 14 | import os 15 | import torch 16 | import modules 17 | 18 | def moving_average(net1, net2, alpha=1): 19 | for param1, param2 in zip(net1.parameters(), net2.parameters()): 20 | param1.data *= (1.0 - alpha) 21 | param1.data += param2.data * alpha 22 | 23 | 24 | def _check_bn(module, flag): 25 | if issubclass(module.__class__, modules.bn.InPlaceABNSync): 26 | flag[0] = True 27 | 28 | 29 | def check_bn(model): 30 | flag = [False] 31 | model.apply(lambda module: _check_bn(module, flag)) 32 | return flag[0] 33 | 34 | 35 | def reset_bn(module): 36 | if issubclass(module.__class__, modules.bn.InPlaceABNSync): 37 | module.running_mean = torch.zeros_like(module.running_mean) 38 | module.running_var = torch.ones_like(module.running_var) 39 | 40 | 41 | def _get_momenta(module, momenta): 42 | if issubclass(module.__class__, modules.bn.InPlaceABNSync): 43 | momenta[module] = module.momentum 44 | 45 | 46 | def _set_momenta(module, momenta): 47 | if issubclass(module.__class__, modules.bn.InPlaceABNSync): 48 | module.momentum = momenta[module] 49 | 50 | 51 | def bn_re_estimate(loader, model): 52 | if not check_bn(model): 53 | print('No batch norm layer detected') 54 | return 55 | model.train() 56 | momenta = {} 57 | model.apply(reset_bn) 58 | model.apply(lambda module: _get_momenta(module, momenta)) 59 | n = 0 60 | for i_iter, batch in enumerate(loader): 61 | images, labels, _ = batch 62 | b = images.data.size(0) 63 | momentum = b / (n + b) 64 | for module in momenta.keys(): 65 | module.momentum = momentum 66 | model(images) 67 | n += b 68 | model.apply(lambda module: _set_momenta(module, momenta)) 69 | 70 | 71 | def save_schp_checkpoint(states, is_best_parsing, output_dir, filename='schp_checkpoint.pth.tar'): 72 | save_path = os.path.join(output_dir, filename) 73 | if os.path.exists(save_path): 74 | os.remove(save_path) 75 | torch.save(states, save_path) 76 | if is_best_parsing and 'state_dict' in states: 77 | best_save_path = os.path.join(output_dir, 'model_parsing_best.pth.tar') 78 | if os.path.exists(best_save_path): 79 | os.remove(best_save_path) 80 | torch.save(states, best_save_path) 81 | -------------------------------------------------------------------------------- /schp/networks/context_encoding/aspp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : aspp.py 8 | @Time : 8/4/19 3:36 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 12 | """ 13 | 14 | import torch 15 | import torch.nn as nn 16 | from torch.nn import functional as F 17 | 18 | from modules import InPlaceABNSync 19 | 20 | 21 | class ASPPModule(nn.Module): 22 | """ 23 | Reference: 24 | Chen, Liang-Chieh, et al. 
*"Rethinking Atrous Convolution for Semantic Image Segmentation."* 25 | """ 26 | def __init__(self, features, out_features=512, inner_features=256, dilations=(12, 24, 36)): 27 | super(ASPPModule, self).__init__() 28 | 29 | self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), 30 | nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, 31 | bias=False), 32 | InPlaceABNSync(inner_features)) 33 | self.conv2 = nn.Sequential( 34 | nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False), 35 | InPlaceABNSync(inner_features)) 36 | self.conv3 = nn.Sequential( 37 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False), 38 | InPlaceABNSync(inner_features)) 39 | self.conv4 = nn.Sequential( 40 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False), 41 | InPlaceABNSync(inner_features)) 42 | self.conv5 = nn.Sequential( 43 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False), 44 | InPlaceABNSync(inner_features)) 45 | 46 | self.bottleneck = nn.Sequential( 47 | nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False), 48 | InPlaceABNSync(out_features), 49 | nn.Dropout2d(0.1) 50 | ) 51 | 52 | def forward(self, x): 53 | _, _, h, w = x.size() 54 | 55 | feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True) 56 | 57 | feat2 = self.conv2(x) 58 | feat3 = self.conv3(x) 59 | feat4 = self.conv4(x) 60 | feat5 = self.conv5(x) 61 | out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1) 62 | 63 | bottle = self.bottleneck(out) 64 | return bottle -------------------------------------------------------------------------------- /HumanParserATRCustomNode.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from PIL import Image 4 | 5 | from .utils import generate 6 | 7 | class HumanParserATRCustomNode: 8 | @classmethod 9 | def INPUT_TYPES(cls): 10 | return { 11 | "required": { 12 | "image" : ("IMAGE", {}), 13 | "background": ("BOOLEAN", {"default": False}), 14 | "hat": ("BOOLEAN", {"default": False}), 15 | "hair": ("BOOLEAN", {"default": False}), 16 | "sunglasses": ("BOOLEAN", {"default": False}), 17 | "upper_clothes": ("BOOLEAN", {"default": False}), 18 | "skirt": ("BOOLEAN", {"default": False}), 19 | "pants": ("BOOLEAN", {"default": False}), 20 | "dress": ("BOOLEAN", {"default": False}), 21 | "belt": ("BOOLEAN", {"default": False}), 22 | "left_shoe": ("BOOLEAN", {"default": False}), 23 | "right_shoe": ("BOOLEAN", {"default": False}), 24 | "face": ("BOOLEAN", {"default": False}), 25 | "left_leg": ("BOOLEAN", {"default": False}), 26 | "right_leg": ("BOOLEAN", {"default": False}), 27 | "left_arm": ("BOOLEAN", {"default": False}), 28 | "right_arm": ("BOOLEAN", {"default": False}), 29 | "bag": ("BOOLEAN", {"default": False}), 30 | "scarf": ("BOOLEAN", {"default": False}), 31 | }, 32 | } 33 | 34 | RETURN_TYPES = ("MASK", "IMAGE") 35 | RETURN_NAMES = ("mask", "map") 36 | FUNCTION = "run" 37 | CATEGORY = "CozyMantis" 38 | 39 | def run(self, image, background, hat, hair, sunglasses, upper_clothes, skirt, pants, dress, belt, left_shoe, right_shoe, face, left_leg, right_leg, left_arm, right_arm, bag, scarf): 40 | if torch.cuda.is_available(): 41 | device = 'cuda' 42 | else: 43 | device = 'cpu' 44 | 45 | output_img = generate(image[0], 'atr', device) 46 | 47 | mask_components = 
[] 48 | 49 | if background: 50 | mask_components.append(0) 51 | if hat: 52 | mask_components.append(1) 53 | if hair: 54 | mask_components.append(2) 55 | if sunglasses: 56 | mask_components.append(3) 57 | if upper_clothes: 58 | mask_components.append(4) 59 | if skirt: 60 | mask_components.append(5) 61 | if pants: 62 | mask_components.append(6) 63 | if dress: 64 | mask_components.append(7) 65 | if belt: 66 | mask_components.append(8) 67 | if left_shoe: 68 | mask_components.append(9) 69 | if right_shoe: 70 | mask_components.append(10) 71 | if face: 72 | mask_components.append(11) 73 | if left_leg: 74 | mask_components.append(12) 75 | if right_leg: 76 | mask_components.append(13) 77 | if left_arm: 78 | mask_components.append(14) 79 | if right_arm: 80 | mask_components.append(15) 81 | if bag: 82 | mask_components.append(16) 83 | if scarf: 84 | mask_components.append(17) 85 | 86 | mask = np.isin(output_img, mask_components).astype(np.uint8) 87 | mask_image = Image.fromarray(mask * 255) 88 | mask_image = mask_image.convert("RGB") 89 | mask_image = torch.from_numpy(np.array(mask_image).astype(np.float32) / 255.0).unsqueeze(0) 90 | 91 | output_img = output_img.convert('RGB') 92 | output_img = torch.from_numpy(np.array(output_img).astype(np.float32) / 255.0).unsqueeze(0) 93 | return (mask_image[:, :, :, 0], output_img,) 94 | -------------------------------------------------------------------------------- /schp/utils/warmup_scheduler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : warmup_scheduler.py 8 | @Time : 3/28/19 2:24 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 12 | """ 13 | 14 | import math 15 | from torch.optim.lr_scheduler import _LRScheduler 16 | 17 | 18 | class GradualWarmupScheduler(_LRScheduler): 19 | """ Gradually warm-up learning rate with cosine annealing in optimizer. 20 | Proposed in 'Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour'. 21 | """ 22 | 23 | def __init__(self, optimizer, total_epoch, eta_min=0, warmup_epoch=10, last_epoch=-1): 24 | self.total_epoch = total_epoch 25 | self.eta_min = eta_min 26 | self.warmup_epoch = warmup_epoch 27 | super(GradualWarmupScheduler, self).__init__(optimizer, last_epoch) 28 | 29 | def get_lr(self): 30 | if self.last_epoch <= self.warmup_epoch: 31 | return [self.eta_min + self.last_epoch*(base_lr - self.eta_min)/self.warmup_epoch for base_lr in self.base_lrs] 32 | else: 33 | return [self.eta_min + (base_lr-self.eta_min)*(1+math.cos(math.pi*(self.last_epoch-self.warmup_epoch)/(self.total_epoch-self.warmup_epoch))) / 2 for base_lr in self.base_lrs] 34 | 35 | 36 | class SGDRScheduler(_LRScheduler): 37 | """ Consine annealing with warm up and restarts. 38 | Proposed in `SGDR: Stochastic Gradient Descent with Warm Restarts`. 
39 | """ 40 | def __init__(self, optimizer, total_epoch=150, start_cyclical=100, cyclical_base_lr=7e-4, cyclical_epoch=10, eta_min=0, warmup_epoch=10, last_epoch=-1): 41 | self.total_epoch = total_epoch 42 | self.start_cyclical = start_cyclical 43 | self.cyclical_epoch = cyclical_epoch 44 | self.cyclical_base_lr = cyclical_base_lr 45 | self.eta_min = eta_min 46 | self.warmup_epoch = warmup_epoch 47 | super(SGDRScheduler, self).__init__(optimizer, last_epoch) 48 | 49 | def get_lr(self): 50 | if self.last_epoch < self.warmup_epoch: 51 | return [self.eta_min + self.last_epoch*(base_lr - self.eta_min)/self.warmup_epoch for base_lr in self.base_lrs] 52 | elif self.last_epoch < self.start_cyclical: 53 | return [self.eta_min + (base_lr-self.eta_min)*(1+math.cos(math.pi*(self.last_epoch-self.warmup_epoch)/(self.start_cyclical-self.warmup_epoch))) / 2 for base_lr in self.base_lrs] 54 | else: 55 | return [self.eta_min + (self.cyclical_base_lr-self.eta_min)*(1+math.cos(math.pi* ((self.last_epoch-self.start_cyclical)% self.cyclical_epoch)/self.cyclical_epoch)) / 2 for base_lr in self.base_lrs] 56 | 57 | 58 | if __name__ == '__main__': 59 | import matplotlib.pyplot as plt 60 | import torch 61 | model = torch.nn.Linear(10, 2) 62 | optimizer = torch.optim.SGD(params=model.parameters(), lr=7e-3, momentum=0.9, weight_decay=5e-4) 63 | scheduler_warmup = SGDRScheduler(optimizer, total_epoch=150, eta_min=7e-5, warmup_epoch=10, start_cyclical=100, cyclical_base_lr=3.5e-3, cyclical_epoch=10) 64 | lr = [] 65 | for epoch in range(0,150): 66 | scheduler_warmup.step(epoch) 67 | lr.append(scheduler_warmup.get_lr()) 68 | plt.style.use('ggplot') 69 | plt.plot(list(range(0,150)), lr) 70 | plt.show() 71 | 72 | -------------------------------------------------------------------------------- /HumanParserLIPCustomNode.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from PIL import Image 4 | 5 | from .utils import generate 6 | 7 | class HumanParserLIPCustomNode: 8 | @classmethod 9 | def INPUT_TYPES(cls): 10 | return { 11 | "required": { 12 | "image" : ("IMAGE", {}), 13 | "background": ("BOOLEAN", {"default": False}), 14 | "hat": ("BOOLEAN", {"default": False}), 15 | "hair": ("BOOLEAN", {"default": False}), 16 | "glove": ("BOOLEAN", {"default": False}), 17 | "sunglasses": ("BOOLEAN", {"default": False}), 18 | "upper_clothes": ("BOOLEAN", {"default": False}), 19 | "dress": ("BOOLEAN", {"default": False}), 20 | "coat": ("BOOLEAN", {"default": False}), 21 | "socks": ("BOOLEAN", {"default": False}), 22 | "pants": ("BOOLEAN", {"default": False}), 23 | "jumpsuits": ("BOOLEAN", {"default": False}), 24 | "scarf": ("BOOLEAN", {"default": False}), 25 | "skirt": ("BOOLEAN", {"default": False}), 26 | "face": ("BOOLEAN", {"default": False}), 27 | "left_arm": ("BOOLEAN", {"default": False}), 28 | "right_arm": ("BOOLEAN", {"default": False}), 29 | "left_leg": ("BOOLEAN", {"default": False}), 30 | "right_leg": ("BOOLEAN", {"default": False}), 31 | "left_shoe": ("BOOLEAN", {"default": False}), 32 | "right_shoe": ("BOOLEAN", {"default": False}), 33 | }, 34 | } 35 | 36 | RETURN_TYPES = ("MASK", "IMAGE") 37 | RETURN_NAMES = ("mask", "map") 38 | FUNCTION = "run" 39 | CATEGORY = "CozyMantis" 40 | 41 | def run(self, image, background, hat, hair, glove, sunglasses, upper_clothes, dress, coat, socks, pants, jumpsuits, scarf, skirt, face, left_arm, right_arm, left_leg, right_leg, left_shoe, right_shoe): 42 | if torch.cuda.is_available(): 43 | device = 'cuda' 44 | else: 
45 | device = 'cpu' 46 | 47 | output_img = generate(image[0], 'lip', device) 48 | 49 | mask_components = [] 50 | 51 | if background: 52 | mask_components.append(0) 53 | if hat: 54 | mask_components.append(1) 55 | if hair: 56 | mask_components.append(2) 57 | if glove: 58 | mask_components.append(3) 59 | if sunglasses: 60 | mask_components.append(4) 61 | if upper_clothes: 62 | mask_components.append(5) 63 | if dress: 64 | mask_components.append(6) 65 | if coat: 66 | mask_components.append(7) 67 | if socks: 68 | mask_components.append(8) 69 | if pants: 70 | mask_components.append(9) 71 | if jumpsuits: 72 | mask_components.append(10) 73 | if scarf: 74 | mask_components.append(11) 75 | if skirt: 76 | mask_components.append(12) 77 | if face: 78 | mask_components.append(13) 79 | if left_arm: 80 | mask_components.append(14) 81 | if right_arm: 82 | mask_components.append(15) 83 | if left_leg: 84 | mask_components.append(16) 85 | if right_leg: 86 | mask_components.append(17) 87 | if left_shoe: 88 | mask_components.append(18) 89 | if right_shoe: 90 | mask_components.append(19) 91 | 92 | mask = np.isin(output_img, mask_components).astype(np.uint8) 93 | mask_image = Image.fromarray(mask * 255) 94 | mask_image = mask_image.convert("RGB") 95 | mask_image = torch.from_numpy(np.array(mask_image).astype(np.float32) / 255.0).unsqueeze(0) 96 | 97 | output_img = output_img.convert('RGB') 98 | output_img = torch.from_numpy(np.array(output_img).astype(np.float32) / 255.0).unsqueeze(0) 99 | return (mask_image[:, :, :, 0], output_img,) 100 | -------------------------------------------------------------------------------- /schp/modules/src/inplace_abn.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | std::vector mean_var_cpu(at::Tensor x); 8 | std::vector mean_var_cuda(at::Tensor x); 9 | std::vector mean_var_cuda_h(at::Tensor x); 10 | 11 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 12 | bool affine, float eps); 13 | at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 14 | bool affine, float eps); 15 | at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 16 | bool affine, float eps); 17 | 18 | std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 19 | bool affine, float eps); 20 | std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 21 | bool affine, float eps); 22 | std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 23 | bool affine, float eps); 24 | 25 | at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 26 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 27 | at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 28 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 29 | at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 30 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 31 | 32 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope); 33 | void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope); 34 | void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope); 35 | 36 | void 
elu_backward_cpu(at::Tensor z, at::Tensor dz); 37 | void elu_backward_cuda(at::Tensor z, at::Tensor dz); 38 | 39 | static void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) { 40 | num = x.size(0); 41 | chn = x.size(1); 42 | sp = 1; 43 | for (int64_t i = 2; i < x.ndimension(); ++i) 44 | sp *= x.size(i); 45 | } 46 | 47 | /* 48 | * Specialized CUDA reduction functions for BN 49 | */ 50 | #ifdef __CUDACC__ 51 | 52 | #include "utils/cuda.cuh" 53 | 54 | template 55 | __device__ T reduce(Op op, int plane, int N, int S) { 56 | T sum = (T)0; 57 | for (int batch = 0; batch < N; ++batch) { 58 | for (int x = threadIdx.x; x < S; x += blockDim.x) { 59 | sum += op(batch, plane, x); 60 | } 61 | } 62 | 63 | // sum over NumThreads within a warp 64 | sum = warpSum(sum); 65 | 66 | // 'transpose', and reduce within warp again 67 | __shared__ T shared[32]; 68 | __syncthreads(); 69 | if (threadIdx.x % WARP_SIZE == 0) { 70 | shared[threadIdx.x / WARP_SIZE] = sum; 71 | } 72 | if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) { 73 | // zero out the other entries in shared 74 | shared[threadIdx.x] = (T)0; 75 | } 76 | __syncthreads(); 77 | if (threadIdx.x / WARP_SIZE == 0) { 78 | sum = warpSum(shared[threadIdx.x]); 79 | if (threadIdx.x == 0) { 80 | shared[0] = sum; 81 | } 82 | } 83 | __syncthreads(); 84 | 85 | // Everyone picks it up, should be broadcast into the whole gradInput 86 | return shared[0]; 87 | } 88 | #endif 89 | -------------------------------------------------------------------------------- /schp/utils/soft_dice_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : soft_dice_loss.py 8 | @Time : 8/13/19 5:09 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 12 | """ 13 | 14 | from __future__ import print_function, division 15 | 16 | import torch 17 | import torch.nn.functional as F 18 | from torch import nn 19 | 20 | try: 21 | from itertools import ifilterfalse 22 | except ImportError: # py3k 23 | from itertools import filterfalse as ifilterfalse 24 | 25 | 26 | def tversky_loss(probas, labels, alpha=0.5, beta=0.5, epsilon=1e-6): 27 | ''' 28 | Tversky loss function. 29 | probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1) 30 | labels: [P] Tensor, ground truth labels (between 0 and C - 1) 31 | 32 | Same as soft dice loss when alpha=beta=0.5. 33 | Same as Jaccord loss when alpha=beta=1.0. 
34 | See `Tversky loss function for image segmentation using 3D fully convolutional deep networks` 35 | https://arxiv.org/pdf/1706.05721.pdf 36 | ''' 37 | C = probas.size(1) 38 | losses = [] 39 | for c in list(range(C)): 40 | fg = (labels == c).float() 41 | if fg.sum() == 0: 42 | continue 43 | class_pred = probas[:, c] 44 | p0 = class_pred 45 | p1 = 1 - class_pred 46 | g0 = fg 47 | g1 = 1 - fg 48 | numerator = torch.sum(p0 * g0) 49 | denominator = numerator + alpha * torch.sum(p0 * g1) + beta * torch.sum(p1 * g0) 50 | losses.append(1 - ((numerator) / (denominator + epsilon))) 51 | return mean(losses) 52 | 53 | 54 | def flatten_probas(probas, labels, ignore=255): 55 | """ 56 | Flattens predictions in the batch 57 | """ 58 | B, C, H, W = probas.size() 59 | probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C 60 | labels = labels.view(-1) 61 | if ignore is None: 62 | return probas, labels 63 | valid = (labels != ignore) 64 | vprobas = probas[valid.nonzero().squeeze()] 65 | vlabels = labels[valid] 66 | return vprobas, vlabels 67 | 68 | 69 | def isnan(x): 70 | return x != x 71 | 72 | 73 | def mean(l, ignore_nan=False, empty=0): 74 | """ 75 | nanmean compatible with generators. 76 | """ 77 | l = iter(l) 78 | if ignore_nan: 79 | l = ifilterfalse(isnan, l) 80 | try: 81 | n = 1 82 | acc = next(l) 83 | except StopIteration: 84 | if empty == 'raise': 85 | raise ValueError('Empty mean') 86 | return empty 87 | for n, v in enumerate(l, 2): 88 | acc += v 89 | if n == 1: 90 | return acc 91 | return acc / n 92 | 93 | 94 | class SoftDiceLoss(nn.Module): 95 | def __init__(self, ignore_index=255): 96 | super(SoftDiceLoss, self).__init__() 97 | self.ignore_index = ignore_index 98 | 99 | def forward(self, pred, label): 100 | pred = F.softmax(pred, dim=1) 101 | return tversky_loss(*flatten_probas(pred, label, ignore=self.ignore_index), alpha=0.5, beta=0.5) 102 | 103 | 104 | class SoftJaccordLoss(nn.Module): 105 | def __init__(self, ignore_index=255): 106 | super(SoftJaccordLoss, self).__init__() 107 | self.ignore_index = ignore_index 108 | 109 | def forward(self, pred, label): 110 | pred = F.softmax(pred, dim=1) 111 | return tversky_loss(*flatten_probas(pred, label, ignore=self.ignore_index), alpha=1.0, beta=1.0) 112 | -------------------------------------------------------------------------------- /schp/modules/deeplab.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as functional 4 | 5 | from models._util import try_index 6 | from .bn import ABN 7 | 8 | 9 | class DeeplabV3(nn.Module): 10 | def __init__(self, 11 | in_channels, 12 | out_channels, 13 | hidden_channels=256, 14 | dilations=(12, 24, 36), 15 | norm_act=ABN, 16 | pooling_size=None): 17 | super(DeeplabV3, self).__init__() 18 | self.pooling_size = pooling_size 19 | 20 | self.map_convs = nn.ModuleList([ 21 | nn.Conv2d(in_channels, hidden_channels, 1, bias=False), 22 | nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[0], padding=dilations[0]), 23 | nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[1], padding=dilations[1]), 24 | nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[2], padding=dilations[2]) 25 | ]) 26 | self.map_bn = norm_act(hidden_channels * 4) 27 | 28 | self.global_pooling_conv = nn.Conv2d(in_channels, hidden_channels, 1, bias=False) 29 | self.global_pooling_bn = norm_act(hidden_channels) 30 | 31 | self.red_conv = 
nn.Conv2d(hidden_channels * 4, out_channels, 1, bias=False) 32 | self.pool_red_conv = nn.Conv2d(hidden_channels, out_channels, 1, bias=False) 33 | self.red_bn = norm_act(out_channels) 34 | 35 | self.reset_parameters(self.map_bn.activation, self.map_bn.slope) 36 | 37 | def reset_parameters(self, activation, slope): 38 | gain = nn.init.calculate_gain(activation, slope) 39 | for m in self.modules(): 40 | if isinstance(m, nn.Conv2d): 41 | nn.init.xavier_normal_(m.weight.data, gain) 42 | if hasattr(m, "bias") and m.bias is not None: 43 | nn.init.constant_(m.bias, 0) 44 | elif isinstance(m, ABN): 45 | if hasattr(m, "weight") and m.weight is not None: 46 | nn.init.constant_(m.weight, 1) 47 | if hasattr(m, "bias") and m.bias is not None: 48 | nn.init.constant_(m.bias, 0) 49 | 50 | def forward(self, x): 51 | # Map convolutions 52 | out = torch.cat([m(x) for m in self.map_convs], dim=1) 53 | out = self.map_bn(out) 54 | out = self.red_conv(out) 55 | 56 | # Global pooling 57 | pool = self._global_pooling(x) 58 | pool = self.global_pooling_conv(pool) 59 | pool = self.global_pooling_bn(pool) 60 | pool = self.pool_red_conv(pool) 61 | if self.training or self.pooling_size is None: 62 | pool = pool.repeat(1, 1, x.size(2), x.size(3)) 63 | 64 | out += pool 65 | out = self.red_bn(out) 66 | return out 67 | 68 | def _global_pooling(self, x): 69 | if self.training or self.pooling_size is None: 70 | pool = x.view(x.size(0), x.size(1), -1).mean(dim=-1) 71 | pool = pool.view(x.size(0), x.size(1), 1, 1) 72 | else: 73 | pooling_size = (min(try_index(self.pooling_size, 0), x.shape[2]), 74 | min(try_index(self.pooling_size, 1), x.shape[3])) 75 | padding = ( 76 | (pooling_size[1] - 1) // 2, 77 | (pooling_size[1] - 1) // 2 if pooling_size[1] % 2 == 1 else (pooling_size[1] - 1) // 2 + 1, 78 | (pooling_size[0] - 1) // 2, 79 | (pooling_size[0] - 1) // 2 if pooling_size[0] % 2 == 1 else (pooling_size[0] - 1) // 2 + 1 80 | ) 81 | 82 | pool = functional.avg_pool2d(x, pooling_size, stride=1) 83 | pool = functional.pad(pool, pad=padding, mode="replicate") 84 | return pool 85 | -------------------------------------------------------------------------------- /schp/modules/src/inplace_abn_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "utils/checks.h" 6 | #include "inplace_abn.h" 7 | 8 | at::Tensor reduce_sum(at::Tensor x) { 9 | if (x.ndimension() == 2) { 10 | return x.sum(0); 11 | } else { 12 | auto x_view = x.view({x.size(0), x.size(1), -1}); 13 | return x_view.sum(-1).sum(0); 14 | } 15 | } 16 | 17 | at::Tensor broadcast_to(at::Tensor v, at::Tensor x) { 18 | if (x.ndimension() == 2) { 19 | return v; 20 | } else { 21 | std::vector broadcast_size = {1, -1}; 22 | for (int64_t i = 2; i < x.ndimension(); ++i) 23 | broadcast_size.push_back(1); 24 | 25 | return v.view(broadcast_size); 26 | } 27 | } 28 | 29 | int64_t count(at::Tensor x) { 30 | int64_t count = x.size(0); 31 | for (int64_t i = 2; i < x.ndimension(); ++i) 32 | count *= x.size(i); 33 | 34 | return count; 35 | } 36 | 37 | at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) { 38 | if (affine) { 39 | return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z); 40 | } else { 41 | return z; 42 | } 43 | } 44 | 45 | std::vector mean_var_cpu(at::Tensor x) { 46 | auto num = count(x); 47 | auto mean = reduce_sum(x) / num; 48 | auto diff = x - broadcast_to(mean, x); 49 | auto var = reduce_sum(diff.pow(2)) / num; 50 | 51 | return {mean, 
var}; 52 | } 53 | 54 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 55 | bool affine, float eps) { 56 | auto gamma = affine ? at::abs(weight) + eps : at::ones_like(var); 57 | auto mul = at::rsqrt(var + eps) * gamma; 58 | 59 | x.sub_(broadcast_to(mean, x)); 60 | x.mul_(broadcast_to(mul, x)); 61 | if (affine) x.add_(broadcast_to(bias, x)); 62 | 63 | return x; 64 | } 65 | 66 | std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 67 | bool affine, float eps) { 68 | auto edz = reduce_sum(dz); 69 | auto y = invert_affine(z, weight, bias, affine, eps); 70 | auto eydz = reduce_sum(y * dz); 71 | 72 | return {edz, eydz}; 73 | } 74 | 75 | at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 76 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) { 77 | auto y = invert_affine(z, weight, bias, affine, eps); 78 | auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps); 79 | 80 | auto num = count(z); 81 | auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz); 82 | return dx; 83 | } 84 | 85 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) { 86 | CHECK_CPU_INPUT(z); 87 | CHECK_CPU_INPUT(dz); 88 | 89 | AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] { 90 | int64_t count = z.numel(); 91 | auto *_z = z.data(); 92 | auto *_dz = dz.data(); 93 | 94 | for (int64_t i = 0; i < count; ++i) { 95 | if (_z[i] < 0) { 96 | _z[i] *= 1 / slope; 97 | _dz[i] *= slope; 98 | } 99 | } 100 | })); 101 | } 102 | 103 | void elu_backward_cpu(at::Tensor z, at::Tensor dz) { 104 | CHECK_CPU_INPUT(z); 105 | CHECK_CPU_INPUT(dz); 106 | 107 | AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] { 108 | int64_t count = z.numel(); 109 | auto *_z = z.data(); 110 | auto *_dz = dz.data(); 111 | 112 | for (int64_t i = 0; i < count; ++i) { 113 | if (_z[i] < 0) { 114 | _z[i] = log1p(_z[i]); 115 | _dz[i] *= (_z[i] + 1.f); 116 | } 117 | } 118 | })); 119 | } 120 | -------------------------------------------------------------------------------- /schp/modules/src/inplace_abn.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "inplace_abn.h" 6 | 7 | std::vector mean_var(at::Tensor x) 8 | { 9 | if (x.is_cuda()) 10 | { 11 | if (x.type().scalarType() == at::ScalarType::Half) 12 | { 13 | return mean_var_cuda_h(x); 14 | } 15 | else 16 | { 17 | return mean_var_cuda(x); 18 | } 19 | } 20 | else 21 | { 22 | return mean_var_cpu(x); 23 | } 24 | } 25 | 26 | at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 27 | bool affine, float eps) 28 | { 29 | if (x.is_cuda()) 30 | { 31 | if (x.type().scalarType() == at::ScalarType::Half) 32 | { 33 | return forward_cuda_h(x, mean, var, weight, bias, affine, eps); 34 | } 35 | else 36 | { 37 | return forward_cuda(x, mean, var, weight, bias, affine, eps); 38 | } 39 | } 40 | else 41 | { 42 | return forward_cpu(x, mean, var, weight, bias, affine, eps); 43 | } 44 | } 45 | 46 | std::vector edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 47 | bool affine, float eps) 48 | { 49 | if (z.is_cuda()) 50 | { 51 | if (z.type().scalarType() == at::ScalarType::Half) 52 | { 53 | return edz_eydz_cuda_h(z, dz, weight, bias, affine, eps); 54 | } 55 | else 56 | { 57 | return edz_eydz_cuda(z, dz, weight, 
bias, affine, eps); 58 | } 59 | } 60 | else 61 | { 62 | return edz_eydz_cpu(z, dz, weight, bias, affine, eps); 63 | } 64 | } 65 | 66 | at::Tensor backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 67 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) 68 | { 69 | if (z.is_cuda()) 70 | { 71 | if (z.type().scalarType() == at::ScalarType::Half) 72 | { 73 | return backward_cuda_h(z, dz, var, weight, bias, edz, eydz, affine, eps); 74 | } 75 | else 76 | { 77 | return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps); 78 | } 79 | } 80 | else 81 | { 82 | return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps); 83 | } 84 | } 85 | 86 | void leaky_relu_forward(at::Tensor z, float slope) 87 | { 88 | at::leaky_relu_(z, slope); 89 | } 90 | 91 | void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) 92 | { 93 | if (z.is_cuda()) 94 | { 95 | if (z.type().scalarType() == at::ScalarType::Half) 96 | { 97 | return leaky_relu_backward_cuda_h(z, dz, slope); 98 | } 99 | else 100 | { 101 | return leaky_relu_backward_cuda(z, dz, slope); 102 | } 103 | } 104 | else 105 | { 106 | return leaky_relu_backward_cpu(z, dz, slope); 107 | } 108 | } 109 | 110 | void elu_forward(at::Tensor z) 111 | { 112 | at::elu_(z); 113 | } 114 | 115 | void elu_backward(at::Tensor z, at::Tensor dz) 116 | { 117 | if (z.is_cuda()) 118 | { 119 | return elu_backward_cuda(z, dz); 120 | } 121 | else 122 | { 123 | return elu_backward_cpu(z, dz); 124 | } 125 | } 126 | 127 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) 128 | { 129 | m.def("mean_var", &mean_var, "Mean and variance computation"); 130 | m.def("forward", &forward, "In-place forward computation"); 131 | m.def("edz_eydz", &edz_eydz, "First part of backward computation"); 132 | m.def("backward", &backward, "Second part of backward computation"); 133 | m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation"); 134 | m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion"); 135 | m.def("elu_forward", &elu_forward, "Elu forward computation"); 136 | m.def("elu_backward", &elu_backward, "Elu backward computation and inversion"); 137 | } 138 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ -------------------------------------------------------------------------------- /schp/modules/src/inplace_abn_cpu_only.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "inplace_abn.h" 6 | 7 | std::vector mean_var(at::Tensor x) 8 | { 9 | if (x.is_cuda()) 10 | { 11 | if (x.type().scalarType() == at::ScalarType::Half) 12 | { 13 | return mean_var_cuda_h(x); 14 | } 15 | else 16 | { 17 | return mean_var_cuda(x); 18 | } 19 | } 20 | else 21 | { 22 | return mean_var_cpu(x); 23 | } 24 | } 25 | 26 | at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 27 | bool affine, float eps) {} 28 | at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 29 | bool affine, float eps) {} 30 | at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 31 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) {} 32 | at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 33 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) {} 34 | std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 35 | bool affine, float eps) {} 36 | std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 37 | bool affine, float eps) {} 38 | std::vector mean_var_cuda(at::Tensor x) {} 39 | std::vector mean_var_cuda_h(at::Tensor x) {} 40 | void elu_backward_cuda(at::Tensor z, at::Tensor dz) {} 41 | void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) {} 42 | void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope) {} 43 | 44 | at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 45 | bool affine, float eps) 46 | { 47 | if (x.is_cuda()) 48 | { 49 | if (x.type().scalarType() == at::ScalarType::Half) 50 | { 51 | return forward_cuda_h(x, mean, var, weight, bias, affine, eps); 52 | } 53 | else 54 | { 55 | return forward_cuda(x, mean, var, weight, bias, affine, eps); 56 | } 57 | } 58 | else 59 | { 60 | return forward_cpu(x, mean, var, weight, bias, affine, eps); 61 | } 62 | } 63 | 64 | std::vector edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 65 | bool affine, float eps) 66 | { 67 | if (z.is_cuda()) 68 | { 69 | if (z.type().scalarType() == at::ScalarType::Half) 70 | { 71 | return edz_eydz_cuda_h(z, dz, weight, bias, affine, eps); 72 | } 73 | else 74 | { 75 | return edz_eydz_cuda(z, dz, weight, bias, affine, eps); 76 | } 77 | } 78 | else 79 | { 80 | return edz_eydz_cpu(z, dz, weight, bias, affine, eps); 81 | } 82 | } 83 | 84 | at::Tensor backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 85 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) 86 | { 87 | if (z.is_cuda()) 88 | { 89 | if (z.type().scalarType() == at::ScalarType::Half) 90 | { 91 | return backward_cuda_h(z, dz, var, weight, bias, edz, eydz, affine, eps); 92 | } 93 | else 94 | { 95 | return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps); 96 | } 97 | } 98 | else 99 | { 100 | return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps); 101 | } 102 | } 103 | 104 | void leaky_relu_forward(at::Tensor z, float slope) 105 | { 106 | at::leaky_relu_(z, slope); 107 | } 108 | 109 | void leaky_relu_backward(at::Tensor z, at::Tensor dz, 
float slope) 110 | { 111 | if (z.is_cuda()) 112 | { 113 | if (z.type().scalarType() == at::ScalarType::Half) 114 | { 115 | return leaky_relu_backward_cuda_h(z, dz, slope); 116 | } 117 | else 118 | { 119 | return leaky_relu_backward_cuda(z, dz, slope); 120 | } 121 | } 122 | else 123 | { 124 | return leaky_relu_backward_cpu(z, dz, slope); 125 | } 126 | } 127 | 128 | void elu_forward(at::Tensor z) 129 | { 130 | at::elu_(z); 131 | } 132 | 133 | void elu_backward(at::Tensor z, at::Tensor dz) 134 | { 135 | if (z.is_cuda()) 136 | { 137 | return elu_backward_cuda(z, dz); 138 | } 139 | else 140 | { 141 | return elu_backward_cpu(z, dz); 142 | } 143 | } 144 | 145 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) 146 | { 147 | m.def("mean_var", &mean_var, "Mean and variance computation"); 148 | m.def("forward", &forward, "In-place forward computation"); 149 | m.def("edz_eydz", &edz_eydz, "First part of backward computation"); 150 | m.def("backward", &backward, "Second part of backward computation"); 151 | m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation"); 152 | m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion"); 153 | m.def("elu_forward", &elu_forward, "Elu forward computation"); 154 | m.def("elu_backward", &elu_backward, "Elu backward computation and inversion"); 155 | } 156 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torch 3 | import os 4 | import numpy as np 5 | from collections import OrderedDict 6 | import torchvision.transforms as transforms 7 | from PIL import Image 8 | 9 | from .schp import networks 10 | from .schp.utils.transforms import transform_logits, get_affine_transform 11 | 12 | dataset_settings = { 13 | 'lip': { 14 | 'input_size': [473, 473], 15 | 'num_classes': 20, 16 | 'label': ['Background', 'Hat', 'Hair', 'Glove', 'Sunglasses', 'Upper-clothes', 'Dress', 'Coat', 17 | 'Socks', 'Pants', 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm', 'Right-arm', 18 | 'Left-leg', 'Right-leg', 'Left-shoe', 'Right-shoe'] 19 | }, 20 | 'atr': { 21 | 'input_size': [512, 512], 22 | 'num_classes': 18, 23 | 'label': ['Background', 'Hat', 'Hair', 'Sunglasses', 'Upper-clothes', 'Skirt', 'Pants', 'Dress', 'Belt', 24 | 'Left-shoe', 'Right-shoe', 'Face', 'Left-leg', 'Right-leg', 'Left-arm', 'Right-arm', 'Bag', 'Scarf'] 25 | }, 26 | 'pascal': { 27 | 'input_size': [512, 512], 28 | 'num_classes': 7, 29 | 'label': ['Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs'], 30 | } 31 | } 32 | 33 | def get_palette(num_cls): 34 | """ Returns the color map for visualizing the segmentation mask. 
35 | Args: 36 | num_cls: Number of classes 37 | Returns: 38 | The color map 39 | """ 40 | n = num_cls 41 | palette = [0] * (n * 3) 42 | for j in range(0, n): 43 | lab = j 44 | palette[j * 3 + 0] = 0 45 | palette[j * 3 + 1] = 0 46 | palette[j * 3 + 2] = 0 47 | i = 0 48 | while lab: 49 | palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) 50 | palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) 51 | palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) 52 | i += 1 53 | lab >>= 3 54 | return palette 55 | 56 | def _box2cs(box, aspect_ratio): 57 | x, y, w, h = box[:4] 58 | return _xywh2cs(x, y, w, h, aspect_ratio) 59 | 60 | def _xywh2cs(x, y, w, h, aspect_ratio): 61 | center = np.zeros((2), dtype=np.float32) 62 | center[0] = x + w * 0.5 63 | center[1] = y + h * 0.5 64 | if w > aspect_ratio * h: 65 | h = w * 1.0 / aspect_ratio 66 | elif w < aspect_ratio * h: 67 | w = h * aspect_ratio 68 | scale = np.array([w, h], dtype=np.float32) 69 | return center, scale 70 | 71 | def check_model_path(model_path): 72 | # Checks to see if the model exists, if not try adding ComfyUI/ to the start to fix possible errors on Windows (maybe others too) 73 | if not os.path.exists(model_path): 74 | new_model_path = os.path.join("ComfyUI", model_path) 75 | if os.path.exists(new_model_path): 76 | return new_model_path 77 | return model_path 78 | 79 | def generate(image, type, device): 80 | num_classes = dataset_settings[type]['num_classes'] 81 | input_size = dataset_settings[type]['input_size'] 82 | aspect_ratio = input_size[1] * 1.0 / input_size[0] 83 | if type == 'lip': 84 | model_path = 'models/schp/exp-schp-201908261155-lip.pth' 85 | elif type == 'atr': 86 | model_path = 'models/schp/exp-schp-201908301523-atr.pth' 87 | elif type == 'pascal': 88 | model_path = 'models/schp/exp-schp-201908270938-pascal-person-part.pth' 89 | 90 | # Check and adjust the model path if necessary 91 | model_path = check_model_path(model_path) 92 | 93 | model = networks.init_model('resnet101', num_classes=num_classes, pretrained=None) 94 | state_dict = torch.load(model_path)['state_dict'] 95 | new_state_dict = OrderedDict() 96 | for k, v in state_dict.items(): 97 | name = k[7:] 98 | new_state_dict[name] = v 99 | model.load_state_dict(new_state_dict) 100 | model.to(device) 101 | model.eval() 102 | 103 | # Get person center and scale 104 | input = 255. 
* image.cpu().numpy() 105 | input = np.clip(input, 0, 255).astype(np.uint8) 106 | input = cv2.cvtColor(input, cv2.COLOR_RGB2BGR) 107 | h, w, _ = input.shape 108 | 109 | person_center, s = _box2cs([0, 0, w - 1, h - 1], aspect_ratio) 110 | trans = get_affine_transform(person_center, s, 0, input_size) 111 | input = cv2.warpAffine( 112 | input, 113 | trans, 114 | (int(input_size[1]), int(input_size[0])), 115 | flags=cv2.INTER_LINEAR, 116 | borderMode=cv2.BORDER_CONSTANT, 117 | borderValue=(0, 0, 0)) 118 | 119 | transform = transforms.Compose([ 120 | transforms.ToTensor(), 121 | transforms.Normalize(mean=[0.406, 0.456, 0.485], std=[0.225, 0.224, 0.229]) 122 | ]) 123 | input = transform(input) 124 | 125 | palette = get_palette(num_classes) 126 | with torch.no_grad(): 127 | input = input[None, :, :, :] 128 | output = model(input.to(device)) 129 | upsample = torch.nn.Upsample(size=input_size, mode='bilinear', align_corners=True) 130 | upsample_output = upsample(output[0][-1][0].unsqueeze(0)) 131 | upsample_output = upsample_output.squeeze() 132 | upsample_output = upsample_output.permute(1, 2, 0) # CHW -> HWC 133 | 134 | logits_result = transform_logits(upsample_output.data.cpu().numpy(), person_center, s, w, h, input_size=input_size) 135 | parsing_result = np.argmax(logits_result, axis=2) 136 | 137 | output_img = Image.fromarray(np.asarray(parsing_result, dtype=np.uint8)) 138 | output_img.putpalette(palette) 139 | return output_img 140 | -------------------------------------------------------------------------------- /schp/modules/bn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as functional 4 | 5 | try: 6 | from queue import Queue 7 | except ImportError: 8 | from Queue import Queue 9 | 10 | from .functions import * 11 | 12 | 13 | class ABN(nn.Module): 14 | """Activated Batch Normalization 15 | 16 | This gathers a `BatchNorm2d` and an activation function in a single module 17 | """ 18 | 19 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): 20 | """Creates an Activated Batch Normalization module 21 | 22 | Parameters 23 | ---------- 24 | num_features : int 25 | Number of feature channels in the input and output. 26 | eps : float 27 | Small constant to prevent numerical issues. 28 | momentum : float 29 | Momentum factor applied to compute running statistics as. 30 | affine : bool 31 | If `True` apply learned scale and shift transformation after normalization. 32 | activation : str 33 | Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. 34 | slope : float 35 | Negative slope for the `leaky_relu` activation. 
36 | """ 37 | super(ABN, self).__init__() 38 | self.num_features = num_features 39 | self.affine = affine 40 | self.eps = eps 41 | self.momentum = momentum 42 | self.activation = activation 43 | self.slope = slope 44 | if self.affine: 45 | self.weight = nn.Parameter(torch.ones(num_features)) 46 | self.bias = nn.Parameter(torch.zeros(num_features)) 47 | else: 48 | self.register_parameter('weight', None) 49 | self.register_parameter('bias', None) 50 | self.register_buffer('running_mean', torch.zeros(num_features)) 51 | self.register_buffer('running_var', torch.ones(num_features)) 52 | self.reset_parameters() 53 | 54 | def reset_parameters(self): 55 | nn.init.constant_(self.running_mean, 0) 56 | nn.init.constant_(self.running_var, 1) 57 | if self.affine: 58 | nn.init.constant_(self.weight, 1) 59 | nn.init.constant_(self.bias, 0) 60 | 61 | def forward(self, x): 62 | x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias, 63 | self.training, self.momentum, self.eps) 64 | 65 | if self.activation == ACT_RELU: 66 | return functional.relu(x, inplace=True) 67 | elif self.activation == ACT_LEAKY_RELU: 68 | return functional.leaky_relu(x, negative_slope=self.slope, inplace=True) 69 | elif self.activation == ACT_ELU: 70 | return functional.elu(x, inplace=True) 71 | else: 72 | return x 73 | 74 | def __repr__(self): 75 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ 76 | ' affine={affine}, activation={activation}' 77 | if self.activation == "leaky_relu": 78 | rep += ', slope={slope})' 79 | else: 80 | rep += ')' 81 | return rep.format(name=self.__class__.__name__, **self.__dict__) 82 | 83 | 84 | class InPlaceABN(ABN): 85 | """InPlace Activated Batch Normalization""" 86 | 87 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): 88 | """Creates an InPlace Activated Batch Normalization module 89 | 90 | Parameters 91 | ---------- 92 | num_features : int 93 | Number of feature channels in the input and output. 94 | eps : float 95 | Small constant to prevent numerical issues. 96 | momentum : float 97 | Momentum factor applied to compute running statistics as. 98 | affine : bool 99 | If `True` apply learned scale and shift transformation after normalization. 100 | activation : str 101 | Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. 102 | slope : float 103 | Negative slope for the `leaky_relu` activation. 104 | """ 105 | super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope) 106 | 107 | def forward(self, x): 108 | x, _, _ = inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var, 109 | self.training, self.momentum, self.eps, self.activation, self.slope) 110 | return x 111 | 112 | 113 | class InPlaceABNSync(ABN): 114 | """InPlace Activated Batch Normalization with cross-GPU synchronization 115 | This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DistributedDataParallel`. 
116 | """ 117 | 118 | def forward(self, x): 119 | x, _, _ = inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var, 120 | self.training, self.momentum, self.eps, self.activation, self.slope) 121 | return x 122 | 123 | def __repr__(self): 124 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ 125 | ' affine={affine}, activation={activation}' 126 | if self.activation == "leaky_relu": 127 | rep += ', slope={slope})' 128 | else: 129 | rep += ')' 130 | return rep.format(name=self.__class__.__name__, **self.__dict__) 131 | 132 | 133 | -------------------------------------------------------------------------------- /schp/networks/backbone/resnext.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : resnext.py.py 8 | @Time : 8/11/19 8:58 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 12 | """ 13 | import functools 14 | import torch.nn as nn 15 | import math 16 | from torch.utils.model_zoo import load_url 17 | 18 | from modules import InPlaceABNSync 19 | 20 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') 21 | 22 | __all__ = ['ResNeXt', 'resnext101'] # support resnext 101 23 | 24 | model_urls = { 25 | 'resnext50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext50-imagenet.pth', 26 | 'resnext101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext101-imagenet.pth' 27 | } 28 | 29 | 30 | def conv3x3(in_planes, out_planes, stride=1): 31 | "3x3 convolution with padding" 32 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 33 | padding=1, bias=False) 34 | 35 | 36 | class GroupBottleneck(nn.Module): 37 | expansion = 2 38 | 39 | def __init__(self, inplanes, planes, stride=1, groups=1, downsample=None): 40 | super(GroupBottleneck, self).__init__() 41 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 42 | self.bn1 = BatchNorm2d(planes) 43 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 44 | padding=1, groups=groups, bias=False) 45 | self.bn2 = BatchNorm2d(planes) 46 | self.conv3 = nn.Conv2d(planes, planes * 2, kernel_size=1, bias=False) 47 | self.bn3 = BatchNorm2d(planes * 2) 48 | self.relu = nn.ReLU(inplace=True) 49 | self.downsample = downsample 50 | self.stride = stride 51 | 52 | def forward(self, x): 53 | residual = x 54 | 55 | out = self.conv1(x) 56 | out = self.bn1(out) 57 | out = self.relu(out) 58 | 59 | out = self.conv2(out) 60 | out = self.bn2(out) 61 | out = self.relu(out) 62 | 63 | out = self.conv3(out) 64 | out = self.bn3(out) 65 | 66 | if self.downsample is not None: 67 | residual = self.downsample(x) 68 | 69 | out += residual 70 | out = self.relu(out) 71 | 72 | return out 73 | 74 | 75 | class ResNeXt(nn.Module): 76 | 77 | def __init__(self, block, layers, groups=32, num_classes=1000): 78 | self.inplanes = 128 79 | super(ResNeXt, self).__init__() 80 | self.conv1 = conv3x3(3, 64, stride=2) 81 | self.bn1 = BatchNorm2d(64) 82 | self.relu1 = nn.ReLU(inplace=True) 83 | self.conv2 = conv3x3(64, 64) 84 | self.bn2 = BatchNorm2d(64) 85 | self.relu2 = nn.ReLU(inplace=True) 86 | self.conv3 = conv3x3(64, 128) 87 | self.bn3 = BatchNorm2d(128) 88 | self.relu3 = nn.ReLU(inplace=True) 89 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 90 | 91 | self.layer1 = 
self._make_layer(block, 128, layers[0], groups=groups) 92 | self.layer2 = self._make_layer(block, 256, layers[1], stride=2, groups=groups) 93 | self.layer3 = self._make_layer(block, 512, layers[2], stride=2, groups=groups) 94 | self.layer4 = self._make_layer(block, 1024, layers[3], stride=2, groups=groups) 95 | self.avgpool = nn.AvgPool2d(7, stride=1) 96 | self.fc = nn.Linear(1024 * block.expansion, num_classes) 97 | 98 | for m in self.modules(): 99 | if isinstance(m, nn.Conv2d): 100 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels // m.groups 101 | m.weight.data.normal_(0, math.sqrt(2. / n)) 102 | elif isinstance(m, BatchNorm2d): 103 | m.weight.data.fill_(1) 104 | m.bias.data.zero_() 105 | 106 | def _make_layer(self, block, planes, blocks, stride=1, groups=1): 107 | downsample = None 108 | if stride != 1 or self.inplanes != planes * block.expansion: 109 | downsample = nn.Sequential( 110 | nn.Conv2d(self.inplanes, planes * block.expansion, 111 | kernel_size=1, stride=stride, bias=False), 112 | BatchNorm2d(planes * block.expansion), 113 | ) 114 | 115 | layers = [] 116 | layers.append(block(self.inplanes, planes, stride, groups, downsample)) 117 | self.inplanes = planes * block.expansion 118 | for i in range(1, blocks): 119 | layers.append(block(self.inplanes, planes, groups=groups)) 120 | 121 | return nn.Sequential(*layers) 122 | 123 | def forward(self, x): 124 | x = self.relu1(self.bn1(self.conv1(x))) 125 | x = self.relu2(self.bn2(self.conv2(x))) 126 | x = self.relu3(self.bn3(self.conv3(x))) 127 | x = self.maxpool(x) 128 | 129 | x = self.layer1(x) 130 | x = self.layer2(x) 131 | x = self.layer3(x) 132 | x = self.layer4(x) 133 | 134 | x = self.avgpool(x) 135 | x = x.view(x.size(0), -1) 136 | x = self.fc(x) 137 | 138 | return x 139 | 140 | 141 | def resnext101(pretrained=False, **kwargs): 142 | """Constructs a ResNet-101 model. 143 | Args: 144 | pretrained (bool): If True, returns a model pre-trained on Places 145 | """ 146 | model = ResNeXt(GroupBottleneck, [3, 4, 23, 3], **kwargs) 147 | if pretrained: 148 | model.load_state_dict(load_url(model_urls['resnext101']), strict=False) 149 | return model 150 | -------------------------------------------------------------------------------- /schp/networks/backbone/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : mobilenetv2.py 8 | @Time : 8/4/19 3:35 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 
12 | """ 13 | 14 | import torch.nn as nn 15 | import math 16 | import functools 17 | 18 | from modules import InPlaceABN, InPlaceABNSync 19 | 20 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') 21 | 22 | __all__ = ['mobilenetv2'] 23 | 24 | 25 | def conv_bn(inp, oup, stride): 26 | return nn.Sequential( 27 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 28 | BatchNorm2d(oup), 29 | nn.ReLU6(inplace=True) 30 | ) 31 | 32 | 33 | def conv_1x1_bn(inp, oup): 34 | return nn.Sequential( 35 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 36 | BatchNorm2d(oup), 37 | nn.ReLU6(inplace=True) 38 | ) 39 | 40 | 41 | class InvertedResidual(nn.Module): 42 | def __init__(self, inp, oup, stride, expand_ratio): 43 | super(InvertedResidual, self).__init__() 44 | self.stride = stride 45 | assert stride in [1, 2] 46 | 47 | hidden_dim = round(inp * expand_ratio) 48 | self.use_res_connect = self.stride == 1 and inp == oup 49 | 50 | if expand_ratio == 1: 51 | self.conv = nn.Sequential( 52 | # dw 53 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 54 | BatchNorm2d(hidden_dim), 55 | nn.ReLU6(inplace=True), 56 | # pw-linear 57 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 58 | BatchNorm2d(oup), 59 | ) 60 | else: 61 | self.conv = nn.Sequential( 62 | # pw 63 | nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), 64 | BatchNorm2d(hidden_dim), 65 | nn.ReLU6(inplace=True), 66 | # dw 67 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 68 | BatchNorm2d(hidden_dim), 69 | nn.ReLU6(inplace=True), 70 | # pw-linear 71 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 72 | BatchNorm2d(oup), 73 | ) 74 | 75 | def forward(self, x): 76 | if self.use_res_connect: 77 | return x + self.conv(x) 78 | else: 79 | return self.conv(x) 80 | 81 | 82 | class MobileNetV2(nn.Module): 83 | def __init__(self, n_class=1000, input_size=224, width_mult=1.): 84 | super(MobileNetV2, self).__init__() 85 | block = InvertedResidual 86 | input_channel = 32 87 | last_channel = 1280 88 | interverted_residual_setting = [ 89 | # t, c, n, s 90 | [1, 16, 1, 1], 91 | [6, 24, 2, 2], # layer 2 92 | [6, 32, 3, 2], # layer 3 93 | [6, 64, 4, 2], 94 | [6, 96, 3, 1], # layer 4 95 | [6, 160, 3, 2], 96 | [6, 320, 1, 1], # layer 5 97 | ] 98 | 99 | # building first layer 100 | assert input_size % 32 == 0 101 | input_channel = int(input_channel * width_mult) 102 | self.last_channel = int(last_channel * width_mult) if width_mult > 1.0 else last_channel 103 | self.features = [conv_bn(3, input_channel, 2)] 104 | # building inverted residual blocks 105 | for t, c, n, s in interverted_residual_setting: 106 | output_channel = int(c * width_mult) 107 | for i in range(n): 108 | if i == 0: 109 | self.features.append(block(input_channel, output_channel, s, expand_ratio=t)) 110 | else: 111 | self.features.append(block(input_channel, output_channel, 1, expand_ratio=t)) 112 | input_channel = output_channel 113 | # building last several layers 114 | self.features.append(conv_1x1_bn(input_channel, self.last_channel)) 115 | # make it nn.Sequential 116 | self.features = nn.Sequential(*self.features) 117 | 118 | # building classifier 119 | self.classifier = nn.Sequential( 120 | nn.Dropout(0.2), 121 | nn.Linear(self.last_channel, n_class), 122 | ) 123 | 124 | self._initialize_weights() 125 | 126 | def forward(self, x): 127 | x = self.features(x) 128 | x = x.mean(3).mean(2) 129 | x = self.classifier(x) 130 | return x 131 | 132 | def _initialize_weights(self): 133 | for m in self.modules(): 134 | if isinstance(m, 
nn.Conv2d): 135 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 136 | m.weight.data.normal_(0, math.sqrt(2. / n)) 137 | if m.bias is not None: 138 | m.bias.data.zero_() 139 | elif isinstance(m, BatchNorm2d): 140 | m.weight.data.fill_(1) 141 | m.bias.data.zero_() 142 | elif isinstance(m, nn.Linear): 143 | n = m.weight.size(1) 144 | m.weight.data.normal_(0, 0.01) 145 | m.bias.data.zero_() 146 | 147 | 148 | def mobilenetv2(pretrained=False, **kwargs): 149 | """Constructs a MobileNet_V2 model. 150 | Args: 151 | pretrained (bool): If True, returns a model pre-trained on ImageNet 152 | """ 153 | model = MobileNetV2(n_class=1000, **kwargs) 154 | if pretrained: 155 | model.load_state_dict(load_url(model_urls['mobilenetv2']), strict=False) 156 | return model 157 | -------------------------------------------------------------------------------- /schp/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import cv2 13 | import torch 14 | 15 | class BRG2Tensor_transform(object): 16 | def __call__(self, pic): 17 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 18 | if isinstance(img, torch.ByteTensor): 19 | return img.float() 20 | else: 21 | return img 22 | 23 | class BGR2RGB_transform(object): 24 | def __call__(self, tensor): 25 | return tensor[[2,1,0],:,:] 26 | 27 | def flip_back(output_flipped, matched_parts): 28 | ''' 29 | ouput_flipped: numpy.ndarray(batch_size, num_joints, height, width) 30 | ''' 31 | assert output_flipped.ndim == 4,\ 32 | 'output_flipped should be [batch_size, num_joints, height, width]' 33 | 34 | output_flipped = output_flipped[:, :, :, ::-1] 35 | 36 | for pair in matched_parts: 37 | tmp = output_flipped[:, pair[0], :, :].copy() 38 | output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :] 39 | output_flipped[:, pair[1], :, :] = tmp 40 | 41 | return output_flipped 42 | 43 | 44 | def fliplr_joints(joints, joints_vis, width, matched_parts): 45 | """ 46 | flip coords 47 | """ 48 | # Flip horizontal 49 | joints[:, 0] = width - joints[:, 0] - 1 50 | 51 | # Change left-right parts 52 | for pair in matched_parts: 53 | joints[pair[0], :], joints[pair[1], :] = \ 54 | joints[pair[1], :], joints[pair[0], :].copy() 55 | joints_vis[pair[0], :], joints_vis[pair[1], :] = \ 56 | joints_vis[pair[1], :], joints_vis[pair[0], :].copy() 57 | 58 | return joints*joints_vis, joints_vis 59 | 60 | 61 | def transform_preds(coords, center, scale, input_size): 62 | target_coords = np.zeros(coords.shape) 63 | trans = get_affine_transform(center, scale, 0, input_size, inv=1) 64 | for p in range(coords.shape[0]): 65 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 66 | return target_coords 67 | 68 | def transform_parsing(pred, center, scale, width, height, input_size): 69 | 70 | trans = get_affine_transform(center, scale, 0, input_size, inv=1) 71 | target_pred = cv2.warpAffine( 72 | pred, 73 | trans, 74 | (int(width), int(height)), #(int(width), int(height)), 75 | flags=cv2.INTER_NEAREST, 76 | borderMode=cv2.BORDER_CONSTANT, 77 | borderValue=(0)) 78 | 79 | return target_pred 80 | 81 
| def transform_logits(logits, center, scale, width, height, input_size): 82 | 83 | trans = get_affine_transform(center, scale, 0, input_size, inv=1) 84 | channel = logits.shape[2] 85 | target_logits = [] 86 | for i in range(channel): 87 | target_logit = cv2.warpAffine( 88 | logits[:,:,i], 89 | trans, 90 | (int(width), int(height)), #(int(width), int(height)), 91 | flags=cv2.INTER_LINEAR, 92 | borderMode=cv2.BORDER_CONSTANT, 93 | borderValue=(0)) 94 | target_logits.append(target_logit) 95 | target_logits = np.stack(target_logits,axis=2) 96 | 97 | return target_logits 98 | 99 | 100 | def get_affine_transform(center, 101 | scale, 102 | rot, 103 | output_size, 104 | shift=np.array([0, 0], dtype=np.float32), 105 | inv=0): 106 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 107 | print(scale) 108 | scale = np.array([scale, scale]) 109 | 110 | scale_tmp = scale 111 | 112 | src_w = scale_tmp[0] 113 | dst_w = output_size[1] 114 | dst_h = output_size[0] 115 | 116 | rot_rad = np.pi * rot / 180 117 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 118 | dst_dir = np.array([0, (dst_w-1) * -0.5], np.float32) 119 | 120 | src = np.zeros((3, 2), dtype=np.float32) 121 | dst = np.zeros((3, 2), dtype=np.float32) 122 | src[0, :] = center + scale_tmp * shift 123 | src[1, :] = center + src_dir + scale_tmp * shift 124 | dst[0, :] = [(dst_w-1) * 0.5, (dst_h-1) * 0.5] 125 | dst[1, :] = np.array([(dst_w-1) * 0.5, (dst_h-1) * 0.5]) + dst_dir 126 | 127 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 128 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 129 | 130 | if inv: 131 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 132 | else: 133 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 134 | 135 | return trans 136 | 137 | 138 | def affine_transform(pt, t): 139 | new_pt = np.array([pt[0], pt[1], 1.]).T 140 | new_pt = np.dot(t, new_pt) 141 | return new_pt[:2] 142 | 143 | 144 | def get_3rd_point(a, b): 145 | direct = a - b 146 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 147 | 148 | 149 | def get_dir(src_point, rot_rad): 150 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 151 | 152 | src_result = [0, 0] 153 | src_result[0] = src_point[0] * cs - src_point[1] * sn 154 | src_result[1] = src_point[0] * sn + src_point[1] * cs 155 | 156 | return src_result 157 | 158 | 159 | def crop(img, center, scale, output_size, rot=0): 160 | trans = get_affine_transform(center, scale, rot, output_size) 161 | 162 | dst_img = cv2.warpAffine(img, 163 | trans, 164 | (int(output_size[1]), int(output_size[0])), 165 | flags=cv2.INTER_LINEAR) 166 | 167 | return dst_img 168 | -------------------------------------------------------------------------------- /schp/utils/miou.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import numpy as np 4 | 5 | from collections import OrderedDict 6 | from PIL import Image as PILImage 7 | from utils.transforms import transform_parsing 8 | 9 | LABELS = ['Background', 'Hat', 'Hair', 'Glove', 'Sunglasses', 'Upper-clothes', 'Dress', 'Coat', \ 10 | 'Socks', 'Pants', 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm', 'Right-arm', 'Left-leg', 11 | 'Right-leg', 'Left-shoe', 'Right-shoe'] 12 | 13 | 14 | # LABELS = ['Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs'] 15 | 16 | def get_palette(num_cls): 17 | """ Returns the color map for visualizing the segmentation mask. 
18 | Args: 19 | num_cls: Number of classes 20 | Returns: 21 | The color map 22 | """ 23 | 24 | n = num_cls 25 | palette = [0] * (n * 3) 26 | for j in range(0, n): 27 | lab = j 28 | palette[j * 3 + 0] = 0 29 | palette[j * 3 + 1] = 0 30 | palette[j * 3 + 2] = 0 31 | i = 0 32 | while lab: 33 | palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) 34 | palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) 35 | palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) 36 | i += 1 37 | lab >>= 3 38 | return palette 39 | 40 | 41 | def get_confusion_matrix(gt_label, pred_label, num_classes): 42 | """ 43 | Calcute the confusion matrix by given label and pred 44 | :param gt_label: the ground truth label 45 | :param pred_label: the pred label 46 | :param num_classes: the nunber of class 47 | :return: the confusion matrix 48 | """ 49 | index = (gt_label * num_classes + pred_label).astype('int32') 50 | label_count = np.bincount(index) 51 | confusion_matrix = np.zeros((num_classes, num_classes)) 52 | 53 | for i_label in range(num_classes): 54 | for i_pred_label in range(num_classes): 55 | cur_index = i_label * num_classes + i_pred_label 56 | if cur_index < len(label_count): 57 | confusion_matrix[i_label, i_pred_label] = label_count[cur_index] 58 | 59 | return confusion_matrix 60 | 61 | 62 | def compute_mean_ioU(preds, scales, centers, num_classes, datadir, input_size=[473, 473], dataset='val'): 63 | val_file = os.path.join(datadir, dataset + '_id.txt') 64 | val_id = [i_id.strip() for i_id in open(val_file)] 65 | 66 | confusion_matrix = np.zeros((num_classes, num_classes)) 67 | 68 | for i, pred_out in enumerate(preds): 69 | im_name = val_id[i] 70 | gt_path = os.path.join(datadir, dataset + '_segmentations', im_name + '.png') 71 | gt = np.array(PILImage.open(gt_path)) 72 | h, w = gt.shape 73 | s = scales[i] 74 | c = centers[i] 75 | pred = transform_parsing(pred_out, c, s, w, h, input_size) 76 | 77 | gt = np.asarray(gt, dtype=np.int32) 78 | pred = np.asarray(pred, dtype=np.int32) 79 | 80 | ignore_index = gt != 255 81 | 82 | gt = gt[ignore_index] 83 | pred = pred[ignore_index] 84 | 85 | confusion_matrix += get_confusion_matrix(gt, pred, num_classes) 86 | 87 | pos = confusion_matrix.sum(1) 88 | res = confusion_matrix.sum(0) 89 | tp = np.diag(confusion_matrix) 90 | 91 | pixel_accuracy = (tp.sum() / pos.sum()) * 100 92 | mean_accuracy = ((tp / np.maximum(1.0, pos)).mean()) * 100 93 | IoU_array = (tp / np.maximum(1.0, pos + res - tp)) 94 | IoU_array = IoU_array * 100 95 | mean_IoU = IoU_array.mean() 96 | print('Pixel accuracy: %f \n' % pixel_accuracy) 97 | print('Mean accuracy: %f \n' % mean_accuracy) 98 | print('Mean IU: %f \n' % mean_IoU) 99 | name_value = [] 100 | 101 | for i, (label, iou) in enumerate(zip(LABELS, IoU_array)): 102 | name_value.append((label, iou)) 103 | 104 | name_value.append(('Pixel accuracy', pixel_accuracy)) 105 | name_value.append(('Mean accuracy', mean_accuracy)) 106 | name_value.append(('Mean IU', mean_IoU)) 107 | name_value = OrderedDict(name_value) 108 | return name_value 109 | 110 | 111 | def compute_mean_ioU_file(preds_dir, num_classes, datadir, dataset='val'): 112 | list_path = os.path.join(datadir, dataset + '_id.txt') 113 | val_id = [i_id.strip() for i_id in open(list_path)] 114 | 115 | confusion_matrix = np.zeros((num_classes, num_classes)) 116 | 117 | for i, im_name in enumerate(val_id): 118 | gt_path = os.path.join(datadir, 'segmentations', im_name + '.png') 119 | gt = cv2.imread(gt_path, cv2.IMREAD_GRAYSCALE) 120 | 121 | pred_path = os.path.join(preds_dir, im_name + '.png') 122 | 
pred = np.asarray(PILImage.open(pred_path)) 123 | 124 | gt = np.asarray(gt, dtype=np.int32) 125 | pred = np.asarray(pred, dtype=np.int32) 126 | 127 | ignore_index = gt != 255 128 | 129 | gt = gt[ignore_index] 130 | pred = pred[ignore_index] 131 | 132 | confusion_matrix += get_confusion_matrix(gt, pred, num_classes) 133 | 134 | pos = confusion_matrix.sum(1) 135 | res = confusion_matrix.sum(0) 136 | tp = np.diag(confusion_matrix) 137 | 138 | pixel_accuracy = (tp.sum() / pos.sum()) * 100 139 | mean_accuracy = ((tp / np.maximum(1.0, pos)).mean()) * 100 140 | IoU_array = (tp / np.maximum(1.0, pos + res - tp)) 141 | IoU_array = IoU_array * 100 142 | mean_IoU = IoU_array.mean() 143 | print('Pixel accuracy: %f \n' % pixel_accuracy) 144 | print('Mean accuracy: %f \n' % mean_accuracy) 145 | print('Mean IU: %f \n' % mean_IoU) 146 | name_value = [] 147 | 148 | for i, (label, iou) in enumerate(zip(LABELS, IoU_array)): 149 | name_value.append((label, iou)) 150 | 151 | name_value.append(('Pixel accuracy', pixel_accuracy)) 152 | name_value.append(('Mean accuracy', mean_accuracy)) 153 | name_value.append(('Mean IU', mean_IoU)) 154 | name_value = OrderedDict(name_value) 155 | return name_value 156 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cozy Human Parser 2 | 3 | Fast, VRAM-light ComfyUI nodes to generate masks for specific body parts and clothes or fashion items. Runs on CPU and CUDA. 4 | Made with 💚 by the [CozyMantis](https://cozymantis.gumroad.com/) squad. 5 | 6 | | Original | ATR | LIP | Pascal | 7 | | --------------------- | ------------------------ | ------------------------ | ------------------------ | 8 | | ![](assets/demo2.jpg) | ![](assets/demo2atr.png) | ![](assets/demo2lip.png) | ![](assets/demo2pascal.png) | 9 | | ![](assets/demo3.jpg) | ![](assets/demo3atr.png) | ![](assets/demo3lip.png) | ![](assets/demo3pascal.png) | 10 | 11 | ## Installation 12 | 13 | - Clone this repository into your custom_nodes directory, then run `pip install -r requirements.txt` to install the required dependencies. 14 | - Copy the following models to the `models/schp` directory, depending on which parser you would like to use: 15 | - Model based on the LIP dataset: [Google Drive](https://drive.google.com/file/d/1k4dllHpu0bdx38J7H28rVVLpU-kOHmnH/view?usp=sharing) 16 | - Model based on the ATR dataset: [Google Drive](https://drive.google.com/file/d/1ruJg4lqR_jgQPj-9K0PP-L2vJERYOxLP/view?usp=sharing) 17 | - Model based on the Pascal dataset: [Google Drive](https://drive.google.com/file/d/1E5YwNKW2VOEayK9mWCS3Kpsxf-3z04ZE/view?usp=sharing) 18 | 19 | Check below for [Windows troubleshooting](#windows-troubleshooting). 20 | 21 | ## Examples 22 | 23 | ### LIP Parser 24 | 25 | - LIP is the largest single person human parsing dataset with 50000+ images. This dataset focuses on complicated real scenarios. 26 | - mIoU on LIP validation: 59.36 % 27 | - The LIP parser can detect the following categories: 28 | 29 | ``` 30 | ['Background', 'Hat', 'Hair', 'Glove', 'Sunglasses' 'Upper-clothes', 'Dress', 'Coat', 'Socks', 'Pants', 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm', 'Right-arm', 'Left-leg', 'Right-leg', 'Left-shoe', 'Right-shoe'] 31 | ``` 32 | 33 | ![assets/lipexample.png](assets/lipexample.png) 34 | 35 | ### ATR Parser 36 | 37 | - ATR is a large single person human parsing dataset with 17000+ images. This dataset focuses on fashion AI. 
38 | - mIoU on ATR test: 82.29% 39 | - The ATR parser can detect the following categories: 40 | 41 | ``` 42 | ['Background', 'Hat', 'Hair', 'Sunglasses', 'Upper-clothes', 'Skirt', 'Pants', 'Dress', 'Belt', 'Left-shoe', 'Right-shoe', 'Face', 'Left-leg', 'Right-leg', 'Left-arm', 'Right-arm', 'Bag', 'Scarf'] 43 | ``` 44 | 45 | ![assets/atrexample.png](assets/atrexample.png) 46 | 47 | ### Pascal Parser 48 | 49 | - Pascal Person Part is a tiny single person human parsing dataset with 3000+ images. This dataset focuses on body parts segmentation. 50 | - mIoU on Pascal-Person-Part validation: 71.46 % 51 | - The Pascal parser can detect the following categories: 52 | 53 | ``` 54 | ['Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs'] 55 | ``` 56 | 57 | ![assets/pascalexample.png](assets/pascalexample.png) 58 | 59 | ## Windows Troubleshooting 60 | 61 | - ### Ninja is required to load C++ extensions 62 | 63 | Windows can't find the "ninja.exe" file. The file is probably getting downloaded/installed to something like `X:\path\to\comfy\python_embeded\lib\site-packages\ninja\data\bin`, but it's not properly getting added to the system path, so the OS can't invoke it. 64 | 65 | The solution is to: 66 | - locate the "ninja.exe" file; 67 | - add the full path to ninja.exe into the system PATH: 68 | - see https://www.mathworks.com/matlabcentral/answers/94933-how-do-i-edit-my-system-path-in-windows 69 | - remember, you need to enter the path to the folder containing the ninja.exe binary) 70 | - see [this issue](https://github.com/cozymantis/human-parser-comfyui-node/issues/3) for more details 71 | 72 | - ### NK1104: cannot open file 'python311.lib' (or similar) 73 | 74 | Windows can't locate the `python311.lib` library. You need to search for it on your system, then add the library's parent directory to the "LIB" environment variable. 75 | - see [this SO article](https://stackoverflow.com/questions/36419747/link-fatal-error-lnk1104-cannot-open-file-python27-lib) 76 | - also see [this issue](https://github.com/cozymantis/human-parser-comfyui-node/issues/1) 77 | 78 | - ### Command '['where', 'cl']' returned non-zero exit status 1 79 | 80 | Windows can't locate "cl.exe" which is the compiler/linker tool: https://learn.microsoft.com/en-us/cpp/build/reference/compiler-options?view=msvc-170 81 | 82 | > You can start this tool only from a Visual Studio developer command prompt. You cannot start it from a system command prompt or from File Explorer. For more information, see Use the MSVC toolset from the command line. 83 | 84 | First, make sure you've installed all of the things highlighted below: 85 | 86 | ![image](https://github.com/cozymantis/human-parser-comfyui-node/assets/5381731/76fbff32-be60-4120-a682-4fa7588e9bf4) 87 | 88 | Then, it looks like you'll need to start ComfyUI from the developer command prompt instead of the regular cmd. Here's docs on how to launch the dev command prompt: https://learn.microsoft.com/en-us/visualstudio/ide/reference/command-prompt-powershell?view=vs-2022 89 | 90 | You'll want to run something similar to: 91 | 92 | ```bash 93 | cd X:\path\to\comfy 94 | python main.py 95 | ``` 96 | 97 | - ### error: first parameter of allocation function must be of type "size_t" 98 | 99 | Make sure you're running the "x64 Native Tools Command Prompt" instead of the x86 one. Type "x64" in the start menu to locate it. 
100 | 101 | ![image](https://github.com/cozymantis/human-parser-comfyui-node/assets/5381731/120f5a1b-adf3-4fb1-a3df-5c0006ce0a6e) 102 | 103 | ## Acknowledgements 104 | 105 | Based on the excellent paper ["Self-Correction for Human Parsing"](https://github.com/GoGoDuck912/Self-Correction-Human-Parsing) by Li, Peike and Xu, Yunqiu and Wei, Yunchao and Yang, Yi, and their original code that we've updated to also run on CPUs. 106 | 107 | ```bibtex 108 | @article{li2020self, 109 | title={Self-Correction for Human Parsing}, 110 | author={Li, Peike and Xu, Yunqiu and Wei, Yunchao and Yang, Yi}, 111 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, 112 | year={2020}, 113 | doi={10.1109/TPAMI.2020.3048039}} 114 | ``` 115 | -------------------------------------------------------------------------------- /schp/utils/criterion.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : criterion.py 8 | @Time : 8/30/19 8:59 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 12 | """ 13 | 14 | import torch.nn as nn 15 | import torch 16 | import numpy as np 17 | from torch.nn import functional as F 18 | from .lovasz_softmax import LovaszSoftmax 19 | from .kl_loss import KLDivergenceLoss 20 | from .consistency_loss import ConsistencyLoss 21 | 22 | NUM_CLASSES = 20 23 | 24 | 25 | class CriterionAll(nn.Module): 26 | def __init__(self, use_class_weight=False, ignore_index=255, lambda_1=1, lambda_2=1, lambda_3=1, 27 | num_classes=20): 28 | super(CriterionAll, self).__init__() 29 | self.ignore_index = ignore_index 30 | self.use_class_weight = use_class_weight 31 | self.criterion = torch.nn.CrossEntropyLoss(ignore_index=ignore_index) 32 | self.lovasz = LovaszSoftmax(ignore_index=ignore_index) 33 | self.kldiv = KLDivergenceLoss(ignore_index=ignore_index) 34 | self.reg = ConsistencyLoss(ignore_index=ignore_index) 35 | self.lamda_1 = lambda_1 36 | self.lamda_2 = lambda_2 37 | self.lamda_3 = lambda_3 38 | self.num_classes = num_classes 39 | 40 | def parsing_loss(self, preds, target, cycle_n=None): 41 | """ 42 | Loss function definition. 43 | 44 | Args: 45 | preds: [[parsing result1, parsing result2],[edge result]] 46 | target: [parsing label, egde label] 47 | soft_preds: [[parsing result1, parsing result2],[edge result]] 48 | Returns: 49 | Calculated Loss. 
50 | """ 51 | h, w = target[0].size(1), target[0].size(2) 52 | 53 | pos_num = torch.sum(target[1] == 1, dtype=torch.float) 54 | neg_num = torch.sum(target[1] == 0, dtype=torch.float) 55 | 56 | weight_pos = neg_num / (pos_num + neg_num) 57 | weight_neg = pos_num / (pos_num + neg_num) 58 | weights = torch.tensor([weight_neg, weight_pos]) # edge loss weight 59 | 60 | loss = 0 61 | 62 | # loss for segmentation 63 | preds_parsing = preds[0] 64 | for pred_parsing in preds_parsing: 65 | scale_pred = F.interpolate(input=pred_parsing, size=(h, w), 66 | mode='bilinear', align_corners=True) 67 | 68 | loss += 0.5 * self.lamda_1 * self.lovasz(scale_pred, target[0]) 69 | if target[2] is None: 70 | loss += 0.5 * self.lamda_1 * self.criterion(scale_pred, target[0]) 71 | else: 72 | soft_scale_pred = F.interpolate(input=target[2], size=(h, w), 73 | mode='bilinear', align_corners=True) 74 | soft_scale_pred = moving_average(soft_scale_pred, to_one_hot(target[0], num_cls=self.num_classes), 75 | 1.0 / (cycle_n + 1.0)) 76 | loss += 0.5 * self.lamda_1 * self.kldiv(scale_pred, soft_scale_pred, target[0]) 77 | 78 | # loss for edge 79 | preds_edge = preds[1] 80 | for pred_edge in preds_edge: 81 | scale_pred = F.interpolate(input=pred_edge, size=(h, w), 82 | mode='bilinear', align_corners=True) 83 | if target[3] is None: 84 | loss += self.lamda_2 * F.cross_entropy(scale_pred, target[1], 85 | weights.cuda(), ignore_index=self.ignore_index) 86 | else: 87 | soft_scale_edge = F.interpolate(input=target[3], size=(h, w), 88 | mode='bilinear', align_corners=True) 89 | soft_scale_edge = moving_average(soft_scale_edge, to_one_hot(target[1], num_cls=2), 90 | 1.0 / (cycle_n + 1.0)) 91 | loss += self.lamda_2 * self.kldiv(scale_pred, soft_scale_edge, target[0]) 92 | 93 | # consistency regularization 94 | preds_parsing = preds[0] 95 | preds_edge = preds[1] 96 | for pred_parsing in preds_parsing: 97 | scale_pred = F.interpolate(input=pred_parsing, size=(h, w), 98 | mode='bilinear', align_corners=True) 99 | scale_edge = F.interpolate(input=preds_edge[0], size=(h, w), 100 | mode='bilinear', align_corners=True) 101 | loss += self.lamda_3 * self.reg(scale_pred, scale_edge, target[0]) 102 | 103 | return loss 104 | 105 | def forward(self, preds, target, cycle_n=None): 106 | loss = self.parsing_loss(preds, target, cycle_n) 107 | return loss 108 | 109 | def _generate_weights(self, masks, num_classes): 110 | """ 111 | masks: torch.Tensor with shape [B, H, W] 112 | """ 113 | masks_label = masks.data.cpu().numpy().astype(np.int64) 114 | pixel_nums = [] 115 | tot_pixels = 0 116 | for i in range(num_classes): 117 | pixel_num_of_cls_i = np.sum(masks_label == i).astype(np.float) 118 | pixel_nums.append(pixel_num_of_cls_i) 119 | tot_pixels += pixel_num_of_cls_i 120 | weights = [] 121 | for i in range(num_classes): 122 | weights.append( 123 | (tot_pixels - pixel_nums[i]) / tot_pixels / (num_classes - 1) 124 | ) 125 | weights = np.array(weights, dtype=np.float) 126 | # weights = torch.from_numpy(weights).float().to(masks.device) 127 | return weights 128 | 129 | 130 | def moving_average(target1, target2, alpha=1.0): 131 | target = 0 132 | target += (1.0 - alpha) * target1 133 | target += target2 * alpha 134 | return target 135 | 136 | 137 | def to_one_hot(tensor, num_cls, dim=1, ignore_index=255): 138 | b, h, w = tensor.shape 139 | tensor[tensor == ignore_index] = 0 140 | onehot_tensor = torch.zeros(b, num_cls, h, w).cuda() 141 | onehot_tensor.scatter_(dim, tensor.unsqueeze(dim), 1) 142 | return onehot_tensor 143 | 
-------------------------------------------------------------------------------- /schp/simple_extractor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : simple_extractor.py 8 | @Time : 8/30/19 8:59 PM 9 | @Desc : Simple Extractor 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 12 | """ 13 | 14 | import os 15 | import torch 16 | import argparse 17 | import numpy as np 18 | from PIL import Image 19 | from tqdm import tqdm 20 | 21 | from torch.utils.data import DataLoader 22 | import torchvision.transforms as transforms 23 | 24 | import networks 25 | from utils.transforms import transform_logits 26 | from datasets.simple_extractor_dataset import SimpleFolderDataset 27 | 28 | dataset_settings = { 29 | 'lip': { 30 | 'input_size': [473, 473], 31 | 'num_classes': 20, 32 | 'label': ['Background', 'Hat', 'Hair', 'Glove', 'Sunglasses', 'Upper-clothes', 'Dress', 'Coat', 33 | 'Socks', 'Pants', 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm', 'Right-arm', 34 | 'Left-leg', 'Right-leg', 'Left-shoe', 'Right-shoe'] 35 | }, 36 | 'atr': { 37 | 'input_size': [512, 512], 38 | 'num_classes': 18, 39 | 'label': ['Background', 'Hat', 'Hair', 'Sunglasses', 'Upper-clothes', 'Skirt', 'Pants', 'Dress', 'Belt', 40 | 'Left-shoe', 'Right-shoe', 'Face', 'Left-leg', 'Right-leg', 'Left-arm', 'Right-arm', 'Bag', 'Scarf'] 41 | }, 42 | 'pascal': { 43 | 'input_size': [512, 512], 44 | 'num_classes': 7, 45 | 'label': ['Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs'], 46 | } 47 | } 48 | 49 | 50 | def get_arguments(): 51 | """Parse all the arguments provided from the CLI. 52 | Returns: 53 | A list of parsed arguments. 54 | """ 55 | parser = argparse.ArgumentParser(description="Self Correction for Human Parsing") 56 | 57 | parser.add_argument("--dataset", type=str, default='lip', choices=['lip', 'atr', 'pascal']) 58 | parser.add_argument("--model-restore", type=str, default='', help="restore pretrained model parameters.") 59 | parser.add_argument("--gpu", type=str, default='0', help="choose gpu device.") 60 | parser.add_argument("--input-dir", type=str, default='', help="path of input image folder.") 61 | parser.add_argument("--output-dir", type=str, default='', help="path of output image folder.") 62 | parser.add_argument("--logits", action='store_true', default=False, help="whether to save the logits.") 63 | 64 | return parser.parse_args() 65 | 66 | 67 | def get_palette(num_cls): 68 | """ Returns the color map for visualizing the segmentation mask. 
69 | Args: 70 | num_cls: Number of classes 71 | Returns: 72 | The color map 73 | """ 74 | n = num_cls 75 | palette = [0] * (n * 3) 76 | for j in range(0, n): 77 | lab = j 78 | palette[j * 3 + 0] = 0 79 | palette[j * 3 + 1] = 0 80 | palette[j * 3 + 2] = 0 81 | i = 0 82 | while lab: 83 | palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) 84 | palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) 85 | palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) 86 | i += 1 87 | lab >>= 3 88 | return palette 89 | 90 | 91 | def main(): 92 | args = get_arguments() 93 | 94 | gpus = [int(i) for i in args.gpu.split(',')] 95 | assert len(gpus) == 1 96 | if not args.gpu == 'None': 97 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu 98 | 99 | num_classes = dataset_settings[args.dataset]['num_classes'] 100 | input_size = dataset_settings[args.dataset]['input_size'] 101 | label = dataset_settings[args.dataset]['label'] 102 | print("Evaluating total class number {} with {}".format(num_classes, label)) 103 | 104 | model = networks.init_model('resnet101', num_classes=num_classes, pretrained=None) 105 | 106 | state_dict = torch.load(args.model_restore)['state_dict'] 107 | from collections import OrderedDict 108 | new_state_dict = OrderedDict() 109 | for k, v in state_dict.items(): 110 | name = k[7:] # remove `module.` 111 | new_state_dict[name] = v 112 | model.load_state_dict(new_state_dict) 113 | model.cuda() 114 | model.eval() 115 | 116 | transform = transforms.Compose([ 117 | transforms.ToTensor(), 118 | transforms.Normalize(mean=[0.406, 0.456, 0.485], std=[0.225, 0.224, 0.229]) 119 | ]) 120 | dataset = SimpleFolderDataset(root=args.input_dir, input_size=input_size, transform=transform) 121 | dataloader = DataLoader(dataset) 122 | 123 | if not os.path.exists(args.output_dir): 124 | os.makedirs(args.output_dir) 125 | 126 | palette = get_palette(num_classes) 127 | with torch.no_grad(): 128 | for idx, batch in enumerate(tqdm(dataloader)): 129 | image, meta = batch 130 | img_name = meta['name'][0] 131 | c = meta['center'].numpy()[0] 132 | s = meta['scale'].numpy()[0] 133 | w = meta['width'].numpy()[0] 134 | h = meta['height'].numpy()[0] 135 | 136 | output = model(image.cuda()) 137 | upsample = torch.nn.Upsample(size=input_size, mode='bilinear', align_corners=True) 138 | upsample_output = upsample(output[0][-1][0].unsqueeze(0)) 139 | upsample_output = upsample_output.squeeze() 140 | upsample_output = upsample_output.permute(1, 2, 0) # CHW -> HWC 141 | 142 | logits_result = transform_logits(upsample_output.data.cpu().numpy(), c, s, w, h, input_size=input_size) 143 | parsing_result = np.argmax(logits_result, axis=2) 144 | parsing_result_path = os.path.join(args.output_dir, img_name[:-4] + '.png') 145 | output_img = Image.fromarray(np.asarray(parsing_result, dtype=np.uint8)) 146 | output_img.putpalette(palette) 147 | output_img.save(parsing_result_path) 148 | if args.logits: 149 | logits_result_path = os.path.join(args.output_dir, img_name[:-4] + '.npy') 150 | np.save(logits_result_path, logits_result) 151 | return 152 | 153 | 154 | if __name__ == '__main__': 155 | main() 156 | -------------------------------------------------------------------------------- /schp/README.md: -------------------------------------------------------------------------------- 1 | # Self Correction for Human Parsing 2 | 3 | ![Python 3.6](https://img.shields.io/badge/python-3.6-green.svg) 4 | [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT) 5 | 6 | An out-of-box human parsing representation 
extractor. 7 | 8 | Our solution ranks 1st for all human parsing tracks (including single, multiple and video) in the third LIP challenge! 9 | 10 | ![lip-visualization](./demo/lip-visualization.jpg) 11 | 12 | Features: 13 | - [x] Out-of-box human parsing extractor for other downstream applications. 14 | - [x] Pretrained models on three popular single-person human parsing datasets. 15 | - [x] Training and inference code. 16 | - [x] Simple yet effective extension on multi-person and video human parsing tasks. 17 | 18 | ## Requirements 19 | 20 | ``` 21 | conda env create -f environment.yaml 22 | conda activate schp 23 | pip install -r requirements.txt 24 | ``` 25 | 26 | ## Simple Out-of-Box Extractor 27 | 28 | The easiest way to get started is to use our trained SCHP models on your own images to extract human parsing representations. Here we provide state-of-the-art [trained models](https://drive.google.com/drive/folders/1uOaQCpNtosIjEL2phQKEdiYd0Td18jNo?usp=sharing) on three popular datasets. These three datasets use different label systems, so you can choose the one that best fits your own task. 29 | 30 | **LIP** ([exp-schp-201908261155-lip.pth](https://drive.google.com/file/d/1k4dllHpu0bdx38J7H28rVVLpU-kOHmnH/view?usp=sharing)) 31 | 32 | * mIoU on LIP validation: **59.36%**. 33 | 34 | * LIP is the largest single-person human parsing dataset, with 50000+ images. This dataset focuses more on complicated real-world scenarios. LIP has 20 labels, including 'Background', 'Hat', 'Hair', 'Glove', 'Sunglasses', 'Upper-clothes', 'Dress', 'Coat', 'Socks', 'Pants', 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm', 'Right-arm', 'Left-leg', 'Right-leg', 'Left-shoe', 'Right-shoe'. 35 | 36 | **ATR** ([exp-schp-201908301523-atr.pth](https://drive.google.com/file/d/1ruJg4lqR_jgQPj-9K0PP-L2vJERYOxLP/view?usp=sharing)) 37 | 38 | * mIoU on ATR test: **82.29%**. 39 | 40 | * ATR is a large single-person human parsing dataset, with 17000+ images. This dataset focuses more on fashion AI. ATR has 18 labels, including 'Background', 'Hat', 'Hair', 'Sunglasses', 'Upper-clothes', 'Skirt', 'Pants', 'Dress', 'Belt', 'Left-shoe', 'Right-shoe', 'Face', 'Left-leg', 'Right-leg', 'Left-arm', 'Right-arm', 'Bag', 'Scarf'. 41 | 42 | **Pascal-Person-Part** ([exp-schp-201908270938-pascal-person-part.pth](https://drive.google.com/file/d/1E5YwNKW2VOEayK9mWCS3Kpsxf-3z04ZE/view?usp=sharing)) 43 | 44 | * mIoU on Pascal-Person-Part validation: **71.46%**. 45 | 46 | * Pascal-Person-Part is a tiny single-person human parsing dataset, with 3000+ images. This dataset focuses more on body part segmentation. Pascal-Person-Part has 7 labels, including 'Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs'. 47 | 48 | Choose one and have fun on your own task! 49 | 50 | To extract the human parsing representation, simply put your own images in the `INPUT_PATH` folder, then download a pretrained model and run the following command. Output images with the same file names will be saved in `OUTPUT_PATH`. 51 | 52 | ``` 53 | python simple_extractor.py --dataset [DATASET] --model-restore [CHECKPOINT_PATH] --input-dir [INPUT_PATH] --output-dir [OUTPUT_PATH] 54 | ``` 55 | 56 | **[Updated]** There is also a [colab demo example](https://colab.research.google.com/drive/1JOwOPaChoc9GzyBi5FUEYTSaP2qxJl10?usp=sharing) for quick inference, provided by [@levindabhi](https://github.com/levindabhi). 57 | 58 | The `DATASET` argument accepts three options: 'lip', 'atr' and 'pascal'.
Note that each pixel in the output images denotes the predicted label number. The output images have the same size as the input ones. For better visualization, we embed a palette in the output images. We suggest reading the images with `PIL`. 59 | 60 | If you need not only the final parsing images but also the feature map representations, add the `--logits` flag to save the output feature maps. These feature maps are the logits before the softmax layer. 61 | 62 | ## Dataset Preparation 63 | 64 | Please download the [LIP](http://sysu-hcp.net/lip/) dataset and organize it following the structure below. 65 | 66 | ```commandline 67 | data/LIP 68 | |--- train_images # 30462 training single person images 69 | |--- val_images # 10000 validation single person images 70 | |--- train_segmentations # 30462 training annotations 71 | |--- val_segmentations # 10000 validation annotations 72 | |--- train_id.txt # training image list 73 | |--- val_id.txt # validation image list 74 | ``` 75 | 76 | ## Training 77 | 78 | ``` 79 | python train.py 80 | ``` 81 | By default, the trained model will be saved in the `./log` directory. Please read the arguments for more details. 82 | 83 | ## Evaluation 84 | ``` 85 | python evaluate.py --model-restore [CHECKPOINT_PATH] 86 | ``` 87 | `CHECKPOINT_PATH` should be the path to the trained model. 88 | 89 | ## Extension on Multiple Human Parsing 90 | 91 | Please read [MultipleHumanParsing.md](./mhp_extension/README.md) for more details. 92 | 93 | ## Citation 94 | 95 | Please cite our work if you find this repo useful in your research. 96 | 97 | ```latex 98 | @article{li2020self, 99 | title={Self-Correction for Human Parsing}, 100 | author={Li, Peike and Xu, Yunqiu and Wei, Yunchao and Yang, Yi}, 101 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, 102 | year={2020}, 103 | doi={10.1109/TPAMI.2020.3048039}} 104 | ``` 105 | 106 | ## Visualization 107 | 108 | * Source Image. 109 | ![demo](./demo/demo.jpg) 110 | * LIP Parsing Result. 111 | ![demo-lip](./demo/demo_lip.png) 112 | * ATR Parsing Result. 113 | ![demo-atr](./demo/demo_atr.png) 114 | * Pascal-Person-Part Parsing Result. 115 | ![demo-pascal](./demo/demo_pascal.png) 116 | * Source Image. 117 | ![demo](./mhp_extension/demo/demo.jpg) 118 | * Instance Human Mask. 119 | ![demo-lip](./mhp_extension/demo/demo_instance_human_mask.png) 120 | * Global Human Parsing Result. 121 | ![demo-lip](./mhp_extension/demo/demo_global_human_parsing.png) 122 | * Multiple Human Parsing Result. 123 | ![demo-lip](./mhp_extension/demo/demo_multiple_human_parsing.png) 124 | 125 | 126 | ## Related 127 | Our code adopts [InplaceSyncBN](https://github.com/mapillary/inplace_abn) to save GPU memory. 128 | 129 | There is also a [PaddlePaddle](https://github.com/PaddlePaddle/PaddleSeg/tree/develop/contrib/ACE2P) implementation of this project. 130 | -------------------------------------------------------------------------------- /schp/networks/backbone/resnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : resnet.py 8 | @Time : 8/4/19 3:35 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree.
12 | """ 13 | 14 | import functools 15 | import torch.nn as nn 16 | import math 17 | from torch.utils.model_zoo import load_url 18 | 19 | from modules import InPlaceABNSync 20 | 21 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') 22 | 23 | __all__ = ['ResNet', 'resnet18', 'resnet50', 'resnet101'] # resnet101 is coming soon! 24 | 25 | model_urls = { 26 | 'resnet18': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet18-imagenet.pth', 27 | 'resnet50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet50-imagenet.pth', 28 | 'resnet101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet101-imagenet.pth' 29 | } 30 | 31 | 32 | def conv3x3(in_planes, out_planes, stride=1): 33 | "3x3 convolution with padding" 34 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 35 | padding=1, bias=False) 36 | 37 | 38 | class BasicBlock(nn.Module): 39 | expansion = 1 40 | 41 | def __init__(self, inplanes, planes, stride=1, downsample=None): 42 | super(BasicBlock, self).__init__() 43 | self.conv1 = conv3x3(inplanes, planes, stride) 44 | self.bn1 = BatchNorm2d(planes) 45 | self.relu = nn.ReLU(inplace=True) 46 | self.conv2 = conv3x3(planes, planes) 47 | self.bn2 = BatchNorm2d(planes) 48 | self.downsample = downsample 49 | self.stride = stride 50 | 51 | def forward(self, x): 52 | residual = x 53 | 54 | out = self.conv1(x) 55 | out = self.bn1(out) 56 | out = self.relu(out) 57 | 58 | out = self.conv2(out) 59 | out = self.bn2(out) 60 | 61 | if self.downsample is not None: 62 | residual = self.downsample(x) 63 | 64 | out += residual 65 | out = self.relu(out) 66 | 67 | return out 68 | 69 | 70 | class Bottleneck(nn.Module): 71 | expansion = 4 72 | 73 | def __init__(self, inplanes, planes, stride=1, downsample=None): 74 | super(Bottleneck, self).__init__() 75 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 76 | self.bn1 = BatchNorm2d(planes) 77 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 78 | padding=1, bias=False) 79 | self.bn2 = BatchNorm2d(planes) 80 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 81 | self.bn3 = BatchNorm2d(planes * 4) 82 | self.relu = nn.ReLU(inplace=True) 83 | self.downsample = downsample 84 | self.stride = stride 85 | 86 | def forward(self, x): 87 | residual = x 88 | 89 | out = self.conv1(x) 90 | out = self.bn1(out) 91 | out = self.relu(out) 92 | 93 | out = self.conv2(out) 94 | out = self.bn2(out) 95 | out = self.relu(out) 96 | 97 | out = self.conv3(out) 98 | out = self.bn3(out) 99 | 100 | if self.downsample is not None: 101 | residual = self.downsample(x) 102 | 103 | out += residual 104 | out = self.relu(out) 105 | 106 | return out 107 | 108 | 109 | class ResNet(nn.Module): 110 | 111 | def __init__(self, block, layers, num_classes=1000): 112 | self.inplanes = 128 113 | super(ResNet, self).__init__() 114 | self.conv1 = conv3x3(3, 64, stride=2) 115 | self.bn1 = BatchNorm2d(64) 116 | self.relu1 = nn.ReLU(inplace=True) 117 | self.conv2 = conv3x3(64, 64) 118 | self.bn2 = BatchNorm2d(64) 119 | self.relu2 = nn.ReLU(inplace=True) 120 | self.conv3 = conv3x3(64, 128) 121 | self.bn3 = BatchNorm2d(128) 122 | self.relu3 = nn.ReLU(inplace=True) 123 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 124 | 125 | self.layer1 = self._make_layer(block, 64, layers[0]) 126 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 127 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 128 | self.layer4 = self._make_layer(block, 
512, layers[3], stride=2) 129 | self.avgpool = nn.AvgPool2d(7, stride=1) 130 | self.fc = nn.Linear(512 * block.expansion, num_classes) 131 | 132 | for m in self.modules(): 133 | if isinstance(m, nn.Conv2d): 134 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 135 | m.weight.data.normal_(0, math.sqrt(2. / n)) 136 | elif isinstance(m, BatchNorm2d): 137 | m.weight.data.fill_(1) 138 | m.bias.data.zero_() 139 | 140 | def _make_layer(self, block, planes, blocks, stride=1): 141 | downsample = None 142 | if stride != 1 or self.inplanes != planes * block.expansion: 143 | downsample = nn.Sequential( 144 | nn.Conv2d(self.inplanes, planes * block.expansion, 145 | kernel_size=1, stride=stride, bias=False), 146 | BatchNorm2d(planes * block.expansion), 147 | ) 148 | 149 | layers = [] 150 | layers.append(block(self.inplanes, planes, stride, downsample)) 151 | self.inplanes = planes * block.expansion 152 | for i in range(1, blocks): 153 | layers.append(block(self.inplanes, planes)) 154 | 155 | return nn.Sequential(*layers) 156 | 157 | def forward(self, x): 158 | x = self.relu1(self.bn1(self.conv1(x))) 159 | x = self.relu2(self.bn2(self.conv2(x))) 160 | x = self.relu3(self.bn3(self.conv3(x))) 161 | x = self.maxpool(x) 162 | 163 | x = self.layer1(x) 164 | x = self.layer2(x) 165 | x = self.layer3(x) 166 | x = self.layer4(x) 167 | 168 | x = self.avgpool(x) 169 | x = x.view(x.size(0), -1) 170 | x = self.fc(x) 171 | 172 | return x 173 | 174 | 175 | def resnet18(pretrained=False, **kwargs): 176 | """Constructs a ResNet-18 model. 177 | Args: 178 | pretrained (bool): If True, returns a model pre-trained on ImageNet 179 | """ 180 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 181 | if pretrained: 182 | model.load_state_dict(load_url(model_urls['resnet18'])) 183 | return model 184 | 185 | 186 | def resnet50(pretrained=False, **kwargs): 187 | """Constructs a ResNet-50 model. 188 | Args: 189 | pretrained (bool): If True, returns a model pre-trained on ImageNet 190 | """ 191 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 192 | if pretrained: 193 | model.load_state_dict(load_url(model_urls['resnet50']), strict=False) 194 | return model 195 | 196 | 197 | def resnet101(pretrained=False, **kwargs): 198 | """Constructs a ResNet-101 model. 199 | Args: 200 | pretrained (bool): If True, returns a model pre-trained on ImageNet 201 | """ 202 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 203 | if pretrained: 204 | model.load_state_dict(load_url(model_urls['resnet101']), strict=False) 205 | return model 206 | -------------------------------------------------------------------------------- /schp/datasets/datasets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : datasets.py 8 | @Time : 8/4/19 3:35 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 
12 | """ 13 | 14 | import os 15 | import numpy as np 16 | import random 17 | import torch 18 | import cv2 19 | from torch.utils import data 20 | from utils.transforms import get_affine_transform 21 | 22 | 23 | class LIPDataSet(data.Dataset): 24 | def __init__(self, root, dataset, crop_size=[473, 473], scale_factor=0.25, 25 | rotation_factor=30, ignore_label=255, transform=None): 26 | self.root = root 27 | self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0] 28 | self.crop_size = np.asarray(crop_size) 29 | self.ignore_label = ignore_label 30 | self.scale_factor = scale_factor 31 | self.rotation_factor = rotation_factor 32 | self.flip_prob = 0.5 33 | self.transform = transform 34 | self.dataset = dataset 35 | 36 | list_path = os.path.join(self.root, self.dataset + '_id.txt') 37 | train_list = [i_id.strip() for i_id in open(list_path)] 38 | 39 | self.train_list = train_list 40 | self.number_samples = len(self.train_list) 41 | 42 | def __len__(self): 43 | return self.number_samples 44 | 45 | def _box2cs(self, box): 46 | x, y, w, h = box[:4] 47 | return self._xywh2cs(x, y, w, h) 48 | 49 | def _xywh2cs(self, x, y, w, h): 50 | center = np.zeros((2), dtype=np.float32) 51 | center[0] = x + w * 0.5 52 | center[1] = y + h * 0.5 53 | if w > self.aspect_ratio * h: 54 | h = w * 1.0 / self.aspect_ratio 55 | elif w < self.aspect_ratio * h: 56 | w = h * self.aspect_ratio 57 | scale = np.array([w * 1.0, h * 1.0], dtype=np.float32) 58 | return center, scale 59 | 60 | def __getitem__(self, index): 61 | train_item = self.train_list[index] 62 | 63 | im_path = os.path.join(self.root, self.dataset + '_images', train_item + '.jpg') 64 | parsing_anno_path = os.path.join(self.root, self.dataset + '_segmentations', train_item + '.png') 65 | 66 | im = cv2.imread(im_path, cv2.IMREAD_COLOR) 67 | h, w, _ = im.shape 68 | parsing_anno = np.zeros((h, w), dtype=np.long) 69 | 70 | # Get person center and scale 71 | person_center, s = self._box2cs([0, 0, w - 1, h - 1]) 72 | r = 0 73 | 74 | if self.dataset != 'test': 75 | # Get pose annotation 76 | parsing_anno = cv2.imread(parsing_anno_path, cv2.IMREAD_GRAYSCALE) 77 | if self.dataset == 'train' or self.dataset == 'trainval': 78 | sf = self.scale_factor 79 | rf = self.rotation_factor 80 | s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf) 81 | r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) if random.random() <= 0.6 else 0 82 | 83 | if random.random() <= self.flip_prob: 84 | im = im[:, ::-1, :] 85 | parsing_anno = parsing_anno[:, ::-1] 86 | person_center[0] = im.shape[1] - person_center[0] - 1 87 | right_idx = [15, 17, 19] 88 | left_idx = [14, 16, 18] 89 | for i in range(0, 3): 90 | right_pos = np.where(parsing_anno == right_idx[i]) 91 | left_pos = np.where(parsing_anno == left_idx[i]) 92 | parsing_anno[right_pos[0], right_pos[1]] = left_idx[i] 93 | parsing_anno[left_pos[0], left_pos[1]] = right_idx[i] 94 | 95 | trans = get_affine_transform(person_center, s, r, self.crop_size) 96 | input = cv2.warpAffine( 97 | im, 98 | trans, 99 | (int(self.crop_size[1]), int(self.crop_size[0])), 100 | flags=cv2.INTER_LINEAR, 101 | borderMode=cv2.BORDER_CONSTANT, 102 | borderValue=(0, 0, 0)) 103 | 104 | if self.transform: 105 | input = self.transform(input) 106 | 107 | meta = { 108 | 'name': train_item, 109 | 'center': person_center, 110 | 'height': h, 111 | 'width': w, 112 | 'scale': s, 113 | 'rotation': r 114 | } 115 | 116 | if self.dataset == 'val' or self.dataset == 'test': 117 | return input, meta 118 | else: 119 | label_parsing = cv2.warpAffine( 120 | parsing_anno, 121 | 
trans, 122 | (int(self.crop_size[1]), int(self.crop_size[0])), 123 | flags=cv2.INTER_NEAREST, 124 | borderMode=cv2.BORDER_CONSTANT, 125 | borderValue=(255)) 126 | 127 | label_parsing = torch.from_numpy(label_parsing) 128 | 129 | return input, label_parsing, meta 130 | 131 | 132 | class LIPDataValSet(data.Dataset): 133 | def __init__(self, root, dataset='val', crop_size=[473, 473], transform=None, flip=False): 134 | self.root = root 135 | self.crop_size = crop_size 136 | self.transform = transform 137 | self.flip = flip 138 | self.dataset = dataset 139 | self.root = root 140 | self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0] 141 | self.crop_size = np.asarray(crop_size) 142 | 143 | list_path = os.path.join(self.root, self.dataset + '_id.txt') 144 | val_list = [i_id.strip() for i_id in open(list_path)] 145 | 146 | self.val_list = val_list 147 | self.number_samples = len(self.val_list) 148 | 149 | def __len__(self): 150 | return len(self.val_list) 151 | 152 | def _box2cs(self, box): 153 | x, y, w, h = box[:4] 154 | return self._xywh2cs(x, y, w, h) 155 | 156 | def _xywh2cs(self, x, y, w, h): 157 | center = np.zeros((2), dtype=np.float32) 158 | center[0] = x + w * 0.5 159 | center[1] = y + h * 0.5 160 | if w > self.aspect_ratio * h: 161 | h = w * 1.0 / self.aspect_ratio 162 | elif w < self.aspect_ratio * h: 163 | w = h * self.aspect_ratio 164 | scale = np.array([w * 1.0, h * 1.0], dtype=np.float32) 165 | 166 | return center, scale 167 | 168 | def __getitem__(self, index): 169 | val_item = self.val_list[index] 170 | # Load training image 171 | im_path = os.path.join(self.root, self.dataset + '_images', val_item + '.jpg') 172 | im = cv2.imread(im_path, cv2.IMREAD_COLOR) 173 | h, w, _ = im.shape 174 | # Get person center and scale 175 | person_center, s = self._box2cs([0, 0, w - 1, h - 1]) 176 | r = 0 177 | trans = get_affine_transform(person_center, s, r, self.crop_size) 178 | input = cv2.warpAffine( 179 | im, 180 | trans, 181 | (int(self.crop_size[1]), int(self.crop_size[0])), 182 | flags=cv2.INTER_LINEAR, 183 | borderMode=cv2.BORDER_CONSTANT, 184 | borderValue=(0, 0, 0)) 185 | input = self.transform(input) 186 | flip_input = input.flip(dims=[-1]) 187 | if self.flip: 188 | batch_input_im = torch.stack([input, flip_input]) 189 | else: 190 | batch_input_im = input 191 | 192 | meta = { 193 | 'name': val_item, 194 | 'center': person_center, 195 | 'height': h, 196 | 'width': w, 197 | 'scale': s, 198 | 'rotation': r 199 | } 200 | 201 | return batch_input_im, meta 202 | -------------------------------------------------------------------------------- /schp/utils/encoding.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """Encoding Data Parallel""" 12 | import threading 13 | import functools 14 | import torch 15 | from torch.autograd import Variable, Function 16 | import torch.cuda.comm as comm 17 | from torch.nn.parallel.data_parallel import DataParallel 18 | from torch.nn.parallel.parallel_apply import get_a_var 19 | from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast 20 | 21 | torch_ver = 
torch.__version__[:3] 22 | 23 | __all__ = ['allreduce', 'DataParallelModel', 'DataParallelCriterion', 'patch_replication_callback'] 24 | 25 | def allreduce(*inputs): 26 | """Cross GPU all reduce autograd operation for calculate mean and 27 | variance in SyncBN. 28 | """ 29 | return AllReduce.apply(*inputs) 30 | 31 | class AllReduce(Function): 32 | @staticmethod 33 | def forward(ctx, num_inputs, *inputs): 34 | ctx.num_inputs = num_inputs 35 | ctx.target_gpus = [inputs[i].get_device() for i in range(0, len(inputs), num_inputs)] 36 | inputs = [inputs[i:i + num_inputs] 37 | for i in range(0, len(inputs), num_inputs)] 38 | # sort before reduce sum 39 | inputs = sorted(inputs, key=lambda i: i[0].get_device()) 40 | results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) 41 | outputs = comm.broadcast_coalesced(results, ctx.target_gpus) 42 | return tuple([t for tensors in outputs for t in tensors]) 43 | 44 | @staticmethod 45 | def backward(ctx, *inputs): 46 | inputs = [i.data for i in inputs] 47 | inputs = [inputs[i:i + ctx.num_inputs] 48 | for i in range(0, len(inputs), ctx.num_inputs)] 49 | results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) 50 | outputs = comm.broadcast_coalesced(results, ctx.target_gpus) 51 | return (None,) + tuple([Variable(t) for tensors in outputs for t in tensors]) 52 | 53 | class Reduce(Function): 54 | @staticmethod 55 | def forward(ctx, *inputs): 56 | ctx.target_gpus = [inputs[i].get_device() for i in range(len(inputs))] 57 | inputs = sorted(inputs, key=lambda i: i.get_device()) 58 | return comm.reduce_add(inputs) 59 | 60 | @staticmethod 61 | def backward(ctx, gradOutput): 62 | return Broadcast.apply(ctx.target_gpus, gradOutput) 63 | 64 | 65 | class DataParallelModel(DataParallel): 66 | """Implements data parallelism at the module level. 67 | 68 | This container parallelizes the application of the given module by 69 | splitting the input across the specified devices by chunking in the 70 | batch dimension. 71 | In the forward pass, the module is replicated on each device, 72 | and each replica handles a portion of the input. During the backwards pass, gradients from each replica are summed into the original module. 73 | Note that the outputs are not gathered, please use compatible 74 | :class:`encoding.parallel.DataParallelCriterion`. 75 | 76 | The batch size should be larger than the number of GPUs used. It should 77 | also be an integer multiple of the number of GPUs so that each chunk is 78 | the same size (so that each GPU processes the same number of samples). 79 | 80 | Args: 81 | module: module to be parallelized 82 | device_ids: CUDA devices (default: all devices) 83 | 84 | Reference: 85 | Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, 86 | Amit Agrawal. “Context Encoding for Semantic Segmentation. 87 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* 88 | 89 | Example:: 90 | 91 | >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2]) 92 | >>> y = net(x) 93 | """ 94 | def gather(self, outputs, output_device): 95 | return outputs 96 | 97 | def replicate(self, module, device_ids): 98 | modules = super(DataParallelModel, self).replicate(module, device_ids) 99 | return modules 100 | 101 | 102 | class DataParallelCriterion(DataParallel): 103 | """ 104 | Calculate loss in multiple-GPUs, which balance the memory usage for 105 | Semantic Segmentation. 106 | 107 | The targets are splitted across the specified devices by chunking in 108 | the batch dimension. 
Please use together with :class:`encoding.parallel.DataParallelModel`. 109 | 110 | Reference: 111 | Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, 112 | Amit Agrawal. “Context Encoding for Semantic Segmentation. 113 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* 114 | 115 | Example:: 116 | 117 | >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2]) 118 | >>> criterion = encoding.nn.DataParallelCriterion(criterion, device_ids=[0, 1, 2]) 119 | >>> y = net(x) 120 | >>> loss = criterion(y, target) 121 | """ 122 | def forward(self, inputs, *targets, **kwargs): 123 | # input should be already scatterd 124 | # scattering the targets instead 125 | if not self.device_ids: 126 | return self.module(inputs, *targets, **kwargs) 127 | targets, kwargs = self.scatter(targets, kwargs, self.device_ids) 128 | if len(self.device_ids) == 1: 129 | return self.module(inputs, *targets[0], **kwargs[0]) 130 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 131 | outputs = _criterion_parallel_apply(replicas, inputs, targets, kwargs) 132 | return Reduce.apply(*outputs) / len(outputs) 133 | 134 | 135 | def _criterion_parallel_apply(modules, inputs, targets, kwargs_tup=None, devices=None): 136 | assert len(modules) == len(inputs) 137 | assert len(targets) == len(inputs) 138 | if kwargs_tup: 139 | assert len(modules) == len(kwargs_tup) 140 | else: 141 | kwargs_tup = ({},) * len(modules) 142 | if devices is not None: 143 | assert len(modules) == len(devices) 144 | else: 145 | devices = [None] * len(modules) 146 | 147 | lock = threading.Lock() 148 | results = {} 149 | if torch_ver != "0.3": 150 | grad_enabled = torch.is_grad_enabled() 151 | 152 | def _worker(i, module, input, target, kwargs, device=None): 153 | if torch_ver != "0.3": 154 | torch.set_grad_enabled(grad_enabled) 155 | if device is None: 156 | device = get_a_var(input).get_device() 157 | try: 158 | if not isinstance(input, tuple): 159 | input = (input,) 160 | with torch.cuda.device(device): 161 | output = module(*(input + target), **kwargs) 162 | with lock: 163 | results[i] = output 164 | except Exception as e: 165 | with lock: 166 | results[i] = e 167 | 168 | if len(modules) > 1: 169 | threads = [threading.Thread(target=_worker, 170 | args=(i, module, input, target, 171 | kwargs, device),) 172 | for i, (module, input, target, kwargs, device) in 173 | enumerate(zip(modules, inputs, targets, kwargs_tup, devices))] 174 | 175 | for thread in threads: 176 | thread.start() 177 | for thread in threads: 178 | thread.join() 179 | else: 180 | _worker(0, modules[0], inputs[0], kwargs_tup[0], devices[0]) 181 | 182 | outputs = [] 183 | for i in range(len(inputs)): 184 | output = results[i] 185 | if isinstance(output, Exception): 186 | raise output 187 | outputs.append(output) 188 | return outputs 189 | -------------------------------------------------------------------------------- /schp/modules/residual.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.nn as nn 4 | 5 | from .bn import ABN, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE 6 | import torch.nn.functional as functional 7 | 8 | 9 | class ResidualBlock(nn.Module): 10 | """Configurable residual block 11 | 12 | Parameters 13 | ---------- 14 | in_channels : int 15 | Number of input channels. 16 | channels : list of int 17 | Number of channels in the internal feature maps. 
Can either have two or three elements: if three construct 18 | a residual block with two `3 x 3` convolutions, otherwise construct a bottleneck block with `1 x 1`, then 19 | `3 x 3` then `1 x 1` convolutions. 20 | stride : int 21 | Stride of the first `3 x 3` convolution 22 | dilation : int 23 | Dilation to apply to the `3 x 3` convolutions. 24 | groups : int 25 | Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with 26 | bottleneck blocks. 27 | norm_act : callable 28 | Function to create normalization / activation Module. 29 | dropout: callable 30 | Function to create Dropout Module. 31 | """ 32 | 33 | def __init__(self, 34 | in_channels, 35 | channels, 36 | stride=1, 37 | dilation=1, 38 | groups=1, 39 | norm_act=ABN, 40 | dropout=None): 41 | super(ResidualBlock, self).__init__() 42 | 43 | # Check parameters for inconsistencies 44 | if len(channels) != 2 and len(channels) != 3: 45 | raise ValueError("channels must contain either two or three values") 46 | if len(channels) == 2 and groups != 1: 47 | raise ValueError("groups > 1 are only valid if len(channels) == 3") 48 | 49 | is_bottleneck = len(channels) == 3 50 | need_proj_conv = stride != 1 or in_channels != channels[-1] 51 | 52 | if not is_bottleneck: 53 | bn2 = norm_act(channels[1]) 54 | bn2.activation = ACT_NONE 55 | layers = [ 56 | ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False, 57 | dilation=dilation)), 58 | ("bn1", norm_act(channels[0])), 59 | ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, 60 | dilation=dilation)), 61 | ("bn2", bn2) 62 | ] 63 | if dropout is not None: 64 | layers = layers[0:2] + [("dropout", dropout())] + layers[2:] 65 | else: 66 | bn3 = norm_act(channels[2]) 67 | bn3.activation = ACT_NONE 68 | layers = [ 69 | ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=1, padding=0, bias=False)), 70 | ("bn1", norm_act(channels[0])), 71 | ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=stride, padding=dilation, bias=False, 72 | groups=groups, dilation=dilation)), 73 | ("bn2", norm_act(channels[1])), 74 | ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False)), 75 | ("bn3", bn3) 76 | ] 77 | if dropout is not None: 78 | layers = layers[0:4] + [("dropout", dropout())] + layers[4:] 79 | self.convs = nn.Sequential(OrderedDict(layers)) 80 | 81 | if need_proj_conv: 82 | self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False) 83 | self.proj_bn = norm_act(channels[-1]) 84 | self.proj_bn.activation = ACT_NONE 85 | 86 | def forward(self, x): 87 | if hasattr(self, "proj_conv"): 88 | residual = self.proj_conv(x) 89 | residual = self.proj_bn(residual) 90 | else: 91 | residual = x 92 | x = self.convs(x) + residual 93 | 94 | if self.convs.bn1.activation == ACT_LEAKY_RELU: 95 | return functional.leaky_relu(x, negative_slope=self.convs.bn1.slope, inplace=True) 96 | elif self.convs.bn1.activation == ACT_ELU: 97 | return functional.elu(x, inplace=True) 98 | else: 99 | return x 100 | 101 | 102 | class IdentityResidualBlock(nn.Module): 103 | def __init__(self, 104 | in_channels, 105 | channels, 106 | stride=1, 107 | dilation=1, 108 | groups=1, 109 | norm_act=ABN, 110 | dropout=None): 111 | """Configurable identity-mapping residual block 112 | 113 | Parameters 114 | ---------- 115 | in_channels : int 116 | Number of input channels. 117 | channels : list of int 118 | Number of channels in the internal feature maps. 
Can either have two or three elements: if three construct 119 | a residual block with two `3 x 3` convolutions, otherwise construct a bottleneck block with `1 x 1`, then 120 | `3 x 3` then `1 x 1` convolutions. 121 | stride : int 122 | Stride of the first `3 x 3` convolution 123 | dilation : int 124 | Dilation to apply to the `3 x 3` convolutions. 125 | groups : int 126 | Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with 127 | bottleneck blocks. 128 | norm_act : callable 129 | Function to create normalization / activation Module. 130 | dropout: callable 131 | Function to create Dropout Module. 132 | """ 133 | super(IdentityResidualBlock, self).__init__() 134 | 135 | # Check parameters for inconsistencies 136 | if len(channels) != 2 and len(channels) != 3: 137 | raise ValueError("channels must contain either two or three values") 138 | if len(channels) == 2 and groups != 1: 139 | raise ValueError("groups > 1 are only valid if len(channels) == 3") 140 | 141 | is_bottleneck = len(channels) == 3 142 | need_proj_conv = stride != 1 or in_channels != channels[-1] 143 | 144 | self.bn1 = norm_act(in_channels) 145 | if not is_bottleneck: 146 | layers = [ 147 | ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False, 148 | dilation=dilation)), 149 | ("bn2", norm_act(channels[0])), 150 | ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, 151 | dilation=dilation)) 152 | ] 153 | if dropout is not None: 154 | layers = layers[0:2] + [("dropout", dropout())] + layers[2:] 155 | else: 156 | layers = [ 157 | ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=stride, padding=0, bias=False)), 158 | ("bn2", norm_act(channels[0])), 159 | ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, 160 | groups=groups, dilation=dilation)), 161 | ("bn3", norm_act(channels[1])), 162 | ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False)) 163 | ] 164 | if dropout is not None: 165 | layers = layers[0:4] + [("dropout", dropout())] + layers[4:] 166 | self.convs = nn.Sequential(OrderedDict(layers)) 167 | 168 | if need_proj_conv: 169 | self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False) 170 | 171 | def forward(self, x): 172 | if hasattr(self, "proj_conv"): 173 | bn1 = self.bn1(x) 174 | shortcut = self.proj_conv(bn1) 175 | else: 176 | shortcut = x.clone() 177 | bn1 = self.bn1(x) 178 | 179 | out = self.convs(bn1) 180 | out.add_(shortcut) 181 | 182 | return out 183 | -------------------------------------------------------------------------------- /schp/evaluate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : evaluate.py 8 | @Time : 8/4/19 3:36 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 
12 | """ 13 | 14 | import os 15 | import argparse 16 | import numpy as np 17 | import torch 18 | 19 | from torch.utils import data 20 | from tqdm import tqdm 21 | from PIL import Image as PILImage 22 | import torchvision.transforms as transforms 23 | import torch.backends.cudnn as cudnn 24 | 25 | import networks 26 | from datasets.datasets import LIPDataValSet 27 | from utils.miou import compute_mean_ioU 28 | from utils.transforms import BGR2RGB_transform 29 | from utils.transforms import transform_parsing 30 | 31 | 32 | def get_arguments(): 33 | """Parse all the arguments provided from the CLI. 34 | 35 | Returns: 36 | A list of parsed arguments. 37 | """ 38 | parser = argparse.ArgumentParser(description="Self Correction for Human Parsing") 39 | 40 | # Network Structure 41 | parser.add_argument("--arch", type=str, default='resnet101') 42 | # Data Preference 43 | parser.add_argument("--data-dir", type=str, default='./data/LIP') 44 | parser.add_argument("--batch-size", type=int, default=1) 45 | parser.add_argument("--input-size", type=str, default='473,473') 46 | parser.add_argument("--num-classes", type=int, default=20) 47 | parser.add_argument("--ignore-label", type=int, default=255) 48 | parser.add_argument("--random-mirror", action="store_true") 49 | parser.add_argument("--random-scale", action="store_true") 50 | # Evaluation Preference 51 | parser.add_argument("--log-dir", type=str, default='./log') 52 | parser.add_argument("--model-restore", type=str, default='./log/checkpoint.pth.tar') 53 | parser.add_argument("--gpu", type=str, default='0', help="choose gpu device.") 54 | parser.add_argument("--save-results", action="store_true", help="whether to save the results.") 55 | parser.add_argument("--flip", action="store_true", help="random flip during the test.") 56 | parser.add_argument("--multi-scales", type=str, default='1', help="multiple scales during the test") 57 | return parser.parse_args() 58 | 59 | 60 | def get_palette(num_cls): 61 | """ Returns the color map for visualizing the segmentation mask. 
62 | Args: 63 | num_cls: Number of classes 64 | Returns: 65 | The color map 66 | """ 67 | n = num_cls 68 | palette = [0] * (n * 3) 69 | for j in range(0, n): 70 | lab = j 71 | palette[j * 3 + 0] = 0 72 | palette[j * 3 + 1] = 0 73 | palette[j * 3 + 2] = 0 74 | i = 0 75 | while lab: 76 | palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) 77 | palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) 78 | palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) 79 | i += 1 80 | lab >>= 3 81 | return palette 82 | 83 | 84 | def multi_scale_testing(model, batch_input_im, crop_size=[473, 473], flip=True, multi_scales=[1]): 85 | flipped_idx = (15, 14, 17, 16, 19, 18) 86 | if len(batch_input_im.shape) > 4: 87 | batch_input_im = batch_input_im.squeeze() 88 | if len(batch_input_im.shape) == 3: 89 | batch_input_im = batch_input_im.unsqueeze(0) 90 | 91 | interp = torch.nn.Upsample(size=crop_size, mode='bilinear', align_corners=True) 92 | ms_outputs = [] 93 | for s in multi_scales: 94 | interp_im = torch.nn.Upsample(scale_factor=s, mode='bilinear', align_corners=True) 95 | scaled_im = interp_im(batch_input_im) 96 | parsing_output = model(scaled_im) 97 | parsing_output = parsing_output[0][-1] 98 | output = parsing_output[0] 99 | if flip: 100 | flipped_output = parsing_output[1] 101 | flipped_output[14:20, :, :] = flipped_output[flipped_idx, :, :] 102 | output += flipped_output.flip(dims=[-1]) 103 | output *= 0.5 104 | output = interp(output.unsqueeze(0)) 105 | ms_outputs.append(output[0]) 106 | ms_fused_parsing_output = torch.stack(ms_outputs) 107 | ms_fused_parsing_output = ms_fused_parsing_output.mean(0) 108 | ms_fused_parsing_output = ms_fused_parsing_output.permute(1, 2, 0) # HWC 109 | parsing = torch.argmax(ms_fused_parsing_output, dim=2) 110 | parsing = parsing.data.cpu().numpy() 111 | ms_fused_parsing_output = ms_fused_parsing_output.data.cpu().numpy() 112 | return parsing, ms_fused_parsing_output 113 | 114 | 115 | def main(): 116 | """Create the model and start the evaluation process.""" 117 | args = get_arguments() 118 | multi_scales = [float(i) for i in args.multi_scales.split(',')] 119 | gpus = [int(i) for i in args.gpu.split(',')] 120 | assert len(gpus) == 1 121 | if not args.gpu == 'None': 122 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu 123 | 124 | cudnn.benchmark = True 125 | cudnn.enabled = True 126 | 127 | h, w = map(int, args.input_size.split(',')) 128 | input_size = [h, w] 129 | 130 | model = networks.init_model(args.arch, num_classes=args.num_classes, pretrained=None) 131 | 132 | IMAGE_MEAN = model.mean 133 | IMAGE_STD = model.std 134 | INPUT_SPACE = model.input_space 135 | print('image mean: {}'.format(IMAGE_MEAN)) 136 | print('image std: {}'.format(IMAGE_STD)) 137 | print('input space:{}'.format(INPUT_SPACE)) 138 | if INPUT_SPACE == 'BGR': 139 | print('BGR Transformation') 140 | transform = transforms.Compose([ 141 | transforms.ToTensor(), 142 | transforms.Normalize(mean=IMAGE_MEAN, 143 | std=IMAGE_STD), 144 | 145 | ]) 146 | if INPUT_SPACE == 'RGB': 147 | print('RGB Transformation') 148 | transform = transforms.Compose([ 149 | transforms.ToTensor(), 150 | BGR2RGB_transform(), 151 | transforms.Normalize(mean=IMAGE_MEAN, 152 | std=IMAGE_STD), 153 | ]) 154 | 155 | # Data loader 156 | lip_test_dataset = LIPDataValSet(args.data_dir, 'val', crop_size=input_size, transform=transform, flip=args.flip) 157 | num_samples = len(lip_test_dataset) 158 | print('Totoal testing sample numbers: {}'.format(num_samples)) 159 | testloader = data.DataLoader(lip_test_dataset, batch_size=args.batch_size, 
shuffle=False, pin_memory=True) 160 | 161 | # Load model weight 162 | state_dict = torch.load(args.model_restore)['state_dict'] 163 | from collections import OrderedDict 164 | new_state_dict = OrderedDict() 165 | for k, v in state_dict.items(): 166 | name = k[7:] # remove `module.` 167 | new_state_dict[name] = v 168 | model.load_state_dict(new_state_dict) 169 | model.cuda() 170 | model.eval() 171 | 172 | sp_results_dir = os.path.join(args.log_dir, 'sp_results') 173 | if not os.path.exists(sp_results_dir): 174 | os.makedirs(sp_results_dir) 175 | 176 | palette = get_palette(20) 177 | parsing_preds = [] 178 | scales = np.zeros((num_samples, 2), dtype=np.float32) 179 | centers = np.zeros((num_samples, 2), dtype=np.int32) 180 | with torch.no_grad(): 181 | for idx, batch in enumerate(tqdm(testloader)): 182 | image, meta = batch 183 | if (len(image.shape) > 4): 184 | image = image.squeeze() 185 | im_name = meta['name'][0] 186 | c = meta['center'].numpy()[0] 187 | s = meta['scale'].numpy()[0] 188 | w = meta['width'].numpy()[0] 189 | h = meta['height'].numpy()[0] 190 | scales[idx, :] = s 191 | centers[idx, :] = c 192 | parsing, logits = multi_scale_testing(model, image.cuda(), crop_size=input_size, flip=args.flip, 193 | multi_scales=multi_scales) 194 | if args.save_results: 195 | parsing_result = transform_parsing(parsing, c, s, w, h, input_size) 196 | parsing_result_path = os.path.join(sp_results_dir, im_name + '.png') 197 | output_im = PILImage.fromarray(np.asarray(parsing_result, dtype=np.uint8)) 198 | output_im.putpalette(palette) 199 | output_im.save(parsing_result_path) 200 | 201 | parsing_preds.append(parsing) 202 | assert len(parsing_preds) == num_samples 203 | mIoU = compute_mean_ioU(parsing_preds, scales, centers, args.num_classes, args.data_dir, input_size) 204 | print(mIoU) 205 | return 206 | 207 | 208 | if __name__ == '__main__': 209 | main() 210 | -------------------------------------------------------------------------------- /schp/modules/functions.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | import torch 3 | import torch.distributed as dist 4 | import torch.autograd as autograd 5 | import torch.cuda.comm as comm 6 | from torch.autograd.function import once_differentiable 7 | from torch.utils.cpp_extension import load 8 | 9 | _src_path = path.join(path.dirname(path.abspath(__file__)), "src") 10 | 11 | if torch.cuda.is_available(): 12 | _backend = load(name="inplace_abn", 13 | extra_cflags=["-O3"], 14 | sources=[path.join(_src_path, f) for f in [ 15 | "inplace_abn.cpp", 16 | "inplace_abn_cpu.cpp", 17 | "inplace_abn_cuda.cu", 18 | "inplace_abn_cuda_half.cu" 19 | ]], 20 | extra_cuda_cflags=["--expt-extended-lambda"]) 21 | else: 22 | _backend = load(name="inplace_abn", 23 | extra_cflags=["-O3"], 24 | sources=[path.join(_src_path, f) for f in [ 25 | "inplace_abn_cpu_only.cpp", 26 | "inplace_abn_cpu.cpp" 27 | ]]) 28 | 29 | # Activation names 30 | ACT_RELU = "relu" 31 | ACT_LEAKY_RELU = "leaky_relu" 32 | ACT_ELU = "elu" 33 | ACT_NONE = "none" 34 | 35 | 36 | def _check(fn, *args, **kwargs): 37 | success = fn(*args, **kwargs) 38 | if not success: 39 | raise RuntimeError("CUDA Error encountered in {}".format(fn)) 40 | 41 | 42 | def _broadcast_shape(x): 43 | out_size = [] 44 | for i, s in enumerate(x.size()): 45 | if i != 1: 46 | out_size.append(1) 47 | else: 48 | out_size.append(s) 49 | return out_size 50 | 51 | 52 | def _reduce(x): 53 | if len(x.size()) == 2: 54 | return x.sum(dim=0) 55 | else: 56 | n, c = x.size()[0:2] 57 | 
return x.contiguous().view((n, c, -1)).sum(2).sum(0) 58 | 59 | 60 | def _count_samples(x): 61 | count = 1 62 | for i, s in enumerate(x.size()): 63 | if i != 1: 64 | count *= s 65 | return count 66 | 67 | 68 | def _act_forward(ctx, x): 69 | if ctx.activation == ACT_LEAKY_RELU: 70 | _backend.leaky_relu_forward(x, ctx.slope) 71 | elif ctx.activation == ACT_ELU: 72 | _backend.elu_forward(x) 73 | elif ctx.activation == ACT_NONE: 74 | pass 75 | 76 | 77 | def _act_backward(ctx, x, dx): 78 | if ctx.activation == ACT_LEAKY_RELU: 79 | _backend.leaky_relu_backward(x, dx, ctx.slope) 80 | elif ctx.activation == ACT_ELU: 81 | _backend.elu_backward(x, dx) 82 | elif ctx.activation == ACT_NONE: 83 | pass 84 | 85 | 86 | class InPlaceABN(autograd.Function): 87 | @staticmethod 88 | def forward(ctx, x, weight, bias, running_mean, running_var, 89 | training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01): 90 | # Save context 91 | ctx.training = training 92 | ctx.momentum = momentum 93 | ctx.eps = eps 94 | ctx.activation = activation 95 | ctx.slope = slope 96 | ctx.affine = weight is not None and bias is not None 97 | 98 | # Prepare inputs 99 | count = _count_samples(x) 100 | x = x.contiguous() 101 | weight = weight.contiguous() if ctx.affine else x.new_empty(0) 102 | bias = bias.contiguous() if ctx.affine else x.new_empty(0) 103 | 104 | if ctx.training: 105 | mean, var = _backend.mean_var(x) 106 | 107 | # Update running stats 108 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) 109 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1)) 110 | 111 | # Mark in-place modified tensors 112 | ctx.mark_dirty(x, running_mean, running_var) 113 | else: 114 | mean, var = running_mean.contiguous(), running_var.contiguous() 115 | ctx.mark_dirty(x) 116 | 117 | # BN forward + activation 118 | _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) 119 | _act_forward(ctx, x) 120 | 121 | # Output 122 | ctx.var = var 123 | ctx.save_for_backward(x, var, weight, bias) 124 | ctx.mark_non_differentiable(running_mean, running_var) 125 | return x, running_mean, running_var 126 | 127 | @staticmethod 128 | @once_differentiable 129 | def backward(ctx, dz, _drunning_mean, _drunning_var): 130 | z, var, weight, bias = ctx.saved_tensors 131 | dz = dz.contiguous() 132 | 133 | # Undo activation 134 | _act_backward(ctx, z, dz) 135 | 136 | if ctx.training: 137 | edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) 138 | else: 139 | # TODO: implement simplified CUDA backward for inference mode 140 | edz = dz.new_zeros(dz.size(1)) 141 | eydz = dz.new_zeros(dz.size(1)) 142 | 143 | dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) 144 | # dweight = eydz * weight.sign() if ctx.affine else None 145 | dweight = eydz if ctx.affine else None 146 | if dweight is not None: 147 | dweight[weight < 0] *= -1 148 | dbias = edz if ctx.affine else None 149 | 150 | return dx, dweight, dbias, None, None, None, None, None, None, None 151 | 152 | 153 | class InPlaceABNSync(autograd.Function): 154 | @classmethod 155 | def forward(cls, ctx, x, weight, bias, running_mean, running_var, 156 | training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01, equal_batches=True): 157 | # Save context 158 | ctx.training = training 159 | ctx.momentum = momentum 160 | ctx.eps = eps 161 | ctx.activation = activation 162 | ctx.slope = slope 163 | ctx.affine = weight is not None and bias is not None 164 | 165 | # Prepare inputs 166 | 
ctx.world_size = dist.get_world_size() if dist.is_initialized() else 1 167 | 168 | # count = _count_samples(x) 169 | batch_size = x.new_tensor([x.shape[0]], dtype=torch.long) 170 | 171 | x = x.contiguous() 172 | weight = weight.contiguous() if ctx.affine else x.new_empty(0) 173 | bias = bias.contiguous() if ctx.affine else x.new_empty(0) 174 | 175 | if ctx.training: 176 | mean, var = _backend.mean_var(x) 177 | if ctx.world_size > 1: 178 | # get global batch size 179 | if equal_batches: 180 | batch_size *= ctx.world_size 181 | else: 182 | dist.all_reduce(batch_size, dist.ReduceOp.SUM) 183 | 184 | ctx.factor = x.shape[0] / float(batch_size.item()) 185 | 186 | mean_all = mean.clone() * ctx.factor 187 | dist.all_reduce(mean_all, dist.ReduceOp.SUM) 188 | 189 | var_all = (var + (mean - mean_all) ** 2) * ctx.factor 190 | dist.all_reduce(var_all, dist.ReduceOp.SUM) 191 | 192 | mean = mean_all 193 | var = var_all 194 | 195 | # Update running stats 196 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) 197 | count = batch_size.item() * x.view(x.shape[0], x.shape[1], -1).shape[-1] 198 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * (float(count) / (count - 1))) 199 | 200 | # Mark in-place modified tensors 201 | ctx.mark_dirty(x, running_mean, running_var) 202 | else: 203 | mean, var = running_mean.contiguous(), running_var.contiguous() 204 | ctx.mark_dirty(x) 205 | 206 | # BN forward + activation 207 | _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) 208 | _act_forward(ctx, x) 209 | 210 | # Output 211 | ctx.var = var 212 | ctx.save_for_backward(x, var, weight, bias) 213 | ctx.mark_non_differentiable(running_mean, running_var) 214 | return x, running_mean, running_var 215 | 216 | @staticmethod 217 | @once_differentiable 218 | def backward(ctx, dz, _drunning_mean, _drunning_var): 219 | z, var, weight, bias = ctx.saved_tensors 220 | dz = dz.contiguous() 221 | 222 | # Undo activation 223 | _act_backward(ctx, z, dz) 224 | 225 | if ctx.training: 226 | edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) 227 | edz_local = edz.clone() 228 | eydz_local = eydz.clone() 229 | 230 | if ctx.world_size > 1: 231 | edz *= ctx.factor 232 | dist.all_reduce(edz, dist.ReduceOp.SUM) 233 | 234 | eydz *= ctx.factor 235 | dist.all_reduce(eydz, dist.ReduceOp.SUM) 236 | else: 237 | edz_local = edz = dz.new_zeros(dz.size(1)) 238 | eydz_local = eydz = dz.new_zeros(dz.size(1)) 239 | 240 | dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) 241 | # dweight = eydz_local * weight.sign() if ctx.affine else None 242 | dweight = eydz_local if ctx.affine else None 243 | if dweight is not None: 244 | dweight[weight < 0] *= -1 245 | dbias = edz_local if ctx.affine else None 246 | 247 | return dx, dweight, dbias, None, None, None, None, None, None, None 248 | 249 | 250 | inplace_abn = InPlaceABN.apply 251 | inplace_abn_sync = InPlaceABNSync.apply 252 | 253 | __all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"] 254 | -------------------------------------------------------------------------------- /schp/modules/src/inplace_abn_cuda_half.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include 6 | 7 | #include "utils/checks.h" 8 | #include "utils/cuda.cuh" 9 | #include "inplace_abn.h" 10 | 11 | #include 12 | 13 | // Operations for reduce 14 | struct SumOpH { 15 | __device__ SumOpH(const half *t, int c, int s) 16 | : tensor(t), 
chn(c), sp(s) {} 17 | __device__ __forceinline__ float operator()(int batch, int plane, int n) { 18 | return __half2float(tensor[(batch * chn + plane) * sp + n]); 19 | } 20 | const half *tensor; 21 | const int chn; 22 | const int sp; 23 | }; 24 | 25 | struct VarOpH { 26 | __device__ VarOpH(float m, const half *t, int c, int s) 27 | : mean(m), tensor(t), chn(c), sp(s) {} 28 | __device__ __forceinline__ float operator()(int batch, int plane, int n) { 29 | const auto t = __half2float(tensor[(batch * chn + plane) * sp + n]); 30 | return (t - mean) * (t - mean); 31 | } 32 | const float mean; 33 | const half *tensor; 34 | const int chn; 35 | const int sp; 36 | }; 37 | 38 | struct GradOpH { 39 | __device__ GradOpH(float _weight, float _bias, const half *_z, const half *_dz, int c, int s) 40 | : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {} 41 | __device__ __forceinline__ Pair operator()(int batch, int plane, int n) { 42 | float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - bias) / weight; 43 | float _dz = __half2float(dz[(batch * chn + plane) * sp + n]); 44 | return Pair(_dz, _y * _dz); 45 | } 46 | const float weight; 47 | const float bias; 48 | const half *z; 49 | const half *dz; 50 | const int chn; 51 | const int sp; 52 | }; 53 | 54 | /*********** 55 | * mean_var 56 | ***********/ 57 | 58 | __global__ void mean_var_kernel_h(const half *x, float *mean, float *var, int num, int chn, int sp) { 59 | int plane = blockIdx.x; 60 | float norm = 1.f / static_cast(num * sp); 61 | 62 | float _mean = reduce(SumOpH(x, chn, sp), plane, num, sp) * norm; 63 | __syncthreads(); 64 | float _var = reduce(VarOpH(_mean, x, chn, sp), plane, num, sp) * norm; 65 | 66 | if (threadIdx.x == 0) { 67 | mean[plane] = _mean; 68 | var[plane] = _var; 69 | } 70 | } 71 | 72 | std::vector mean_var_cuda_h(at::Tensor x) { 73 | CHECK_CUDA_INPUT(x); 74 | 75 | // Extract dimensions 76 | int64_t num, chn, sp; 77 | get_dims(x, num, chn, sp); 78 | 79 | // Prepare output tensors 80 | auto mean = at::empty({chn},x.options().dtype(at::kFloat)); 81 | auto var = at::empty({chn},x.options().dtype(at::kFloat)); 82 | 83 | // Run kernel 84 | dim3 blocks(chn); 85 | dim3 threads(getNumThreads(sp)); 86 | auto stream = at::cuda::getCurrentCUDAStream(); 87 | mean_var_kernel_h<<>>( 88 | reinterpret_cast(x.data()), 89 | mean.data(), 90 | var.data(), 91 | num, chn, sp); 92 | 93 | return {mean, var}; 94 | } 95 | 96 | /********** 97 | * forward 98 | **********/ 99 | 100 | __global__ void forward_kernel_h(half *x, const float *mean, const float *var, const float *weight, const float *bias, 101 | bool affine, float eps, int num, int chn, int sp) { 102 | int plane = blockIdx.x; 103 | 104 | const float _mean = mean[plane]; 105 | const float _var = var[plane]; 106 | const float _weight = affine ? abs(weight[plane]) + eps : 1.f; 107 | const float _bias = affine ? 
bias[plane] : 0.f; 108 | 109 | const float mul = rsqrt(_var + eps) * _weight; 110 | 111 | for (int batch = 0; batch < num; ++batch) { 112 | for (int n = threadIdx.x; n < sp; n += blockDim.x) { 113 | half *x_ptr = x + (batch * chn + plane) * sp + n; 114 | float _x = __half2float(*x_ptr); 115 | float _y = (_x - _mean) * mul + _bias; 116 | 117 | *x_ptr = __float2half(_y); 118 | } 119 | } 120 | } 121 | 122 | at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 123 | bool affine, float eps) { 124 | CHECK_CUDA_INPUT(x); 125 | CHECK_CUDA_INPUT(mean); 126 | CHECK_CUDA_INPUT(var); 127 | CHECK_CUDA_INPUT(weight); 128 | CHECK_CUDA_INPUT(bias); 129 | 130 | // Extract dimensions 131 | int64_t num, chn, sp; 132 | get_dims(x, num, chn, sp); 133 | 134 | // Run kernel 135 | dim3 blocks(chn); 136 | dim3 threads(getNumThreads(sp)); 137 | auto stream = at::cuda::getCurrentCUDAStream(); 138 | forward_kernel_h<<>>( 139 | reinterpret_cast(x.data()), 140 | mean.data(), 141 | var.data(), 142 | weight.data(), 143 | bias.data(), 144 | affine, eps, num, chn, sp); 145 | 146 | return x; 147 | } 148 | 149 | __global__ void edz_eydz_kernel_h(const half *z, const half *dz, const float *weight, const float *bias, 150 | float *edz, float *eydz, bool affine, float eps, int num, int chn, int sp) { 151 | int plane = blockIdx.x; 152 | 153 | float _weight = affine ? abs(weight[plane]) + eps : 1.f; 154 | float _bias = affine ? bias[plane] : 0.f; 155 | 156 | Pair res = reduce, GradOpH>(GradOpH(_weight, _bias, z, dz, chn, sp), plane, num, sp); 157 | __syncthreads(); 158 | 159 | if (threadIdx.x == 0) { 160 | edz[plane] = res.v1; 161 | eydz[plane] = res.v2; 162 | } 163 | } 164 | 165 | std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 166 | bool affine, float eps) { 167 | CHECK_CUDA_INPUT(z); 168 | CHECK_CUDA_INPUT(dz); 169 | CHECK_CUDA_INPUT(weight); 170 | CHECK_CUDA_INPUT(bias); 171 | 172 | // Extract dimensions 173 | int64_t num, chn, sp; 174 | get_dims(z, num, chn, sp); 175 | 176 | auto edz = at::empty({chn},z.options().dtype(at::kFloat)); 177 | auto eydz = at::empty({chn},z.options().dtype(at::kFloat)); 178 | 179 | // Run kernel 180 | dim3 blocks(chn); 181 | dim3 threads(getNumThreads(sp)); 182 | auto stream = at::cuda::getCurrentCUDAStream(); 183 | edz_eydz_kernel_h<<>>( 184 | reinterpret_cast(z.data()), 185 | reinterpret_cast(dz.data()), 186 | weight.data(), 187 | bias.data(), 188 | edz.data(), 189 | eydz.data(), 190 | affine, eps, num, chn, sp); 191 | 192 | return {edz, eydz}; 193 | } 194 | 195 | __global__ void backward_kernel_h(const half *z, const half *dz, const float *var, const float *weight, const float *bias, const float *edz, 196 | const float *eydz, half *dx, bool affine, float eps, int num, int chn, int sp) { 197 | int plane = blockIdx.x; 198 | 199 | float _weight = affine ? abs(weight[plane]) + eps : 1.f; 200 | float _bias = affine ? 
bias[plane] : 0.f; 201 | float _var = var[plane]; 202 | float _edz = edz[plane]; 203 | float _eydz = eydz[plane]; 204 | 205 | float _mul = _weight * rsqrt(_var + eps); 206 | float count = float(num * sp); 207 | 208 | for (int batch = 0; batch < num; ++batch) { 209 | for (int n = threadIdx.x; n < sp; n += blockDim.x) { 210 | float _dz = __half2float(dz[(batch * chn + plane) * sp + n]); 211 | float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - _bias) / _weight; 212 | 213 | dx[(batch * chn + plane) * sp + n] = __float2half((_dz - _edz / count - _y * _eydz / count) * _mul); 214 | } 215 | } 216 | } 217 | 218 | at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 219 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) { 220 | CHECK_CUDA_INPUT(z); 221 | CHECK_CUDA_INPUT(dz); 222 | CHECK_CUDA_INPUT(var); 223 | CHECK_CUDA_INPUT(weight); 224 | CHECK_CUDA_INPUT(bias); 225 | CHECK_CUDA_INPUT(edz); 226 | CHECK_CUDA_INPUT(eydz); 227 | 228 | // Extract dimensions 229 | int64_t num, chn, sp; 230 | get_dims(z, num, chn, sp); 231 | 232 | auto dx = at::zeros_like(z); 233 | 234 | // Run kernel 235 | dim3 blocks(chn); 236 | dim3 threads(getNumThreads(sp)); 237 | auto stream = at::cuda::getCurrentCUDAStream(); 238 | backward_kernel_h<<>>( 239 | reinterpret_cast(z.data()), 240 | reinterpret_cast(dz.data()), 241 | var.data(), 242 | weight.data(), 243 | bias.data(), 244 | edz.data(), 245 | eydz.data(), 246 | reinterpret_cast(dx.data()), 247 | affine, eps, num, chn, sp); 248 | 249 | return dx; 250 | } 251 | 252 | __global__ void leaky_relu_backward_impl_h(half *z, half *dz, float slope, int64_t count) { 253 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x){ 254 | float _z = __half2float(z[i]); 255 | if (_z < 0) { 256 | dz[i] = __float2half(__half2float(dz[i]) * slope); 257 | z[i] = __float2half(_z / slope); 258 | } 259 | } 260 | } 261 | 262 | void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope) { 263 | CHECK_CUDA_INPUT(z); 264 | CHECK_CUDA_INPUT(dz); 265 | 266 | int64_t count = z.numel(); 267 | dim3 threads(getNumThreads(count)); 268 | dim3 blocks = (count + threads.x - 1) / threads.x; 269 | auto stream = at::cuda::getCurrentCUDAStream(); 270 | leaky_relu_backward_impl_h<<>>( 271 | reinterpret_cast(z.data()), 272 | reinterpret_cast(dz.data()), 273 | slope, count); 274 | } 275 | 276 | -------------------------------------------------------------------------------- /schp/train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : train.py 8 | @Time : 8/4/19 3:36 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 
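A minimal usage sketch for the in-place ABN modules built on top of these kernels, assuming the CUDA extension under schp/modules/src compiled successfully, a GPU is available, and the code is run from the schp/ directory so that "from modules import ..." resolves; batch and channel sizes are arbitrary:

import torch
import torch.nn as nn
from modules import InPlaceABN

conv = nn.Conv2d(3, 64, 3, padding=1).cuda()
bn = InPlaceABN(64, activation='leaky_relu', slope=0.01).cuda()   # fused BN + activation

x = torch.randn(4, 3, 32, 32, device='cuda')
y = bn(conv(x))        # overwrites the conv output in place instead of allocating a new tensor
y.mean().backward()    # gradients flow through the fused edz/eydz backward shown above

# InPlaceABNSync exposes the same interface but all-reduces mean/var across processes when
# torch.distributed is initialized; float16 inputs are the case served by the *_h kernels above.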
12 | """ 13 | 14 | import os 15 | import json 16 | import timeit 17 | import argparse 18 | 19 | import torch 20 | import torch.optim as optim 21 | import torchvision.transforms as transforms 22 | import torch.backends.cudnn as cudnn 23 | from torch.utils import data 24 | 25 | import networks 26 | import utils.schp as schp 27 | from datasets.datasets import LIPDataSet 28 | from datasets.target_generation import generate_edge_tensor 29 | from utils.transforms import BGR2RGB_transform 30 | from utils.criterion import CriterionAll 31 | from utils.encoding import DataParallelModel, DataParallelCriterion 32 | from utils.warmup_scheduler import SGDRScheduler 33 | 34 | 35 | def get_arguments(): 36 | """Parse all the arguments provided from the CLI. 37 | Returns: 38 | A list of parsed arguments. 39 | """ 40 | parser = argparse.ArgumentParser(description="Self Correction for Human Parsing") 41 | 42 | # Network Structure 43 | parser.add_argument("--arch", type=str, default='resnet101') 44 | # Data Preference 45 | parser.add_argument("--data-dir", type=str, default='./data/LIP') 46 | parser.add_argument("--batch-size", type=int, default=16) 47 | parser.add_argument("--input-size", type=str, default='473,473') 48 | parser.add_argument("--num-classes", type=int, default=20) 49 | parser.add_argument("--ignore-label", type=int, default=255) 50 | parser.add_argument("--random-mirror", action="store_true") 51 | parser.add_argument("--random-scale", action="store_true") 52 | # Training Strategy 53 | parser.add_argument("--learning-rate", type=float, default=7e-3) 54 | parser.add_argument("--momentum", type=float, default=0.9) 55 | parser.add_argument("--weight-decay", type=float, default=5e-4) 56 | parser.add_argument("--gpu", type=str, default='0,1,2') 57 | parser.add_argument("--start-epoch", type=int, default=0) 58 | parser.add_argument("--epochs", type=int, default=150) 59 | parser.add_argument("--eval-epochs", type=int, default=10) 60 | parser.add_argument("--imagenet-pretrain", type=str, default='./pretrain_model/resnet101-imagenet.pth') 61 | parser.add_argument("--log-dir", type=str, default='./log') 62 | parser.add_argument("--model-restore", type=str, default='./log/checkpoint.pth.tar') 63 | parser.add_argument("--schp-start", type=int, default=100, help='schp start epoch') 64 | parser.add_argument("--cycle-epochs", type=int, default=10, help='schp cyclical epoch') 65 | parser.add_argument("--schp-restore", type=str, default='./log/schp_checkpoint.pth.tar') 66 | parser.add_argument("--lambda-s", type=float, default=1, help='segmentation loss weight') 67 | parser.add_argument("--lambda-e", type=float, default=1, help='edge loss weight') 68 | parser.add_argument("--lambda-c", type=float, default=0.1, help='segmentation-edge consistency loss weight') 69 | return parser.parse_args() 70 | 71 | 72 | def main(): 73 | args = get_arguments() 74 | print(args) 75 | 76 | start_epoch = 0 77 | cycle_n = 0 78 | 79 | if not os.path.exists(args.log_dir): 80 | os.makedirs(args.log_dir) 81 | with open(os.path.join(args.log_dir, 'args.json'), 'w') as opt_file: 82 | json.dump(vars(args), opt_file) 83 | 84 | gpus = [int(i) for i in args.gpu.split(',')] 85 | if not args.gpu == 'None': 86 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu 87 | 88 | input_size = list(map(int, args.input_size.split(','))) 89 | 90 | cudnn.enabled = True 91 | cudnn.benchmark = True 92 | 93 | # Model Initialization 94 | AugmentCE2P = networks.init_model(args.arch, num_classes=args.num_classes, pretrained=args.imagenet_pretrain) 95 | model = 
DataParallelModel(AugmentCE2P) 96 | model.cuda() 97 | 98 | IMAGE_MEAN = AugmentCE2P.mean 99 | IMAGE_STD = AugmentCE2P.std 100 | INPUT_SPACE = AugmentCE2P.input_space 101 | print('image mean: {}'.format(IMAGE_MEAN)) 102 | print('image std: {}'.format(IMAGE_STD)) 103 | print('input space:{}'.format(INPUT_SPACE)) 104 | 105 | restore_from = args.model_restore 106 | if os.path.exists(restore_from): 107 | print('Resume training from {}'.format(restore_from)) 108 | checkpoint = torch.load(restore_from) 109 | model.load_state_dict(checkpoint['state_dict']) 110 | start_epoch = checkpoint['epoch'] 111 | 112 | SCHP_AugmentCE2P = networks.init_model(args.arch, num_classes=args.num_classes, pretrained=args.imagenet_pretrain) 113 | schp_model = DataParallelModel(SCHP_AugmentCE2P) 114 | schp_model.cuda() 115 | 116 | if os.path.exists(args.schp_restore): 117 | print('Resuming schp checkpoint from {}'.format(args.schp_restore)) 118 | schp_checkpoint = torch.load(args.schp_restore) 119 | schp_model_state_dict = schp_checkpoint['state_dict'] 120 | cycle_n = schp_checkpoint['cycle_n'] 121 | schp_model.load_state_dict(schp_model_state_dict) 122 | 123 | # Loss Function 124 | criterion = CriterionAll(lambda_1=args.lambda_s, lambda_2=args.lambda_e, lambda_3=args.lambda_c, 125 | num_classes=args.num_classes) 126 | criterion = DataParallelCriterion(criterion) 127 | criterion.cuda() 128 | 129 | # Data Loader 130 | if INPUT_SPACE == 'BGR': 131 | print('BGR Transformation') 132 | transform = transforms.Compose([ 133 | transforms.ToTensor(), 134 | transforms.Normalize(mean=IMAGE_MEAN, 135 | std=IMAGE_STD), 136 | ]) 137 | 138 | elif INPUT_SPACE == 'RGB': 139 | print('RGB Transformation') 140 | transform = transforms.Compose([ 141 | transforms.ToTensor(), 142 | BGR2RGB_transform(), 143 | transforms.Normalize(mean=IMAGE_MEAN, 144 | std=IMAGE_STD), 145 | ]) 146 | 147 | train_dataset = LIPDataSet(args.data_dir, 'train', crop_size=input_size, transform=transform) 148 | train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size * len(gpus), 149 | num_workers=16, shuffle=True, pin_memory=True, drop_last=True) 150 | print('Total training samples: {}'.format(len(train_dataset))) 151 | 152 | # Optimizer Initialization 153 | optimizer = optim.SGD(model.parameters(), lr=args.learning_rate, momentum=args.momentum, 154 | weight_decay=args.weight_decay) 155 | 156 | lr_scheduler = SGDRScheduler(optimizer, total_epoch=args.epochs, 157 | eta_min=args.learning_rate / 100, warmup_epoch=10, 158 | start_cyclical=args.schp_start, cyclical_base_lr=args.learning_rate / 2, 159 | cyclical_epoch=args.cycle_epochs) 160 | 161 | total_iters = args.epochs * len(train_loader) 162 | start = timeit.default_timer() 163 | for epoch in range(start_epoch, args.epochs): 164 | lr_scheduler.step(epoch=epoch) 165 | lr = lr_scheduler.get_lr()[0] 166 | 167 | model.train() 168 | for i_iter, batch in enumerate(train_loader): 169 | i_iter += len(train_loader) * epoch 170 | 171 | images, labels, _ = batch 172 | labels = labels.cuda(non_blocking=True) 173 | 174 | edges = generate_edge_tensor(labels) 175 | labels = labels.type(torch.cuda.LongTensor) 176 | edges = edges.type(torch.cuda.LongTensor) 177 | 178 | preds = model(images) 179 | 180 | # Online Self Correction Cycle with Label Refinement 181 | if cycle_n >= 1: 182 | with torch.no_grad(): 183 | soft_preds = schp_model(images) 184 | soft_parsing = [] 185 | soft_edge = [] 186 | for soft_pred in soft_preds: 187 | soft_parsing.append(soft_pred[0][-1]) 188 | soft_edge.append(soft_pred[1][-1]) 189 | 
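# Note: soft_pred[0][-1] takes the last parsing output of the aggregated schp_model and
# soft_pred[1][-1] the last edge output (one soft_pred per GPU under DataParallelModel).
# Once concatenated below, they serve as refined soft labels that CriterionAll blends with the
# hard annotations; the blend is driven by cycle_n, so refinement only kicks in after the first
# self-correction cycle has produced an aggregated model.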
soft_preds = torch.cat(soft_parsing, dim=0) 190 | soft_edges = torch.cat(soft_edge, dim=0) 191 | else: 192 | soft_preds = None 193 | soft_edges = None 194 | 195 | loss = criterion(preds, [labels, edges, soft_preds, soft_edges], cycle_n) 196 | 197 | optimizer.zero_grad() 198 | loss.backward() 199 | optimizer.step() 200 | 201 | if i_iter % 100 == 0: 202 | print('iter = {} of {} completed, lr = {}, loss = {}'.format(i_iter, total_iters, lr, 203 | loss.data.cpu().numpy())) 204 | if (epoch + 1) % (args.eval_epochs) == 0: 205 | schp.save_schp_checkpoint({ 206 | 'epoch': epoch + 1, 207 | 'state_dict': model.state_dict(), 208 | }, False, args.log_dir, filename='checkpoint_{}.pth.tar'.format(epoch + 1)) 209 | 210 | # Self Correction Cycle with Model Aggregation 211 | if (epoch + 1) >= args.schp_start and (epoch + 1 - args.schp_start) % args.cycle_epochs == 0: 212 | print('Self-correction cycle number {}'.format(cycle_n)) 213 | schp.moving_average(schp_model, model, 1.0 / (cycle_n + 1)) 214 | cycle_n += 1 215 | schp.bn_re_estimate(train_loader, schp_model) 216 | schp.save_schp_checkpoint({ 217 | 'state_dict': schp_model.state_dict(), 218 | 'cycle_n': cycle_n, 219 | }, False, args.log_dir, filename='schp_{}_checkpoint.pth.tar'.format(cycle_n)) 220 | 221 | torch.cuda.empty_cache() 222 | end = timeit.default_timer() 223 | print('epoch = {} of {} completed using {} s'.format(epoch, args.epochs, 224 | (end - start) / (epoch - start_epoch + 1))) 225 | 226 | end = timeit.default_timer() 227 | print('Training Finished in {} seconds'.format(end - start)) 228 | 229 | 230 | if __name__ == '__main__': 231 | main() 232 | -------------------------------------------------------------------------------- /schp/networks/context_encoding/ocnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : ocnet.py 8 | @Time : 8/4/19 3:36 PM 9 | @Desc : 10 | @License : This source code is licensed under the license found in the 11 | LICENSE file in the root directory of this source tree. 
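The self-correction cycle in train.py above folds the online model into schp_model every cycle_epochs once schp_start is reached, then refreshes the BatchNorm statistics with schp.bn_re_estimate. Because alpha is 1 / (cycle_n + 1), the aggregation is a running mean of the cycle snapshots. An illustrative sketch of that update follows; the real implementation lives in schp/utils/schp.py and may differ in details:

import torch

def moving_average_sketch(schp_model, model, alpha):
    # Parameter-wise update: w_schp <- (1 - alpha) * w_schp + alpha * w
    with torch.no_grad():
        for p_schp, p in zip(schp_model.parameters(), model.parameters()):
            p_schp.mul_(1.0 - alpha).add_(alpha * p)

# After k aggregations schp_model holds the plain average of the k snapshots seen so far,
# which is the cyclical model aggregation SCHP uses to build its self-correction targets.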
12 | """ 13 | 14 | import functools 15 | 16 | import torch 17 | import torch.nn as nn 18 | from torch.autograd import Variable 19 | from torch.nn import functional as F 20 | 21 | from modules import InPlaceABNSync 22 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') 23 | 24 | 25 | class _SelfAttentionBlock(nn.Module): 26 | ''' 27 | The basic implementation for self-attention block/non-local block 28 | Input: 29 | N X C X H X W 30 | Parameters: 31 | in_channels : the dimension of the input feature map 32 | key_channels : the dimension after the key/query transform 33 | value_channels : the dimension after the value transform 34 | scale : choose the scale to downsample the input feature maps (save memory cost) 35 | Return: 36 | N X C X H X W 37 | position-aware context features.(w/o concate or add with the input) 38 | ''' 39 | 40 | def __init__(self, in_channels, key_channels, value_channels, out_channels=None, scale=1): 41 | super(_SelfAttentionBlock, self).__init__() 42 | self.scale = scale 43 | self.in_channels = in_channels 44 | self.out_channels = out_channels 45 | self.key_channels = key_channels 46 | self.value_channels = value_channels 47 | if out_channels == None: 48 | self.out_channels = in_channels 49 | self.pool = nn.MaxPool2d(kernel_size=(scale, scale)) 50 | self.f_key = nn.Sequential( 51 | nn.Conv2d(in_channels=self.in_channels, out_channels=self.key_channels, 52 | kernel_size=1, stride=1, padding=0), 53 | InPlaceABNSync(self.key_channels), 54 | ) 55 | self.f_query = self.f_key 56 | self.f_value = nn.Conv2d(in_channels=self.in_channels, out_channels=self.value_channels, 57 | kernel_size=1, stride=1, padding=0) 58 | self.W = nn.Conv2d(in_channels=self.value_channels, out_channels=self.out_channels, 59 | kernel_size=1, stride=1, padding=0) 60 | nn.init.constant(self.W.weight, 0) 61 | nn.init.constant(self.W.bias, 0) 62 | 63 | def forward(self, x): 64 | batch_size, h, w = x.size(0), x.size(2), x.size(3) 65 | if self.scale > 1: 66 | x = self.pool(x) 67 | 68 | value = self.f_value(x).view(batch_size, self.value_channels, -1) 69 | value = value.permute(0, 2, 1) 70 | query = self.f_query(x).view(batch_size, self.key_channels, -1) 71 | query = query.permute(0, 2, 1) 72 | key = self.f_key(x).view(batch_size, self.key_channels, -1) 73 | 74 | sim_map = torch.matmul(query, key) 75 | sim_map = (self.key_channels ** -.5) * sim_map 76 | sim_map = F.softmax(sim_map, dim=-1) 77 | 78 | context = torch.matmul(sim_map, value) 79 | context = context.permute(0, 2, 1).contiguous() 80 | context = context.view(batch_size, self.value_channels, *x.size()[2:]) 81 | context = self.W(context) 82 | if self.scale > 1: 83 | context = F.upsample(input=context, size=(h, w), mode='bilinear', align_corners=True) 84 | return context 85 | 86 | 87 | class SelfAttentionBlock2D(_SelfAttentionBlock): 88 | def __init__(self, in_channels, key_channels, value_channels, out_channels=None, scale=1): 89 | super(SelfAttentionBlock2D, self).__init__(in_channels, 90 | key_channels, 91 | value_channels, 92 | out_channels, 93 | scale) 94 | 95 | 96 | class BaseOC_Module(nn.Module): 97 | """ 98 | Implementation of the BaseOC module 99 | Parameters: 100 | in_features / out_features: the channels of the input / output feature maps. 101 | dropout: we choose 0.05 as the default value. 102 | size: you can apply multiple sizes. Here we only use one size. 103 | Return: 104 | features fused with Object context information. 
105 | """ 106 | 107 | def __init__(self, in_channels, out_channels, key_channels, value_channels, dropout, sizes=([1])): 108 | super(BaseOC_Module, self).__init__() 109 | self.stages = [] 110 | self.stages = nn.ModuleList( 111 | [self._make_stage(in_channels, out_channels, key_channels, value_channels, size) for size in sizes]) 112 | self.conv_bn_dropout = nn.Sequential( 113 | nn.Conv2d(2 * in_channels, out_channels, kernel_size=1, padding=0), 114 | InPlaceABNSync(out_channels), 115 | nn.Dropout2d(dropout) 116 | ) 117 | 118 | def _make_stage(self, in_channels, output_channels, key_channels, value_channels, size): 119 | return SelfAttentionBlock2D(in_channels, 120 | key_channels, 121 | value_channels, 122 | output_channels, 123 | size) 124 | 125 | def forward(self, feats): 126 | priors = [stage(feats) for stage in self.stages] 127 | context = priors[0] 128 | for i in range(1, len(priors)): 129 | context += priors[i] 130 | output = self.conv_bn_dropout(torch.cat([context, feats], 1)) 131 | return output 132 | 133 | 134 | class BaseOC_Context_Module(nn.Module): 135 | """ 136 | Output only the context features. 137 | Parameters: 138 | in_features / out_features: the channels of the input / output feature maps. 139 | dropout: specify the dropout ratio 140 | fusion: We provide two different fusion method, "concat" or "add" 141 | size: we find that directly learn the attention weights on even 1/8 feature maps is hard. 142 | Return: 143 | features after "concat" or "add" 144 | """ 145 | 146 | def __init__(self, in_channels, out_channels, key_channels, value_channels, dropout, sizes=([1])): 147 | super(BaseOC_Context_Module, self).__init__() 148 | self.stages = [] 149 | self.stages = nn.ModuleList( 150 | [self._make_stage(in_channels, out_channels, key_channels, value_channels, size) for size in sizes]) 151 | self.conv_bn_dropout = nn.Sequential( 152 | nn.Conv2d(in_channels, out_channels, kernel_size=1, padding=0), 153 | InPlaceABNSync(out_channels), 154 | ) 155 | 156 | def _make_stage(self, in_channels, output_channels, key_channels, value_channels, size): 157 | return SelfAttentionBlock2D(in_channels, 158 | key_channels, 159 | value_channels, 160 | output_channels, 161 | size) 162 | 163 | def forward(self, feats): 164 | priors = [stage(feats) for stage in self.stages] 165 | context = priors[0] 166 | for i in range(1, len(priors)): 167 | context += priors[i] 168 | output = self.conv_bn_dropout(context) 169 | return output 170 | 171 | 172 | class ASP_OC_Module(nn.Module): 173 | def __init__(self, features, out_features=256, dilations=(12, 24, 36)): 174 | super(ASP_OC_Module, self).__init__() 175 | self.context = nn.Sequential(nn.Conv2d(features, out_features, kernel_size=3, padding=1, dilation=1, bias=True), 176 | InPlaceABNSync(out_features), 177 | BaseOC_Context_Module(in_channels=out_features, out_channels=out_features, 178 | key_channels=out_features // 2, value_channels=out_features, 179 | dropout=0, sizes=([2]))) 180 | self.conv2 = nn.Sequential(nn.Conv2d(features, out_features, kernel_size=1, padding=0, dilation=1, bias=False), 181 | InPlaceABNSync(out_features)) 182 | self.conv3 = nn.Sequential( 183 | nn.Conv2d(features, out_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False), 184 | InPlaceABNSync(out_features)) 185 | self.conv4 = nn.Sequential( 186 | nn.Conv2d(features, out_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False), 187 | InPlaceABNSync(out_features)) 188 | self.conv5 = nn.Sequential( 189 | nn.Conv2d(features, 
out_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False), 190 | InPlaceABNSync(out_features)) 191 | 192 | self.conv_bn_dropout = nn.Sequential( 193 | nn.Conv2d(out_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False), 194 | InPlaceABNSync(out_features), 195 | nn.Dropout2d(0.1) 196 | ) 197 | 198 | def _cat_each(self, feat1, feat2, feat3, feat4, feat5): 199 | assert (len(feat1) == len(feat2)) 200 | z = [] 201 | for i in range(len(feat1)): 202 | z.append(torch.cat((feat1[i], feat2[i], feat3[i], feat4[i], feat5[i]), 1)) 203 | return z 204 | 205 | def forward(self, x): 206 | if isinstance(x, Variable): 207 | _, _, h, w = x.size() 208 | elif isinstance(x, tuple) or isinstance(x, list): 209 | _, _, h, w = x[0].size() 210 | else: 211 | raise RuntimeError('unknown input type') 212 | 213 | feat1 = self.context(x) 214 | feat2 = self.conv2(x) 215 | feat3 = self.conv3(x) 216 | feat4 = self.conv4(x) 217 | feat5 = self.conv5(x) 218 | 219 | if isinstance(x, Variable): 220 | out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1) 221 | elif isinstance(x, tuple) or isinstance(x, list): 222 | out = self._cat_each(feat1, feat2, feat3, feat4, feat5) 223 | else: 224 | raise RuntimeError('unknown input type') 225 | output = self.conv_bn_dropout(out) 226 | return output 227 | -------------------------------------------------------------------------------- /schp/utils/lovasz_softmax.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """ 5 | @Author : Peike Li 6 | @Contact : peike.li@yahoo.com 7 | @File : lovasz_softmax.py 8 | @Time : 8/30/19 7:12 PM 9 | @Desc : Lovasz-Softmax and Jaccard hinge loss in PyTorch 10 | Maxim Berman 2018 ESAT-PSI KU Leuven (MIT License) 11 | @License : This source code is licensed under the license found in the 12 | LICENSE file in the root directory of this source tree. 13 | """ 14 | 15 | from __future__ import print_function, division 16 | 17 | import torch 18 | from torch.autograd import Variable 19 | import torch.nn.functional as F 20 | import numpy as np 21 | from torch import nn 22 | 23 | try: 24 | from itertools import ifilterfalse 25 | except ImportError: # py3k 26 | from itertools import filterfalse as ifilterfalse 27 | 28 | 29 | def lovasz_grad(gt_sorted): 30 | """ 31 | Computes gradient of the Lovasz extension w.r.t sorted errors 32 | See Alg. 1 in paper 33 | """ 34 | p = len(gt_sorted) 35 | gts = gt_sorted.sum() 36 | intersection = gts - gt_sorted.float().cumsum(0) 37 | union = gts + (1 - gt_sorted).float().cumsum(0) 38 | jaccard = 1. 
- intersection / union 39 | if p > 1: # cover 1-pixel case 40 | jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] 41 | return jaccard 42 | 43 | 44 | def iou_binary(preds, labels, EMPTY=1., ignore=None, per_image=True): 45 | """ 46 | IoU for foreground class 47 | binary: 1 foreground, 0 background 48 | """ 49 | if not per_image: 50 | preds, labels = (preds,), (labels,) 51 | ious = [] 52 | for pred, label in zip(preds, labels): 53 | intersection = ((label == 1) & (pred == 1)).sum() 54 | union = ((label == 1) | ((pred == 1) & (label != ignore))).sum() 55 | if not union: 56 | iou = EMPTY 57 | else: 58 | iou = float(intersection) / float(union) 59 | ious.append(iou) 60 | iou = mean(ious) # mean accross images if per_image 61 | return 100 * iou 62 | 63 | 64 | def iou(preds, labels, C, EMPTY=1., ignore=None, per_image=False): 65 | """ 66 | Array of IoU for each (non ignored) class 67 | """ 68 | if not per_image: 69 | preds, labels = (preds,), (labels,) 70 | ious = [] 71 | for pred, label in zip(preds, labels): 72 | iou = [] 73 | for i in range(C): 74 | if i != ignore: # The ignored label is sometimes among predicted classes (ENet - CityScapes) 75 | intersection = ((label == i) & (pred == i)).sum() 76 | union = ((label == i) | ((pred == i) & (label != ignore))).sum() 77 | if not union: 78 | iou.append(EMPTY) 79 | else: 80 | iou.append(float(intersection) / float(union)) 81 | ious.append(iou) 82 | ious = [mean(iou) for iou in zip(*ious)] # mean accross images if per_image 83 | return 100 * np.array(ious) 84 | 85 | 86 | # --------------------------- BINARY LOSSES --------------------------- 87 | 88 | 89 | def lovasz_hinge(logits, labels, per_image=True, ignore=None): 90 | """ 91 | Binary Lovasz hinge loss 92 | logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) 93 | labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) 94 | per_image: compute the loss per image instead of per batch 95 | ignore: void class id 96 | """ 97 | if per_image: 98 | loss = mean(lovasz_hinge_flat(*flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore)) 99 | for log, lab in zip(logits, labels)) 100 | else: 101 | loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore)) 102 | return loss 103 | 104 | 105 | def lovasz_hinge_flat(logits, labels): 106 | """ 107 | Binary Lovasz hinge loss 108 | logits: [P] Variable, logits at each prediction (between -\infty and +\infty) 109 | labels: [P] Tensor, binary ground truth labels (0 or 1) 110 | ignore: label to ignore 111 | """ 112 | if len(labels) == 0: 113 | # only void pixels, the gradients should be 0 114 | return logits.sum() * 0. 115 | signs = 2. * labels.float() - 1. 116 | errors = (1. 
- logits * Variable(signs)) 117 | errors_sorted, perm = torch.sort(errors, dim=0, descending=True) 118 | perm = perm.data 119 | gt_sorted = labels[perm] 120 | grad = lovasz_grad(gt_sorted) 121 | loss = torch.dot(F.relu(errors_sorted), Variable(grad)) 122 | return loss 123 | 124 | 125 | def flatten_binary_scores(scores, labels, ignore=None): 126 | """ 127 | Flattens predictions in the batch (binary case) 128 | Remove labels equal to 'ignore' 129 | """ 130 | scores = scores.view(-1) 131 | labels = labels.view(-1) 132 | if ignore is None: 133 | return scores, labels 134 | valid = (labels != ignore) 135 | vscores = scores[valid] 136 | vlabels = labels[valid] 137 | return vscores, vlabels 138 | 139 | 140 | class StableBCELoss(torch.nn.modules.Module): 141 | def __init__(self): 142 | super(StableBCELoss, self).__init__() 143 | 144 | def forward(self, input, target): 145 | neg_abs = - input.abs() 146 | loss = input.clamp(min=0) - input * target + (1 + neg_abs.exp()).log() 147 | return loss.mean() 148 | 149 | 150 | def binary_xloss(logits, labels, ignore=None): 151 | """ 152 | Binary Cross entropy loss 153 | logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) 154 | labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) 155 | ignore: void class id 156 | """ 157 | logits, labels = flatten_binary_scores(logits, labels, ignore) 158 | loss = StableBCELoss()(logits, Variable(labels.float())) 159 | return loss 160 | 161 | 162 | # --------------------------- MULTICLASS LOSSES --------------------------- 163 | 164 | 165 | def lovasz_softmax(probas, labels, classes='present', per_image=False, ignore=255, weighted=None): 166 | """ 167 | Multi-class Lovasz-Softmax loss 168 | probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1). 169 | Interpreted as binary (sigmoid) output with outputs of size [B, H, W]. 170 | labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) 171 | classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. 172 | per_image: compute the loss per image instead of per batch 173 | ignore: void class labels 174 | """ 175 | if per_image: 176 | loss = mean(lovasz_softmax_flat(*flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore), classes=classes, weighted=weighted) 177 | for prob, lab in zip(probas, labels)) 178 | else: 179 | loss = lovasz_softmax_flat(*flatten_probas(probas, labels, ignore), classes=classes, weighted=weighted ) 180 | return loss 181 | 182 | 183 | def lovasz_softmax_flat(probas, labels, classes='present', weighted=None): 184 | """ 185 | Multi-class Lovasz-Softmax loss 186 | probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1) 187 | labels: [P] Tensor, ground truth labels (between 0 and C - 1) 188 | classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. 189 | """ 190 | if probas.numel() == 0: 191 | # only void pixels, the gradients should be 0 192 | return probas * 0. 
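# Aside: a small worked example for lovasz_grad (defined near the top of this file), the vector
# that the dot products below multiply against the sorted errors. Assumes the snippet is run from
# the schp/ directory; the ground-truth vector is arbitrary and ordered by descending error.
import torch
from utils.lovasz_softmax import lovasz_grad

gt_sorted = torch.tensor([1, 1, 0, 1, 0])
print(lovasz_grad(gt_sorted))
# tensor([0.3333, 0.3333, 0.0833, 0.2500, 0.0000])
# Each entry is the increase in Jaccard loss caused by one more of the sorted predictions being
# wrong; the increments are non-negative and sum to 1 here.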
193 | C = probas.size(1) 194 | losses = [] 195 | class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes 196 | for c in class_to_sum: 197 | fg = (labels == c).float() # foreground for class c 198 | if (classes is 'present' and fg.sum() == 0): 199 | continue 200 | if C == 1: 201 | if len(classes) > 1: 202 | raise ValueError('Sigmoid output possible only with 1 class') 203 | class_pred = probas[:, 0] 204 | else: 205 | class_pred = probas[:, c] 206 | errors = (Variable(fg) - class_pred).abs() 207 | errors_sorted, perm = torch.sort(errors, 0, descending=True) 208 | perm = perm.data 209 | fg_sorted = fg[perm] 210 | if weighted is not None: 211 | losses.append(weighted[c]*torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted)))) 212 | else: 213 | losses.append(torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted)))) 214 | return mean(losses) 215 | 216 | 217 | def flatten_probas(probas, labels, ignore=None): 218 | """ 219 | Flattens predictions in the batch 220 | """ 221 | if probas.dim() == 3: 222 | # assumes output of a sigmoid layer 223 | B, H, W = probas.size() 224 | probas = probas.view(B, 1, H, W) 225 | B, C, H, W = probas.size() 226 | probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C 227 | labels = labels.view(-1) 228 | if ignore is None: 229 | return probas, labels 230 | valid = (labels != ignore) 231 | vprobas = probas[valid.nonzero().squeeze()] 232 | vlabels = labels[valid] 233 | return vprobas, vlabels 234 | 235 | 236 | def xloss(logits, labels, ignore=None): 237 | """ 238 | Cross entropy loss 239 | """ 240 | return F.cross_entropy(logits, Variable(labels), ignore_index=255) 241 | 242 | 243 | # --------------------------- HELPER FUNCTIONS --------------------------- 244 | def isnan(x): 245 | return x != x 246 | 247 | 248 | def mean(l, ignore_nan=False, empty=0): 249 | """ 250 | nanmean compatible with generators. 251 | """ 252 | l = iter(l) 253 | if ignore_nan: 254 | l = ifilterfalse(isnan, l) 255 | try: 256 | n = 1 257 | acc = next(l) 258 | except StopIteration: 259 | if empty == 'raise': 260 | raise ValueError('Empty mean') 261 | return empty 262 | for n, v in enumerate(l, 2): 263 | acc += v 264 | if n == 1: 265 | return acc 266 | return acc / n 267 | 268 | # --------------------------- Class --------------------------- 269 | class LovaszSoftmax(nn.Module): 270 | def __init__(self, per_image=False, ignore_index=255, weighted=None): 271 | super(LovaszSoftmax, self).__init__() 272 | self.lovasz_softmax = lovasz_softmax 273 | self.per_image = per_image 274 | self.ignore_index=ignore_index 275 | self.weighted = weighted 276 | 277 | def forward(self, pred, label): 278 | pred = F.softmax(pred, dim=1) 279 | return self.lovasz_softmax(pred, label, per_image=self.per_image, ignore=self.ignore_index, weighted=self.weighted) --------------------------------------------------------------------------------
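Finally, a minimal sketch of calling the LovaszSoftmax wrapper defined above directly on random logits. The 20 classes match the LIP default used elsewhere in this repository, the void value matches the wrapper's default ignore_index, and the snippet assumes it is run from the schp/ directory:

import torch
from utils.lovasz_softmax import LovaszSoftmax

criterion = LovaszSoftmax(per_image=False, ignore_index=255)
logits = torch.randn(2, 20, 64, 64)            # [B, C, H, W] raw network outputs
labels = torch.randint(0, 20, (2, 64, 64))     # [B, H, W] integer class labels
labels[0, :4, :4] = 255                        # void pixels are filtered out by flatten_probas
loss = criterion(logits, labels)               # softmax followed by the multi-class Lovasz loss
print(loss.item())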