├── mask2former ├── evaluation │ └── __init__.py ├── utils │ └── __init__.py ├── modeling │ ├── backbone │ │ └── __init__.py │ ├── meta_arch │ │ └── __init__.py │ ├── pixel_decoder │ │ ├── __init__.py │ │ └── ops │ │ │ ├── make.sh │ │ │ ├── modules │ │ │ └── __init__.py │ │ │ ├── functions │ │ │ └── __init__.py │ │ │ ├── src │ │ │ ├── vision.cpp │ │ │ ├── cuda │ │ │ │ └── ms_deform_attn_cuda.h │ │ │ ├── cpu │ │ │ │ ├── ms_deform_attn_cpu.h │ │ │ │ └── ms_deform_attn_cpu.cpp │ │ │ └── ms_deform_attn.h │ │ │ └── setup.py │ ├── transformer_decoder │ │ ├── __init__.py │ │ └── position_encoding.py │ └── __init__.py ├── data │ ├── dataset_mappers │ │ └── __init__.py │ ├── __init__.py │ └── datasets │ │ ├── __init__.py │ │ ├── register_road_anomaly.py │ │ ├── register_fs_static.py │ │ └── register_fs_laf.py └── __init__.py ├── denseflow ├── flows │ ├── __init__.py │ ├── inverse_flow.py │ ├── flow.py │ ├── cond_flow.py │ └── cond_inverse_flow.py ├── nn │ ├── __init__.py │ ├── layers │ │ ├── encoding │ │ │ ├── __init__.py │ │ │ └── positional_encoding_image.py │ │ ├── autoregressive │ │ │ ├── __init__.py │ │ │ ├── ar_shift.py │ │ │ ├── masked_conv_2d.py │ │ │ └── utils.py │ │ ├── __init__.py │ │ ├── lambda_layer.py │ │ ├── constraints_factory.py │ │ ├── activations_functional.py │ │ ├── activations.py │ │ └── activations_factory.py │ ├── nets │ │ ├── __init__.py │ │ ├── matching │ │ │ ├── __init__.py │ │ │ ├── resnet.py │ │ │ └── multiscale_densenet.py │ │ ├── autoregressive │ │ │ ├── __init__.py │ │ │ ├── pixelcnn.py │ │ │ └── transformer.py │ │ └── mlp.py │ └── blocks │ │ ├── autoregressive │ │ ├── __init__.py │ │ └── masked_residual_block_2d.py │ │ ├── __init__.py │ │ └── resblock.py ├── __init__.py ├── utils │ ├── __init__.py │ ├── context.py │ └── tensors.py ├── transforms │ ├── bijections │ │ ├── functional │ │ │ ├── __init__.py │ │ │ ├── mixtures │ │ │ │ ├── __init__.py │ │ │ │ ├── params.py │ │ │ │ ├── utils_logistic.py │ │ │ │ ├── gaussian_mixture.py │ │ │ │ ├── logistic_mixture_censored.py │ │ │ │ ├── logistic_mixture.py │ │ │ │ └── utils_logistic_censored.py │ │ │ ├── splines │ │ │ │ ├── __init__.py │ │ │ │ └── utils.py │ │ │ └── iterative_inversion.py │ │ ├── coupling │ │ │ ├── __init__.py │ │ │ ├── coupling.py │ │ │ └── coupling_linear.py │ │ ├── conditional │ │ │ ├── __init__.py │ │ │ ├── coupling │ │ │ │ ├── __init__.py │ │ │ │ ├── coupling_linear.py │ │ │ │ └── coupling.py │ │ │ ├── autoregressive │ │ │ │ ├── __init__.py │ │ │ │ ├── autoregressive_linear_2d.py │ │ │ │ └── autoregressive_2d.py │ │ │ ├── base.py │ │ │ └── elementwise_linear.py │ │ ├── base.py │ │ ├── autoregressive │ │ │ ├── __init__.py │ │ │ ├── autoregressive.py │ │ │ ├── autoregressive_linear.py │ │ │ ├── autoregressive_linear_2d.py │ │ │ └── autoregressive_2d.py │ │ ├── channel_switch.py │ │ ├── __init__.py │ │ ├── reshape.py │ │ ├── permute_axes.py │ │ ├── wavelet.py │ │ ├── orth_squeeze.py │ │ ├── unsqueeze.py │ │ ├── affine.py │ │ ├── orth_squeeze_pgd.py │ │ ├── linear.py │ │ ├── rotate.py │ │ ├── permute.py │ │ ├── linear_lowrank.py │ │ └── conv1x1.py │ ├── stochastic │ │ ├── __init__.py │ │ ├── base.py │ │ ├── vae.py │ │ └── permutation.py │ ├── __init__.py │ ├── surjections │ │ ├── __init__.py │ │ ├── base.py │ │ ├── abs.py │ │ ├── slice.py │ │ ├── augment.py │ │ ├── dequantization_uniform.py │ │ ├── dequantization_variational.py │ │ ├── maxpool2d.py │ │ └── sort.py │ ├── cond_base.py │ └── base.py ├── distributions │ ├── conditional │ │ ├── __init__.py │ │ ├── base.py │ │ ├── categorical.py │ │ ├── 
bernoulli.py │ │ └── normal.py │ ├── __init__.py │ ├── data_parallel.py │ ├── uniform.py │ ├── half_normal.py │ ├── base.py │ └── normal.py ├── dense_distribution.py └── dense_flow.py ├── requirements.txt ├── .idea ├── vcs.xml ├── .gitignore ├── inspectionProfiles │ └── profiles_settings.xml ├── modules.xml ├── Open-set-M2F.iml └── deployment.xml ├── configs └── cityscapes │ └── semantic-segmentation │ ├── maskformer2_R101_bs16_90k.yaml │ ├── swin │ ├── maskformer2_swin_tiny_bs16_90k.yaml │ ├── maskformer2_swin_small_bs16_90k.yaml │ ├── maskformer2_swin_base_IN21k_384_bs16_90k.yaml │ ├── maskformer2_swin_large_IN21k_384_bs16_90k.yaml │ ├── maskformer2_swin_large_IN21k_384_bs18_115k_city+vistas.yaml │ ├── maskformer2_swin_large_IN21k_384_bs12_2k_city+vistas_oe.yaml │ ├── maskformer2_swin_large_IN21k_384_bs12_2k_city+vistas_uno.yaml │ └── maskformer2_swin_large_IN21k_384_bs12_2k_city+vistas_uno_synthetic.yaml │ ├── maskformer2_R50_bs16_90k.yaml │ └── Base-Cityscapes-SemanticSegmentation.yaml ├── .gitignore ├── tools ├── convert-pretrained-swin-model-to-d2.py ├── convert-torchvision-to-d2.py ├── evaluate_coco_boundary_ap.py └── README.md ├── cog.yaml ├── datasets ├── prepare_ade20k_sem_seg.py ├── ade20k_instance_catid_mapping.txt └── prepare_coco_semantic_annos_from_panoptic_annos.py ├── LICENSE ├── CONTRIBUTING.md ├── INSTALL.md └── predict.py /mask2former/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /denseflow/flows/__init__.py: -------------------------------------------------------------------------------- 1 | from .flow import Flow -------------------------------------------------------------------------------- /denseflow/nn/__init__.py: -------------------------------------------------------------------------------- 1 | from .layers import * 2 | -------------------------------------------------------------------------------- /denseflow/__init__.py: -------------------------------------------------------------------------------- 1 | from .dense_flow import DenseFlow -------------------------------------------------------------------------------- /denseflow/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .tensors import * 2 | from .context import * -------------------------------------------------------------------------------- /denseflow/nn/layers/encoding/__init__.py: -------------------------------------------------------------------------------- 1 | from .positional_encoding_image import * 2 | -------------------------------------------------------------------------------- /mask2former/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mask2former/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mask2former/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
cython
scipy
shapely
timm
h5py
submitit
scikit-image
--------------------------------------------------------------------------------
/mask2former/data/dataset_mappers/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates.
--------------------------------------------------------------------------------
/mask2former/modeling/pixel_decoder/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates.
--------------------------------------------------------------------------------
/denseflow/transforms/bijections/functional/__init__.py:
--------------------------------------------------------------------------------
from .splines import *
from .mixtures import *
--------------------------------------------------------------------------------
/mask2former/data/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates.
from . import datasets
--------------------------------------------------------------------------------
/denseflow/nn/nets/__init__.py:
--------------------------------------------------------------------------------
from .mlp import MLP

from .autoregressive import *
from .matching import *
--------------------------------------------------------------------------------
/denseflow/transforms/stochastic/__init__.py:
--------------------------------------------------------------------------------
from .base import *

from .vae import *
from .permutation import *
--------------------------------------------------------------------------------
/denseflow/nn/blocks/autoregressive/__init__.py:
--------------------------------------------------------------------------------
from .masked_residual_block_2d import *

from .transformer import *
from .sparse_transformer import *
--------------------------------------------------------------------------------
/denseflow/nn/layers/autoregressive/__init__.py:
--------------------------------------------------------------------------------
from .ar_shift import *
from .seq_reorder import *

from .masked_linear import *
from .masked_conv_2d import *
--------------------------------------------------------------------------------
/denseflow/distributions/conditional/__init__.py:
--------------------------------------------------------------------------------
from .base import ConditionalDistribution

from .bernoulli import *
from .categorical import *
from .normal import *
--------------------------------------------------------------------------------
/denseflow/nn/nets/matching/__init__.py:
--------------------------------------------------------------------------------
from .densenet import DenseNet, DenseNetMultihead, PureDenseNet
from .multiscale_densenet import MultiscaleDenseNet
from .resnet import ResNet
--------------------------------------------------------------------------------
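These `__init__.py` files define the public import surface of the two packages: users are meant to pull classes from the package level rather than from individual modules. A minimal sketch of how the re-exports above are consumed (the `MLP` signature is shown later in this dump; `ResNet` is imported but its constructor arguments are not shown here, so it is only imported, not instantiated):

```python
# Sketch of the intended import surface, based on the re-exports above.
# denseflow.nn.nets re-exports MLP directly and ResNet via .matching;
# ConditionalDistribution is the base class re-exported by name.
from denseflow.nn.nets import MLP, ResNet
from denseflow.distributions.conditional import ConditionalDistribution

# MLP(input_size, output_size, hidden_units, activation=...) per nets/mlp.py below.
net = MLP(input_size=784, output_size=10, hidden_units=[512, 256], activation='relu')
```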
/denseflow/transforms/bijections/coupling/__init__.py:
--------------------------------------------------------------------------------
from .coupling import *
from .coupling_linear import *
from .coupling_splines import *
from .coupling_mixtures import *
--------------------------------------------------------------------------------
/denseflow/transforms/bijections/conditional/__init__.py:
--------------------------------------------------------------------------------
from .base import *

from .elementwise_linear import *

from .coupling import *
from .autoregressive import *
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/denseflow/transforms/bijections/conditional/coupling/__init__.py:
--------------------------------------------------------------------------------
from .coupling import *
from .coupling_linear import *
from .coupling_splines import *
from .coupling_mixtures import *
--------------------------------------------------------------------------------
/denseflow/transforms/bijections/functional/mixtures/__init__.py:
--------------------------------------------------------------------------------
from .params import *

from .gaussian_mixture import *
from .logistic_mixture import *
from .logistic_mixture_censored import *
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
--------------------------------------------------------------------------------
/denseflow/nn/nets/autoregressive/__init__.py:
--------------------------------------------------------------------------------
from .made import MADE, AgnosticMADE
from .pixelcnn import PixelCNN
from .transformer import DecoderOnlyTransformer2d
from .sparse_transformer import DenseTransformer2d
--------------------------------------------------------------------------------
/denseflow/transforms/__init__.py:
--------------------------------------------------------------------------------
from .base import Transform, SequentialTransform
from .cond_base import ConditionalTransform

from .bijections import *
from .surjections import *
from .stochastic import *
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/denseflow/transforms/bijections/conditional/autoregressive/__init__.py:
--------------------------------------------------------------------------------
from .autoregressive_2d import *
from .autoregressive_linear_2d import *
from .autoregressive_splines_2d import *
from .autoregressive_mixtures_2d import *
--------------------------------------------------------------------------------
/denseflow/distributions/__init__.py:
--------------------------------------------------------------------------------
from .base import Distribution
from .data_parallel import DataParallelDistribution

from .normal import *
from .uniform import *
from .half_normal import *

from .conditional import *
--------------------------------------------------------------------------------
/denseflow/nn/blocks/__init__.py:
--------------------------------------------------------------------------------
from .denseblock import DenseLayer, DenseBlock, ResidualDenseBlock, MultiHeadDenseBlock

from .autoregressive import *

from .attention import MultiheadAttention
from .resblock import ResidualBlock
--------------------------------------------------------------------------------
/denseflow/nn/layers/__init__.py:
--------------------------------------------------------------------------------
from .lambda_layer import *
from .elementwise_params import *
from .activations_functional import *
from .activations import *
from .activations_factory import *
from .constraints_factory import *
--------------------------------------------------------------------------------
/denseflow/utils/context.py:
--------------------------------------------------------------------------------
import torch


def context_size(context):
    while not isinstance(context, torch.Tensor):
        first_key = list(context.keys())[0]
        context = context[first_key]
    return context.shape[0]
--------------------------------------------------------------------------------
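`context_size` above descends into (possibly nested) dict contexts until it reaches a tensor, then reports that tensor's batch dimension; all entries are therefore assumed to share the same batch size. A quick usage sketch (shapes invented for illustration):

```python
import torch
from denseflow.utils import context_size  # re-exported via denseflow/utils/__init__.py

# A plain tensor context: the batch size is just shape[0].
assert context_size(torch.zeros(8, 3, 32, 32)) == 8

# A nested dict context: the first value found is inspected, so every
# entry is assumed to carry the same leading batch dimension.
ctx = {'features': {'low': torch.zeros(8, 64)}, 'mask': torch.zeros(8, 1)}
assert context_size(ctx) == 8
```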
/mask2former/modeling/transformer_decoder/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates.
from .maskformer_transformer_decoder import StandardTransformerDecoder
from .mask2former_transformer_decoder import MultiScaleMaskedTransformerDecoder
--------------------------------------------------------------------------------
/denseflow/transforms/bijections/base.py:
--------------------------------------------------------------------------------
from denseflow.transforms import Transform


class Bijection(Transform):
    """Base class for Bijection"""

    bijective = True
    stochastic_forward = False
    stochastic_inverse = False
    lower_bound = False
--------------------------------------------------------------------------------
/denseflow/transforms/surjections/__init__.py:
--------------------------------------------------------------------------------
from .base import *

from .abs import *
from .sort import *
from .slice import *
from .augment import *
from .maxpool2d import *

from .dequantization_uniform import *
from .dequantization_variational import *
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/denseflow/transforms/stochastic/base.py:
--------------------------------------------------------------------------------
from denseflow.transforms import Transform


class StochasticTransform(Transform):
    """Base class for StochasticTransform"""

    has_inverse = True
    bijective = False
    stochastic_forward = True
    stochastic_inverse = True
--------------------------------------------------------------------------------
/denseflow/transforms/bijections/conditional/base.py:
-------------------------------------------------------------------------------- 1 | from denseflow.transforms import ConditionalTransform 2 | 3 | 4 | class ConditionalBijection(ConditionalTransform): 5 | """Base class for ConditionalBijection""" 6 | 7 | bijective = True 8 | stochastic_forward = False 9 | stochastic_inverse = False 10 | lower_bound = False 11 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/autoregressive/__init__.py: -------------------------------------------------------------------------------- 1 | from .autoregressive import * 2 | from .autoregressive_linear import * 3 | from .autoregressive_splines import * 4 | from .autoregressive_mixtures import * 5 | 6 | from .autoregressive_2d import * 7 | from .autoregressive_linear_2d import * 8 | from .autoregressive_splines_2d import * 9 | from .autoregressive_mixtures_2d import * 10 | -------------------------------------------------------------------------------- /mask2former/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .backbone.swin import D2SwinTransformer 3 | from .pixel_decoder.fpn import BasePixelDecoder 4 | from .pixel_decoder.msdeformattn import MSDeformAttnPixelDecoder 5 | from .meta_arch.mask_former_head import MaskFormerHead 6 | from .meta_arch.per_pixel_baseline import PerPixelBaselineHead, PerPixelBaselinePlusHead 7 | -------------------------------------------------------------------------------- /configs/cityscapes/semantic-segmentation/maskformer2_R101_bs16_90k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: maskformer2_R50_bs16_90k.yaml 2 | MODEL: 3 | WEIGHTS: "R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | STEM_TYPE: "basic" # not used 7 | STEM_OUT_CHANNELS: 64 8 | STRIDE_IN_1X1: False 9 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 10 | NORM: "SyncBN" 11 | RES5_MULTI_GRID: [1, 1, 1] # not used 12 | -------------------------------------------------------------------------------- /denseflow/transforms/surjections/base.py: -------------------------------------------------------------------------------- 1 | from denseflow.transforms import Transform 2 | 3 | 4 | class Surjection(Transform): 5 | """Base class for Surjection""" 6 | 7 | bijective = False 8 | 9 | @property 10 | def stochastic_forward(self): 11 | raise NotImplementedError() 12 | 13 | @property 14 | def stochastic_inverse(self): 15 | return not self.stochastic_forward 16 | -------------------------------------------------------------------------------- /denseflow/nn/layers/lambda_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class LambdaLayer(nn.Module): 6 | def __init__(self, lambd): 7 | super(LambdaLayer, self).__init__() 8 | if lambd is None: lambd = lambda x: x 9 | self.lambd = lambd 10 | 11 | def forward(self, x): 12 | return self.lambd(x) 13 | 14 | 15 | class Flatten(nn.Module): 16 | def forward(self, x): 17 | return x.view(x.shape[0], -1) 18 | -------------------------------------------------------------------------------- /mask2former/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . 
import ( 3 | register_ade20k_full, 4 | register_ade20k_panoptic, 5 | register_coco_stuff_10k, 6 | register_mapillary_vistas, 7 | register_coco_panoptic_annos_semseg, 8 | register_ade20k_instance, 9 | register_mapillary_vistas_panoptic, 10 | register_smiyc, 11 | register_fs_laf, 12 | register_fs_static, 13 | register_road_anomaly, 14 | ) 15 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/functional/mixtures/params.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def get_mixture_params(params, num_mixtures): 5 | '''Get parameters for mixture transforms.''' 6 | assert params.shape[-1] == 3 * num_mixtures 7 | 8 | unnormalized_weights = params[..., :num_mixtures] 9 | means = params[..., num_mixtures:2*num_mixtures] 10 | log_scales = params[..., 2*num_mixtures:3*num_mixtures] 11 | 12 | return unnormalized_weights, means, log_scales 13 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/functional/splines/__init__.py: -------------------------------------------------------------------------------- 1 | from .linear import ( 2 | linear_spline, 3 | unconstrained_linear_spline 4 | ) 5 | 6 | from .quadratic import ( 7 | quadratic_spline, 8 | unconstrained_quadratic_spline 9 | ) 10 | 11 | from .cubic import ( 12 | cubic_spline, 13 | unconstrained_cubic_spline 14 | ) 15 | 16 | from .rational_quadratic import ( 17 | rational_quadratic_spline, 18 | unconstrained_rational_quadratic_spline 19 | ) 20 | 21 | -------------------------------------------------------------------------------- /.idea/Open-set-M2F.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /configs/cityscapes/semantic-segmentation/swin/maskformer2_swin_tiny_bs16_90k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../maskformer2_R50_bs16_90k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "D2SwinTransformer" 5 | SWIN: 6 | EMBED_DIM: 96 7 | DEPTHS: [2, 2, 6, 2] 8 | NUM_HEADS: [3, 6, 12, 24] 9 | WINDOW_SIZE: 7 10 | APE: False 11 | DROP_PATH_RATE: 0.3 12 | PATCH_NORM: True 13 | WEIGHTS: "swin_tiny_patch4_window7_224.pkl" 14 | PIXEL_MEAN: [123.675, 116.280, 103.530] 15 | PIXEL_STD: [58.395, 57.120, 57.375] 16 | -------------------------------------------------------------------------------- /denseflow/nn/layers/constraints_factory.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def scale_fn(scale_str): 6 | assert scale_str in {'exp', 'softplus', 'sigmoid', 'tanh_exp'} 7 | if scale_str == 'exp': return lambda s: torch.exp(s) 8 | elif scale_str == 'softplus': return lambda s: F.softplus(s) 9 | elif scale_str == 'sigmoid': return lambda s: torch.sigmoid(s + 2.) 
+ 1e-3 10 | elif scale_str == 'tanh_exp': return lambda s: torch.exp(2.*torch.tanh(s/2.)) 11 | -------------------------------------------------------------------------------- /configs/cityscapes/semantic-segmentation/swin/maskformer2_swin_small_bs16_90k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../maskformer2_R50_bs16_90k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "D2SwinTransformer" 5 | SWIN: 6 | EMBED_DIM: 96 7 | DEPTHS: [2, 2, 18, 2] 8 | NUM_HEADS: [3, 6, 12, 24] 9 | WINDOW_SIZE: 7 10 | APE: False 11 | DROP_PATH_RATE: 0.3 12 | PATCH_NORM: True 13 | WEIGHTS: "swin_small_patch4_window7_224.pkl" 14 | PIXEL_MEAN: [123.675, 116.280, 103.530] 15 | PIXEL_STD: [58.395, 57.120, 57.375] 16 | -------------------------------------------------------------------------------- /configs/cityscapes/semantic-segmentation/swin/maskformer2_swin_base_IN21k_384_bs16_90k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../maskformer2_R50_bs16_90k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "D2SwinTransformer" 5 | SWIN: 6 | EMBED_DIM: 128 7 | DEPTHS: [2, 2, 18, 2] 8 | NUM_HEADS: [4, 8, 16, 32] 9 | WINDOW_SIZE: 12 10 | APE: False 11 | DROP_PATH_RATE: 0.3 12 | PATCH_NORM: True 13 | PRETRAIN_IMG_SIZE: 384 14 | WEIGHTS: "swin_base_patch4_window12_384_22k.pkl" 15 | PIXEL_MEAN: [123.675, 116.280, 103.530] 16 | PIXEL_STD: [58.395, 57.120, 57.375] 17 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/channel_switch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from denseflow.transforms.bijections import Bijection 3 | 4 | class SwitchChannels(Bijection): 5 | 6 | def __init__(self): 7 | super(SwitchChannels, self).__init__() 8 | 9 | def forward(self, x): 10 | x1, x2 = x.chunk(2, dim=1) 11 | ldj = torch.zeros(x.shape[0], device=x.device, dtype=x.dtype) 12 | return torch.cat([x2, x1], dim=1), ldj 13 | 14 | def inverse(self, z): 15 | x2, x1 = z.chunk(2, dim=1) 16 | return torch.cat([x1, x2], dim=1) 17 | -------------------------------------------------------------------------------- /configs/cityscapes/semantic-segmentation/swin/maskformer2_swin_large_IN21k_384_bs16_90k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../maskformer2_R50_bs16_90k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "D2SwinTransformer" 5 | SWIN: 6 | EMBED_DIM: 192 7 | DEPTHS: [2, 2, 18, 2] 8 | NUM_HEADS: [6, 12, 24, 48] 9 | WINDOW_SIZE: 12 10 | APE: False 11 | DROP_PATH_RATE: 0.3 12 | PATCH_NORM: True 13 | PRETRAIN_IMG_SIZE: 384 14 | WEIGHTS: "swin_large_patch4_window12_384_22k.pkl" 15 | PIXEL_MEAN: [123.675, 116.280, 103.530] 16 | PIXEL_STD: [58.395, 57.120, 57.375] 17 | MASK_FORMER: 18 | NUM_OBJECT_QUERIES: 100 19 | -------------------------------------------------------------------------------- /.idea/deployment.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 15 | -------------------------------------------------------------------------------- /denseflow/distributions/data_parallel.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch import nn 4 | 5 | 6 | class DataParallelDistribution(nn.DataParallel): 7 | """ 8 | A DataParallel wrapper for Distribution. 
9 | To be used instead of nn.DataParallel for Distribution objects. 10 | """ 11 | 12 | def log_prob(self, *args, **kwargs): 13 | return self.forward(*args, mode='log_prob', **kwargs) 14 | 15 | def sample(self, *args, **kwargs): 16 | return self.module.sample(*args, **kwargs) 17 | 18 | def sample_with_log_prob(self, *args, **kwargs): 19 | return self.module.sample_with_log_prob(*args, **kwargs) 20 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import * 2 | 3 | from .affine import * 4 | from .elementwise_nonlinear import * 5 | 6 | from .squeeze import * 7 | from .unsqueeze import * 8 | from .reshape import * 9 | from .rotate import * 10 | from .permute import * 11 | from .permute_axes import * 12 | 13 | from .linear import * 14 | from .linear_lu import * 15 | 16 | from .conv1x1 import * 17 | 18 | from .actnorm import * 19 | from .batchnorm import * 20 | 21 | from .coupling import * 22 | from .autoregressive import * 23 | 24 | from .conditional import * 25 | from .wavelet import * 26 | from .orth_squeeze import * 27 | from .orth_squeeze_pgd import * 28 | from .orth_conv1x1_pgd import * 29 | 30 | from .channel_switch import * 31 | -------------------------------------------------------------------------------- /denseflow/nn/layers/autoregressive/ar_shift.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class AutoregressiveShift(nn.Module): 6 | '''Shifts input right to make model autoregressive.''' 7 | 8 | def __init__(self, embed_dim): 9 | super(AutoregressiveShift, self).__init__() 10 | self.embed_dim = embed_dim 11 | self.first_token = nn.Parameter(torch.Tensor(1, 1, embed_dim)) 12 | self._reset_parameters() 13 | 14 | def _reset_parameters(self): 15 | nn.init.xavier_uniform_(self.first_token) 16 | 17 | def forward(self, x): 18 | # x.shape = (l,b,dim) 19 | first_token = self.first_token.expand(1, x.shape[1], self.embed_dim) # (1,b,dim) 20 | return torch.cat([first_token, x[:-1]], dim=0) 21 | -------------------------------------------------------------------------------- /denseflow/transforms/surjections/abs.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn.functional as F 4 | from denseflow.transforms.surjections import Surjection 5 | from denseflow.utils import sum_except_batch 6 | 7 | 8 | class SimpleAbsSurjection(Surjection): 9 | ''' 10 | An absolute value layer. 11 | Uses a fixed inverse which flips the sign with probability 0.5. 12 | This enforces symmetry across all axes. 
13 | ''' 14 | 15 | stochastic_forward = False 16 | 17 | def forward(self, x): 18 | z = x.abs() 19 | ldj = - x.new_ones(x.shape[0]) * math.log(2) * x.shape[1:].numel() 20 | return z, ldj 21 | 22 | def inverse(self, z): 23 | s = torch.bernoulli(0.5*torch.ones_like(z)) 24 | x = (2*s-1)*z 25 | return x 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # output dir 2 | output 3 | instant_test_output 4 | inference_test_output 5 | 6 | 7 | *.png 8 | *.json 9 | *.diff 10 | *.jpg 11 | !/projects/DensePose/doc/images/*.jpg 12 | 13 | # compilation and distribution 14 | __pycache__ 15 | _ext 16 | *.pyc 17 | *.pyd 18 | *.so 19 | *.dll 20 | *.egg-info/ 21 | build/ 22 | dist/ 23 | wheels/ 24 | 25 | # pytorch/python/numpy formats 26 | *.pth 27 | *.pkl 28 | *.npy 29 | *.ts 30 | model_ts*.txt 31 | 32 | # ipython/jupyter notebooks 33 | *.ipynb 34 | **/.ipynb_checkpoints/ 35 | 36 | # Editor temporaries 37 | *.swn 38 | *.swo 39 | *.swp 40 | *~ 41 | 42 | # editor settings 43 | .idea 44 | .vscode 45 | _darcs 46 | 47 | # project dirs 48 | /detectron2/model_zoo/configs 49 | /datasets/* 50 | !/datasets/*.* 51 | /projects/*/datasets 52 | /models 53 | /snippet -------------------------------------------------------------------------------- /denseflow/transforms/bijections/functional/mixtures/utils_logistic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def logistic_log_cdf(x, means, log_scales): 6 | return F.logsigmoid(torch.exp(-log_scales) * (x - means)) 7 | 8 | 9 | def logistic_log_one_minus_cdf(x, means, log_scales): 10 | ''' 11 | Uses that: 12 | `log(1-sigmoid(x)) = - softplus(x)` 13 | ''' 14 | return -F.softplus(torch.exp(-log_scales) * (x - means)) 15 | 16 | 17 | def logistic_log_pdf(x, means, log_scales): 18 | ''' 19 | Uses that: 20 | pdf(x) = dcdf(x)/dx 21 | = dsigmoid((x-m)/s)/dx 22 | = 1/s * sigmoid((x-m)/s) * (1-sigmoid((x-m)/s)) 23 | ''' 24 | return - log_scales + logistic_log_cdf(x, means, log_scales) + logistic_log_one_minus_cdf(x, means, log_scales) 25 | -------------------------------------------------------------------------------- /mask2former/modeling/pixel_decoder/ops/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # ------------------------------------------------------------------------------------------------ 3 | # Deformable DETR 4 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------------------------------ 7 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | # ------------------------------------------------------------------------------------------------ 9 | 10 | # Copyright (c) Facebook, Inc. and its affiliates. 
11 | # Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 12 | 13 | python setup.py build install 14 | -------------------------------------------------------------------------------- /mask2former/modeling/pixel_decoder/ops/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | # Copyright (c) Facebook, Inc. and its affiliates. 10 | # Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 11 | 12 | from .ms_deform_attn import MSDeformAttn 13 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/reshape.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from denseflow.transforms.bijections import Bijection 3 | 4 | 5 | class Reshape(Bijection): 6 | 7 | def __init__(self, input_shape, output_shape): 8 | super(Reshape, self).__init__() 9 | self.input_shape = torch.Size(input_shape) 10 | self.output_shape = torch.Size(output_shape) 11 | assert self.input_shape.numel() == self.output_shape.numel() 12 | 13 | def forward(self, x): 14 | batch_size = (x.shape[0],) 15 | z = x.reshape(batch_size + self.output_shape) 16 | ldj = torch.zeros(batch_size, device=x.device, dtype=x.dtype) 17 | return z, ldj 18 | 19 | def inverse(self, z): 20 | batch_size = (z.shape[0],) 21 | x = z.reshape(batch_size + self.input_shape) 22 | return x 23 | -------------------------------------------------------------------------------- /denseflow/nn/nets/mlp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from denseflow.nn.layers import LambdaLayer 5 | from denseflow.nn.layers import act_module 6 | 7 | 8 | class MLP(nn.Sequential): 9 | def __init__(self, input_size, output_size, hidden_units, activation='relu', in_lambda=None, out_lambda=None): 10 | layers = [] 11 | if in_lambda: layers.append(LambdaLayer(in_lambda)) 12 | for in_size, out_size in zip([input_size] + hidden_units[:-1], hidden_units): 13 | layers.append(nn.Linear(in_size, out_size)) 14 | layers.append(act_module(activation)) 15 | layers.append(nn.Linear(hidden_units[-1], output_size)) 16 | if out_lambda: layers.append(LambdaLayer(out_lambda)) 17 | 18 | super(MLP, self).__init__(*layers) 19 | -------------------------------------------------------------------------------- /mask2former/modeling/pixel_decoder/ops/functions/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | # Copyright (c) Facebook, Inc. and its affiliates. 10 | # Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 11 | 12 | from .ms_deform_attn_func import MSDeformAttnFunction 13 | 14 | -------------------------------------------------------------------------------- /denseflow/distributions/uniform.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from denseflow.distributions import Distribution 3 | from denseflow.utils import mean_except_batch 4 | 5 | 6 | class StandardUniform(Distribution): 7 | """A multivariate Uniform with boundaries (0,1).""" 8 | 9 | def __init__(self, shape): 10 | super().__init__() 11 | self.shape = torch.Size(shape) 12 | self.register_buffer('zero', torch.zeros(1)) 13 | self.register_buffer('one', torch.ones(1)) 14 | 15 | def log_prob(self, x): 16 | lb = mean_except_batch(x.ge(self.zero).type(self.zero.dtype)) 17 | ub = mean_except_batch(x.le(self.one).type(self.one.dtype)) 18 | return torch.log(lb*ub) 19 | 20 | def sample(self, num_samples): 21 | return torch.rand((num_samples,) + self.shape, device=self.zero.device, dtype=self.zero.dtype) 22 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/functional/iterative_inversion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def bisection_inverse(fn, z, init_x, init_lower, init_upper, eps=1e-10, max_iters=100): 5 | '''Bisection method to find the inverse of `fn`. Computed by finding the root of `z-fn(x)=0`.''' 6 | 7 | def body(x_, lb_, ub_, cur_z_): 8 | gt = (cur_z_ > z).type(z.dtype) 9 | lt = 1 - gt 10 | new_x_ = gt * (x_ + lb_) / 2. + lt * (x_ + ub_) / 2. 11 | new_lb = gt * lb_ + lt * x_ 12 | new_ub = gt * x_ + lt * ub_ 13 | return new_x_, new_lb, new_ub 14 | 15 | x, lb, ub = init_x, init_lower, init_upper 16 | cur_z = fn(x) 17 | diff = float('inf') 18 | i = 0 19 | while diff > eps and i < max_iters: 20 | x, lb, ub = body(x, lb, ub, cur_z) 21 | cur_z = fn(x) 22 | diff = (z - cur_z).abs().max() 23 | i += 1 24 | 25 | return x 26 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/functional/splines/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def gather_elementwise(tensor, idx_tensor): 5 | ''' 6 | For `tensor.shape = tensor_shape + (K,)` 7 | and `idx_tensor.shape = tensor_shape` with elements in {0,1,...,K-1} 8 | ''' 9 | return tensor.gather(-1, idx_tensor[..., None])[..., 0] 10 | 11 | 12 | 13 | # Taken from https://github.com/bayesiains/nsf/blob/master/utils/torchutils.py 14 | 15 | def searchsorted(bin_locations, inputs, eps=1e-6): 16 | bin_locations[..., -1] += eps 17 | return torch.sum( 18 | inputs[..., None] >= bin_locations, 19 | dim=-1 20 | ) - 1 21 | 22 | 23 | # Taken from https://github.com/bayesiains/nsf/blob/master/utils/torchutils.py 24 | 25 | def cbrt(x): 26 | """Cube root. 
Equivalent to torch.pow(x, 1/3), but numerically stable.""" 27 | return torch.sign(x) * torch.exp(torch.log(torch.abs(x)) / 3.0) 28 | -------------------------------------------------------------------------------- /denseflow/nn/layers/activations_functional.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def gelu(x): 6 | '''GELU activation (https://arxiv.org/abs/1606.08415) as used in Sparse Transformers (https://arxiv.org/abs/1904.10509).''' 7 | return x * torch.sigmoid(1.702 * x) 8 | 9 | 10 | def swish(x): 11 | '''Swish activation (https://arxiv.org/abs/1710.05941).''' 12 | return x * torch.sigmoid(x) 13 | 14 | 15 | def concat_relu(x): 16 | '''Concatenated ReLU (http://arxiv.org/abs/1603.05201).''' 17 | return F.relu(torch.cat([x, -x], dim=1)) 18 | 19 | 20 | def concat_elu(x): 21 | '''Like concatenated ReLU (http://arxiv.org/abs/1603.05201), but with ELU instead.''' 22 | return F.elu(torch.cat([x, -x], dim=1)) 23 | 24 | 25 | def gated_tanh(x, dim): 26 | '''Gated Tanh activation.''' 27 | x_tanh, x_sigmoid = torch.chunk(x, 2, dim=dim) 28 | return torch.tanh(x_tanh) * torch.sigmoid(x_sigmoid) 29 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/permute_axes.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from collections.abc import Iterable 3 | from denseflow.transforms.bijections import Bijection 4 | 5 | 6 | class PermuteAxes(Bijection): 7 | 8 | def __init__(self, permutation): 9 | super(PermuteAxes, self).__init__() 10 | assert isinstance(permutation, Iterable), 'permutation must be an Iterable' 11 | assert permutation[0] == 0, 'First element of permutation must be 0 (such that batch dimension stays intact)' 12 | self.permutation = permutation 13 | self.inverse_permutation = torch.argsort(torch.tensor(self.permutation)).tolist() 14 | 15 | def forward(self, x): 16 | z = x.permute(self.permutation).contiguous() 17 | ldj = torch.zeros((x.shape[0],), device=x.device, dtype=x.dtype) 18 | return z, ldj 19 | 20 | def inverse(self, z): 21 | x = z.permute(self.inverse_permutation).contiguous() 22 | return x 23 | -------------------------------------------------------------------------------- /denseflow/distributions/half_normal.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from denseflow.distributions import Distribution 4 | from denseflow.utils import sum_except_batch 5 | 6 | 7 | class StandardHalfNormal(Distribution): 8 | """A standard half-Normal with zero mean and unit covariance.""" 9 | 10 | def __init__(self, shape): 11 | super(StandardHalfNormal, self).__init__() 12 | self.shape = torch.Size(shape) 13 | self.register_buffer('buffer', torch.zeros(1)) 14 | 15 | def log_prob(self, x): 16 | log_scaling = math.log(2) 17 | log_base = - 0.5 * math.log(2 * math.pi) 18 | log_inner = - 0.5 * x**2 19 | log_probs = log_scaling+log_base+log_inner 20 | log_probs[x < 0] = -math.inf 21 | return sum_except_batch(log_probs) 22 | 23 | def sample(self, num_samples): 24 | return torch.randn(num_samples, *self.shape, device=self.buffer.device, dtype=self.buffer.dtype).abs() 25 | -------------------------------------------------------------------------------- /configs/cityscapes/semantic-segmentation/swin/maskformer2_swin_large_IN21k_384_bs18_115k_city+vistas.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: ../maskformer2_R50_bs16_90k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "D2SwinTransformer" 5 | SWIN: 6 | EMBED_DIM: 192 7 | DEPTHS: [2, 2, 18, 2] 8 | NUM_HEADS: [6, 12, 24, 48] 9 | WINDOW_SIZE: 12 10 | APE: False 11 | DROP_PATH_RATE: 0.3 12 | PATCH_NORM: True 13 | PRETRAIN_IMG_SIZE: 384 14 | WEIGHTS: "swin_large_patch4_window12_384_22k.pkl" 15 | PIXEL_MEAN: [123.675, 116.280, 103.530] 16 | PIXEL_STD: [58.395, 57.120, 57.375] 17 | MASK_FORMER: 18 | NUM_OBJECT_QUERIES: 100 19 | 20 | INPUT: 21 | DATASET_MAPPER_NAME: "mask_former_semantic_traffic" 22 | SOLVER: 23 | IMS_PER_BATCH: 12 24 | BASE_LR: 0.0001 25 | MAX_ITER: 150000 26 | 27 | DATASETS: 28 | TRAIN: ("cityscapes_fine_sem_seg_train","mapillary_vistas_sem_seg_train") 29 | TEST: ("cityscapes_fine_sem_seg_val",) 30 | 31 | OUTPUT_DIR: './M2F_SWIN-L_cityscapes+vistas_bs_12_115k' -------------------------------------------------------------------------------- /tools/convert-pretrained-swin-model-to-d2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | import pickle as pkl 5 | import sys 6 | 7 | import torch 8 | 9 | """ 10 | Usage: 11 | # download pretrained swin model: 12 | wget https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth 13 | # run the conversion 14 | ./convert-pretrained-model-to-d2.py swin_tiny_patch4_window7_224.pth swin_tiny_patch4_window7_224.pkl 15 | # Then, use swin_tiny_patch4_window7_224.pkl with the following changes in config: 16 | MODEL: 17 | WEIGHTS: "/path/to/swin_tiny_patch4_window7_224.pkl" 18 | INPUT: 19 | FORMAT: "RGB" 20 | """ 21 | 22 | if __name__ == "__main__": 23 | input = sys.argv[1] 24 | 25 | obj = torch.load(input, map_location="cpu")["model"] 26 | 27 | res = {"model": obj, "__author__": "third_party", "matching_heuristics": True} 28 | 29 | with open(sys.argv[2], "wb") as f: 30 | pkl.dump(res, f) 31 | -------------------------------------------------------------------------------- /cog.yaml: -------------------------------------------------------------------------------- 1 | build: 2 | gpu: true 3 | cuda: "10.1" 4 | python_version: "3.8" 5 | system_packages: 6 | - "libgl1-mesa-glx" 7 | - "libglib2.0-0" 8 | python_packages: 9 | - "ipython==7.30.1" 10 | - "numpy==1.21.4" 11 | - "torch==1.8.1" 12 | - "torchvision==0.9.1" 13 | - "opencv-python==4.5.5.62" 14 | - "Shapely==1.8.0" 15 | - "h5py==3.6.0" 16 | - "scipy==1.7.3" 17 | - "submitit==1.4.1" 18 | - "scikit-image==0.19.1" 19 | - "Cython==0.29.27" 20 | - "timm==0.4.12" 21 | run: 22 | - pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/index.html 23 | - pip install git+https://github.com/cocodataset/panopticapi.git 24 | - pip install git+https://github.com/mcordts/cityscapesScripts.git 25 | - git clone https://github.com/facebookresearch/Mask2Former 26 | - TORCH_CUDA_ARCH_LIST='7.5' FORCE_CUDA=1 python Mask2Former/mask2former/modeling/pixel_decoder/ops/setup.py build install 27 | 28 | predict: "predict.py:Predictor" 29 | -------------------------------------------------------------------------------- /datasets/prepare_ade20k_sem_seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) Facebook, Inc. and its affiliates. 
4 | import os 5 | from pathlib import Path 6 | 7 | import numpy as np 8 | import tqdm 9 | from PIL import Image 10 | 11 | 12 | def convert(input, output): 13 | img = np.asarray(Image.open(input)) 14 | assert img.dtype == np.uint8 15 | img = img - 1 # 0 (ignore) becomes 255. others are shifted by 1 16 | Image.fromarray(img).save(output) 17 | 18 | 19 | if __name__ == "__main__": 20 | dataset_dir = Path(os.getenv("DETECTRON2_DATASETS", "datasets")) / "ADEChallengeData2016" 21 | for name in ["training", "validation"]: 22 | annotation_dir = dataset_dir / "annotations" / name 23 | output_dir = dataset_dir / "annotations_detectron2" / name 24 | output_dir.mkdir(parents=True, exist_ok=True) 25 | for file in tqdm.tqdm(list(annotation_dir.iterdir())): 26 | output_file = output_dir / file.name 27 | convert(file, output_file) 28 | -------------------------------------------------------------------------------- /mask2former/modeling/pixel_decoder/ops/src/vision.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | /*! 12 | * Copyright (c) Facebook, Inc. and its affiliates. 13 | * Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 14 | */ 15 | 16 | #include "ms_deform_attn.h" 17 | 18 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 19 | m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); 20 | m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); 21 | } 22 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/wavelet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from denseflow.transforms.bijections import Bijection 4 | 5 | 6 | def _create_kernel(): 7 | w = torch.ones(4, 1, 2, 2) 8 | w[1, 0, 0, 1] = -1 9 | w[1, 0, 1, 1] = -1 10 | 11 | w[2, 0, 1, 0] = -1 12 | w[2, 0, 1, 1] = -1 13 | 14 | w[3, 0, 1, 0] = -1 15 | w[3, 0, 0, 1] = -1 16 | w *= 0.5 17 | return w 18 | 19 | class WaveletSqueeze2d(Bijection): 20 | def __init__(self, in_channels): 21 | super(WaveletSqueeze2d, self).__init__() 22 | 23 | w = _create_kernel() 24 | self.in_channels = in_channels 25 | w = torch.cat([w] * self.in_channels, 0) 26 | self.register_buffer('weight', w) 27 | 28 | def forward(self, x): 29 | z = F.conv2d(x, self.weight, bias=None, stride=2, groups=self.in_channels) 30 | ldj = torch.zeros(x.shape[0], device=x.device, dtype=x.dtype) 31 | return z, ldj 32 | 33 | def inverse(self, z): 34 | x = F.conv_transpose2d(z, self.weight, bias=None, stride=2, groups=self.in_channels) 35 | return x 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Matej Grcić 4 | 5 | Permission is hereby granted, free of charge, to 
any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /denseflow/transforms/stochastic/vae.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from denseflow.transforms.stochastic import StochasticTransform 3 | 4 | 5 | class VAE(StochasticTransform): 6 | ''' 7 | A variational autoencoder [1, 2] layer. 8 | 9 | Args: 10 | decoder: ConditionalDistribution, the decoder p(x|z). 11 | encoder: ConditionalDistribution, the encoder q(z|x). 12 | 13 | References: 14 | [1] Auto-Encoding Variational Bayes, 15 | Kingma & Welling, 2013, https://arxiv.org/abs/1312.6114 16 | [2] Stochastic Backpropagation and Approximate Inference in Deep Generative Models, 17 | Rezende et al., 2014, https://arxiv.org/abs/1401.4082 18 | ''' 19 | 20 | def __init__(self, decoder, encoder): 21 | super(VAE, self).__init__() 22 | self.decoder = decoder 23 | self.encoder = encoder 24 | 25 | def forward(self, x): 26 | z, log_qz = self.encoder.sample_with_log_prob(context=x) 27 | log_px = self.decoder.log_prob(x, context=z) 28 | return z, log_px - log_qz 29 | 30 | def inverse(self, z): 31 | return self.decoder.sample(context=z) 32 | -------------------------------------------------------------------------------- /configs/cityscapes/semantic-segmentation/swin/maskformer2_swin_large_IN21k_384_bs12_2k_city+vistas_oe.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../maskformer2_R50_bs16_90k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "D2SwinTransformer" 5 | SWIN: 6 | EMBED_DIM: 192 7 | DEPTHS: [2, 2, 18, 2] 8 | NUM_HEADS: [6, 12, 24, 48] 9 | WINDOW_SIZE: 12 10 | APE: False 11 | DROP_PATH_RATE: 0.3 12 | PATCH_NORM: True 13 | PRETRAIN_IMG_SIZE: 384 14 | WEIGHTS: "model_weights/model_0114999_SWIN-L_city_vistas.pth" 15 | PIXEL_MEAN: [123.675, 116.280, 103.530] 16 | PIXEL_STD: [58.395, 57.120, 57.375] 17 | MASK_FORMER: 18 | NUM_OBJECT_QUERIES: 100 19 | 20 | INPUT: 21 | DATASET_MAPPER_NAME: "mask_former_semantic_traffic_oe" 22 | SOLVER: 23 | CHECKPOINT_PERIOD: 1000 24 | IMS_PER_BATCH: 12 25 | BASE_LR: 0.00001 26 | MAX_ITER: 2000 27 | WARMUP_FACTOR: 1.0 28 | WARMUP_ITERS: 0 29 | WEIGHT_DECAY: 0.05 30 | TEST: 31 | EVAL_PERIOD: 1000 32 | 33 | DATASETS: 34 | TRAIN: ("cityscapes_fine_sem_seg_train","mapillary_vistas_sem_seg_train") 35 | TEST: ("cityscapes_fine_sem_seg_val",) 36 | 37 | ANOMALY_DETECTOR: "EAM" 38 | 39 | OUTPUT_DIR: './M2F_SWIN-L_cityscapes+vistas_bs_12_2k_oe' 40 | 
--------------------------------------------------------------------------------
/denseflow/nn/layers/activations.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
from denseflow.nn.layers import gelu, swish, concat_relu, concat_elu, gated_tanh


class GELU(nn.Module):
    '''GELU activation (https://arxiv.org/abs/1606.08415) as used in Sparse Transformers (https://arxiv.org/abs/1904.10509).'''

    def forward(self, input):
        return gelu(input)


class Swish(nn.Module):
    '''Swish activation (https://arxiv.org/abs/1710.05941).'''

    def forward(self, input):
        return swish(input)


class ConcatReLU(nn.Module):
    '''Concatenated ReLU (http://arxiv.org/abs/1603.05201).'''

    def forward(self, input):
        return concat_relu(input)


class ConcatELU(nn.Module):
    '''Like concatenated ReLU (http://arxiv.org/abs/1603.05201), but with ELU instead.'''

    def forward(self, input):
        return concat_elu(input)


class GatedTanhUnit(nn.Module):
    '''Gated Tanh activation.'''

    def __init__(self, dim=-1):
        super(GatedTanhUnit, self).__init__()
        self.dim = dim

    def forward(self, x):
        return gated_tanh(x, dim=self.dim)
--------------------------------------------------------------------------------
/configs/cityscapes/semantic-segmentation/swin/maskformer2_swin_large_IN21k_384_bs12_2k_city+vistas_uno.yaml:
--------------------------------------------------------------------------------
_BASE_: ../maskformer2_R50_bs16_90k.yaml
MODEL:
  BACKBONE:
    NAME: "D2SwinTransformer"
  SWIN:
    EMBED_DIM: 192
    DEPTHS: [2, 2, 18, 2]
    NUM_HEADS: [6, 12, 24, 48]
    WINDOW_SIZE: 12
    APE: False
    DROP_PATH_RATE: 0.3
    PATCH_NORM: True
    PRETRAIN_IMG_SIZE: 384
  WEIGHTS: "model_weights/model_0114999_SWIN-L_city_vistas.pth"

  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  MASK_FORMER:
    NUM_OBJECT_QUERIES: 100
  SEM_SEG_HEAD:
    NUM_CLASSES: 20 # K+1+no_object

INPUT:
  DATASET_MAPPER_NAME: "mask_former_semantic_traffic_uno"
SOLVER:
  CHECKPOINT_PERIOD: 1000
  IMS_PER_BATCH: 12
  BASE_LR: 0.00001
  MAX_ITER: 2000
  WARMUP_FACTOR: 1.0
  WARMUP_ITERS: 0
  WEIGHT_DECAY: 0.05

TEST:
  EVAL_PERIOD: 1000

DATASETS:
  TRAIN: ("cityscapes_fine_sem_seg_train", "mapillary_vistas_sem_seg_train")
  TEST: ("cityscapes_fine_sem_seg_val",)
  # TEST: ("fs_laf_val", "fs_static_val", "road_anomaly")

ANOMALY_DETECTOR: "UNO"

OUTPUT_DIR: './M2F_SWIN-L_cityscapes+vistas_bs_12_2k_uno'
--------------------------------------------------------------------------------
/denseflow/nn/layers/activations_factory.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
from denseflow.nn.layers import GELU, Swish, ConcatReLU, ConcatELU

act_strs = {'elu', 'relu', 'gelu', 'swish'}
concat_act_strs = {'concat_elu', 'concat_relu'}


def act_module(act_str, allow_concat=False):
    # Note: act_strs and concat_act_strs are sets, so they must be combined
    # with `|` (set union); `+` is not defined for sets and raises TypeError.
    if allow_concat: assert act_str in act_strs | concat_act_strs, 'Got invalid activation {}'.format(act_str)
    else: assert act_str in act_strs, 'Got invalid activation {}'.format(act_str)
    if act_str == 'relu': return nn.ReLU()
    elif act_str == 'elu': return nn.ELU()
    elif act_str == 'gelu': return GELU()
    elif act_str == 'swish': return Swish()
    elif act_str == 'concat_relu': return ConcatReLU()
    elif act_str == 'concat_elu': return ConcatELU()


def act_factor(act_str, allow_concat=False):
    if allow_concat: assert act_str in act_strs | concat_act_strs, 'Got invalid activation {}'.format(act_str)
    else: assert act_str in act_strs, 'Got invalid activation {}'.format(act_str)
    if act_str == 'relu': return 1
    elif act_str == 'elu': return 1
    elif act_str == 'gelu': return 1
    elif act_str == 'swish': return 1
    elif act_str == 'concat_relu': return 2
    elif act_str == 'concat_elu': return 2
--------------------------------------------------------------------------------
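A short usage sketch for the factory above (tensor shapes invented for illustration): `act_factor` reports how much an activation widens the channel dimension, which matters when stacking the concatenating variants, since `concat_relu` doubles the channel count.

```python
import torch
import torch.nn as nn
from denseflow.nn.layers import act_module, act_factor

# concat_relu concatenates x and -x along dim=1, so the conv that follows
# it must expect act_factor('concat_relu', allow_concat=True) * 32 inputs.
factor = act_factor('concat_relu', allow_concat=True)  # -> 2
net = nn.Sequential(
    nn.Conv2d(3, 32, kernel_size=3, padding=1),
    act_module('concat_relu', allow_concat=True),
    nn.Conv2d(factor * 32, 32, kernel_size=3, padding=1),
)
out = net(torch.randn(4, 3, 16, 16))  # -> shape (4, 32, 16, 16)
```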
| elif act_str == 'gelu': return GELU() 15 | elif act_str == 'swish': return Swish() 16 | elif act_str == 'concat_relu': return ConcatReLU() 17 | elif act_str == 'concat_elu': return ConcatELU() 18 | 19 | 20 | def act_factor(act_str, allow_concat=False): 21 | if allow_concat: assert act_str in act_strs | concat_act_strs, 'Got invalid activation {}'.format(act_str) 22 | else: assert act_str in act_strs, 'Got invalid activation {}'.format(act_str) 23 | if act_str == 'relu': return 1 24 | elif act_str == 'elu': return 1 25 | elif act_str == 'gelu': return 1 26 | elif act_str == 'swish': return 1 27 | elif act_str == 'concat_relu': return 2 28 | elif act_str == 'concat_elu': return 2 29 | -------------------------------------------------------------------------------- /denseflow/distributions/conditional/base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from denseflow.distributions import Distribution 4 | 5 | 6 | class ConditionalDistribution(Distribution): 7 | """ConditionalDistribution base class""" 8 | 9 | def log_prob(self, x, context): 10 | """Calculate log probability under the distribution. 11 | 12 | Args: 13 | x: Tensor, shape (batch_size, ...). 14 | context: Tensor, shape (batch_size, ...). 15 | 16 | Returns: 17 | log_prob: Tensor, shape (batch_size,) 18 | """ 19 | raise NotImplementedError() 20 | 21 | def sample(self, context): 22 | """Generates samples from the distribution. 23 | 24 | Args: 25 | context: Tensor, shape (batch_size, ...). 26 | 27 | Returns: 28 | samples: Tensor, shape (batch_size, ...). 29 | """ 30 | raise NotImplementedError() 31 | 32 | def sample_with_log_prob(self, context): 33 | """Generates samples from the distribution together with their log probability. 34 | 35 | Args: 36 | context: Tensor, shape (batch_size, ...). 37 | 38 | Returns: 39 | samples: Tensor, shape (batch_size, ...).
40 | log_prob: Tensor, shape (batch_size,) 41 | """ 42 | raise NotImplementedError() 43 | -------------------------------------------------------------------------------- /denseflow/nn/nets/autoregressive/pixelcnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from denseflow.nn.layers import LambdaLayer, ElementwiseParams2d 6 | from denseflow.nn.layers.autoregressive import MaskedConv2d 7 | from denseflow.nn.blocks.autoregressive import MaskedResidualBlock2d 8 | 9 | 10 | class PixelCNN(nn.Sequential): 11 | '''PixelCNN (van den Oord et al., 2016) (https://arxiv.org/abs/1601.06759).''' 12 | 13 | def __init__(self, in_channels, num_params, filters=128, num_blocks=15, output_filters=1024, kernel_size=3, kernel_size_in=7, init_transforms=lambda x: 2*x-1): 14 | 15 | layers = [LambdaLayer(init_transforms)] +\ 16 | [MaskedConv2d(in_channels, 2 * filters, kernel_size=kernel_size_in, padding=kernel_size_in//2, mask_type='A', data_channels=in_channels)] +\ 17 | [MaskedResidualBlock2d(filters, data_channels=in_channels, kernel_size=kernel_size) for _ in range(num_blocks)] +\ 18 | [nn.ReLU(True), MaskedConv2d(2 * filters, output_filters, kernel_size=1, mask_type='B', data_channels=in_channels)] +\ 19 | [nn.ReLU(True), MaskedConv2d(output_filters, num_params * in_channels, kernel_size=1, mask_type='B', data_channels=in_channels)] +\ 20 | [ElementwiseParams2d(num_params)] 21 | 22 | super(PixelCNN, self).__init__(*layers) 23 | -------------------------------------------------------------------------------- /configs/cityscapes/semantic-segmentation/swin/maskformer2_swin_large_IN21k_384_bs12_2k_city+vistas_uno_synthetic.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../maskformer2_R50_bs16_90k.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "MaskFormerJointFlow" 4 | BACKBONE: 5 | NAME: "D2SwinTransformer" 6 | SWIN: 7 | EMBED_DIM: 192 8 | DEPTHS: [2, 2, 18, 2] 9 | NUM_HEADS: [6, 12, 24, 48] 10 | WINDOW_SIZE: 12 11 | APE: False 12 | DROP_PATH_RATE: 0.3 13 | PATCH_NORM: True 14 | PRETRAIN_IMG_SIZE: 384 15 | WEIGHTS: "model_weights/model_0114999_SWIN-L_city_vistas.pth" 16 | 17 | PIXEL_MEAN: [123.675, 116.280, 103.530] 18 | PIXEL_STD: [58.395, 57.120, 57.375] 19 | MASK_FORMER: 20 | NUM_OBJECT_QUERIES: 100 21 | SEM_SEG_HEAD: 22 | NUM_CLASSES: 20 # K+1+no_object 23 | 24 | INPUT: 25 | DATASET_MAPPER_NAME: "mask_former_semantic_traffic" 26 | SOLVER: 27 | CHECKPOINT_PERIOD: 1000 28 | IMS_PER_BATCH: 12 29 | BASE_LR: 0.00001 30 | MAX_ITER: 2000 31 | WARMUP_FACTOR: 1.0 32 | WARMUP_ITERS: 0 33 | WEIGHT_DECAY: 0.05 34 | 35 | TEST: 36 | EVAL_PERIOD: 1000 37 | 38 | DATASETS: 39 | TRAIN: ("cityscapes_fine_sem_seg_train", "mapillary_vistas_sem_seg_train") 40 | TEST: ("cityscapes_fine_sem_seg_val",) 41 | # TEST: ("fs_laf_val", "fs_static_val", "road_anomaly") 42 | 43 | ANOMALY_DETECTOR: "UNO" 44 | 45 | OUTPUT_DIR: './M2F_SWIN-L_cityscapes+vistas_bs_12_2k_uno_synthetic' 46 | -------------------------------------------------------------------------------- /denseflow/distributions/conditional/categorical.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.distributions import Categorical 3 | from denseflow.distributions.conditional import ConditionalDistribution 4 | from denseflow.utils import sum_except_batch 5 | 6 | 7 | class ConditionalCategorical(ConditionalDistribution): 8 | """A Categorical 
distribution with conditional logits.""" 9 | 10 | def __init__(self, net): 11 | super(ConditionalCategorical, self).__init__() 12 | self.net = net 13 | 14 | def cond_dist(self, context): 15 | logits = self.net(context) 16 | return Categorical(logits=logits) 17 | 18 | def log_prob(self, x, context): 19 | dist = self.cond_dist(context) 20 | return sum_except_batch(dist.log_prob(x)) 21 | 22 | def sample(self, context): 23 | dist = self.cond_dist(context) 24 | return dist.sample() 25 | 26 | def sample_with_log_prob(self, context): 27 | dist = self.cond_dist(context) 28 | z = dist.sample() 29 | log_prob = dist.log_prob(z) 30 | log_prob = sum_except_batch(log_prob) 31 | return z, log_prob 32 | 33 | def logits(self, context): 34 | return self.cond_dist(context).logits 35 | 36 | def probs(self, context): 37 | return self.cond_dist(context).probs 38 | 39 | def mode(self, context): 40 | return self.cond_dist(context).logits.argmax(-1) 41 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/orth_squeeze.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from denseflow.transforms.bijections import Bijection 4 | 5 | 6 | class OrthogonalSqueeze2d(Bijection): 7 | def __init__(self, in_channels): 8 | super(OrthogonalSqueeze2d, self).__init__() 9 | self.in_channels = in_channels 10 | 11 | self.weight = torch.nn.Parameter(self._initialize_kernels()) 12 | 13 | def _initialize_kernels(self): 14 | kernels = [] 15 | for _ in range(self.in_channels): 16 | w = torch.empty(4, 4) 17 | torch.nn.init.orthogonal_(w) 18 | kernels.append(w.reshape(4, 1, 2, 2)) 19 | return torch.cat(kernels, dim=0) 20 | 21 | def forward(self, x): 22 | z = F.conv2d(x, self.weight, bias=None, stride=2, groups=self.in_channels) 23 | ldj = torch.zeros(x.shape[0], device=x.device, dtype=x.dtype) 24 | return z, ldj 25 | 26 | def inverse(self, z): 27 | x = F.conv_transpose2d(z, self.weight, bias=None, stride=2, groups=self.in_channels) 28 | return x 29 | 30 | def compute_regularization(self): 31 | total = 0. 32 | kernels = torch.chunk(self.weight, self.in_channels, dim=0) 33 | for kernel in kernels: 34 | w = kernel.reshape(4, 4) 35 | total += (torch.matmul(w, w.T) - torch.eye(4).to(w)).abs().mean() 36 | return total / len(kernels) -------------------------------------------------------------------------------- /denseflow/transforms/bijections/unsqueeze.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from denseflow.transforms.bijections import Squeeze2d 3 | 4 | 5 | class Unsqueeze2d(Squeeze2d): 6 | """ 7 | A bijection defined for image data that trades channel dimensions for spatial 8 | dimensions, i.e. "unsqueezes" the inputs along the channel dimensions. 9 | Introduced in the RealNVP paper [1]. 10 | 11 | Args: 12 | factor: int, the factor to squeeze by (default=2). 13 | ordered: bool, if True, squeezing happens imagewise. 14 | if False, squeezing happens channelwise. 15 | For more details, see example (default=False). 
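        Example (a sketch of the default shape behaviour; values are only
        rearranged, so the log-det-Jacobian is zero):

            >>> unsqueeze = Unsqueeze2d(factor=2)
            >>> x = torch.randn(16, 4, 8, 8)
            >>> z, ldj = unsqueeze(x)  # z: (16, 1, 16, 16), ldj: zeros of shape (16,)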
16 | 17 | Source implementation: 18 | Based on `squeeze_nxn`, `squeeze_2x2`, `squeeze_2x2_ordered`, `unsqueeze_2x2` in: 19 | https://github.com/laurent-dinh/models/blob/master/real_nvp/real_nvp_utils.py 20 | 21 | References: 22 | [1] Density estimation using Real NVP, 23 | Dinh et al., 2017, https://arxiv.org/abs/1605.08803 24 | """ 25 | 26 | def __init__(self, factor=2, ordered=False): 27 | super(Unsqueeze2d, self).__init__(factor=factor, ordered=ordered) 28 | 29 | def forward(self, x): 30 | z = self._unsqueeze(x) 31 | ldj = torch.zeros(x.shape[0], device=x.device, dtype=x.dtype) 32 | return z, ldj 33 | 34 | def inverse(self, z): 35 | x = self._squeeze(z) 36 | return x 37 | -------------------------------------------------------------------------------- /denseflow/transforms/stochastic/permutation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from denseflow.transforms.stochastic import StochasticTransform 3 | 4 | 5 | class StochasticPermutation(StochasticTransform): 6 | '''A stochastic permutation layer.''' 7 | 8 | def __init__(self, dim=1): 9 | super(StochasticPermutation, self).__init__() 10 | self.register_buffer('buffer', torch.zeros(1)) 11 | self.dim = dim 12 | 13 | def forward(self, x): 14 | rand = torch.rand(x.shape[0], x.shape[self.dim], device=x.device) 15 | permutation = rand.argsort(dim=1) 16 | for d in range(1, self.dim): 17 | permutation = permutation.unsqueeze(1) 18 | for d in range(self.dim+1, x.dim()): 19 | permutation = permutation.unsqueeze(-1) 20 | permutation = permutation.expand_as(x) 21 | z = torch.gather(x, self.dim, permutation) 22 | ldj = self.buffer.new_zeros(x.shape[0]) 23 | return z, ldj 24 | 25 | def inverse(self, z): 26 | rand = torch.rand(z.shape[0], z.shape[self.dim], device=z.device) 27 | permutation = rand.argsort(dim=1) 28 | for d in range(1, self.dim): 29 | permutation = permutation.unsqueeze(1) 30 | for d in range(self.dim+1, z.dim()): 31 | permutation = permutation.unsqueeze(-1) 32 | permutation = permutation.expand_as(z) 33 | x = torch.gather(z, self.dim, permutation) 34 | return x 35 | -------------------------------------------------------------------------------- /mask2former/modeling/pixel_decoder/ops/src/cuda/ms_deform_attn_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | /*! 12 | * Copyright (c) Facebook, Inc. and its affiliates. 
13 | * Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 14 | */ 15 | 16 | #pragma once 17 | #include <torch/extension.h> 18 | 19 | at::Tensor ms_deform_attn_cuda_forward( 20 | const at::Tensor &value, 21 | const at::Tensor &spatial_shapes, 22 | const at::Tensor &level_start_index, 23 | const at::Tensor &sampling_loc, 24 | const at::Tensor &attn_weight, 25 | const int im2col_step); 26 | 27 | std::vector<at::Tensor> ms_deform_attn_cuda_backward( 28 | const at::Tensor &value, 29 | const at::Tensor &spatial_shapes, 30 | const at::Tensor &level_start_index, 31 | const at::Tensor &sampling_loc, 32 | const at::Tensor &attn_weight, 33 | const at::Tensor &grad_output, 34 | const int im2col_step); 35 | 36 | -------------------------------------------------------------------------------- /mask2former/modeling/pixel_decoder/ops/src/cpu/ms_deform_attn_cpu.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | /*! 12 | * Copyright (c) Facebook, Inc. and its affiliates. 13 | * Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 14 | */ 15 | 16 | #pragma once 17 | #include <torch/extension.h> 18 | 19 | at::Tensor 20 | ms_deform_attn_cpu_forward( 21 | const at::Tensor &value, 22 | const at::Tensor &spatial_shapes, 23 | const at::Tensor &level_start_index, 24 | const at::Tensor &sampling_loc, 25 | const at::Tensor &attn_weight, 26 | const int im2col_step); 27 | 28 | std::vector<at::Tensor> 29 | ms_deform_attn_cpu_backward( 30 | const at::Tensor &value, 31 | const at::Tensor &spatial_shapes, 32 | const at::Tensor &level_start_index, 33 | const at::Tensor &sampling_loc, 34 | const at::Tensor &attn_weight, 35 | const at::Tensor &grad_output, 36 | const int im2col_step); 37 | 38 | 39 | -------------------------------------------------------------------------------- /configs/cityscapes/semantic-segmentation/maskformer2_R50_bs16_90k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-Cityscapes-SemanticSegmentation.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "MaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "MaskFormerHead" 6 | IGNORE_VALUE: 255 7 | NUM_CLASSES: 19 8 | LOSS_WEIGHT: 1.0 9 | CONVS_DIM: 256 10 | MASK_DIM: 256 11 | NORM: "GN" 12 | # pixel decoder 13 | PIXEL_DECODER_NAME: "MSDeformAttnPixelDecoder" 14 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 15 | DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["res3", "res4", "res5"] 16 | COMMON_STRIDE: 4 17 | TRANSFORMER_ENC_LAYERS: 6 18 | MASK_FORMER: 19 | TRANSFORMER_DECODER_NAME: "MultiScaleMaskedTransformerDecoder" 20 | TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder" 21 | DEEP_SUPERVISION: True 22 | NO_OBJECT_WEIGHT: 0.1 23 | CLASS_WEIGHT: 2.0 24 | MASK_WEIGHT: 5.0 25 | DICE_WEIGHT: 5.0 26 | HIDDEN_DIM: 256 27 | NUM_OBJECT_QUERIES: 100 28 | NHEADS: 8 29 | DROPOUT: 0.0 30 | DIM_FEEDFORWARD: 2048 31 | ENC_LAYERS: 0 32 | PRE_NORM: False 33 | ENFORCE_INPUT_PROJ: False 34 | SIZE_DIVISIBILITY: 32 35 | DEC_LAYERS:
10 # 9 decoder layers, add one for the loss on learnable query 36 | TRAIN_NUM_POINTS: 12544 37 | OVERSAMPLE_RATIO: 3.0 38 | IMPORTANCE_SAMPLE_RATIO: 0.75 39 | TEST: 40 | SEMANTIC_ON: True 41 | INSTANCE_ON: False 42 | PANOPTIC_ON: False 43 | OVERLAP_THRESHOLD: 0.8 44 | OBJECT_MASK_THRESHOLD: 0.8 45 | -------------------------------------------------------------------------------- /denseflow/distributions/conditional/bernoulli.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.distributions import Bernoulli 3 | from denseflow.distributions.conditional import ConditionalDistribution 4 | from denseflow.utils import sum_except_batch 5 | 6 | 7 | class ConditionalBernoulli(ConditionalDistribution): 8 | """A Bernoulli distribution with conditional logits.""" 9 | 10 | def __init__(self, net): 11 | super(ConditionalBernoulli, self).__init__() 12 | self.net = net 13 | 14 | def cond_dist(self, context): 15 | logits = self.net(context) 16 | return Bernoulli(logits=logits) 17 | 18 | def log_prob(self, x, context): 19 | dist = self.cond_dist(context) 20 | return sum_except_batch(dist.log_prob(x.float())) 21 | 22 | def sample(self, context): 23 | dist = self.cond_dist(context) 24 | return dist.sample().long() 25 | 26 | def sample_with_log_prob(self, context): 27 | dist = self.cond_dist(context) 28 | z = dist.sample() 29 | log_prob = dist.log_prob(z) 30 | log_prob = sum_except_batch(log_prob) 31 | return z.long(), log_prob 32 | 33 | def logits(self, context): 34 | return self.cond_dist(context).logits 35 | 36 | def probs(self, context): 37 | return self.cond_dist(context).probs 38 | 39 | def mean(self, context): 40 | return self.cond_dist(context).mean 41 | 42 | def mode(self, context): 43 | return (self.cond_dist(context).logits>=0).long() 44 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/affine.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from denseflow.transforms.bijections import Bijection 3 | 4 | 5 | class ScalarAffineBijection(Bijection): 6 | """ 7 | Computes `z = shift + scale * x`, where `scale` and `shift` are scalars, and `scale` is non-zero. 
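    Example (a sketch; mapping dequantized image data from [0, 256) to
    [-0.5, 0.5), a common preprocessing step for flows):

        >>> bij = ScalarAffineBijection(shift=-0.5, scale=1./256)
        >>> z, ldj = bij(x)  # z = x/256 - 0.5; ldj = -D*log(256) with D input dims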
8 | """ 9 | 10 | def __init__(self, shift=None, scale=None): 11 | super(ScalarAffineBijection, self).__init__() 12 | assert isinstance(shift, float) or shift is None, 'shift must be a float or None' 13 | assert isinstance(scale, float) or scale is None, 'scale must be a float or None' 14 | 15 | if shift is None and scale is None: 16 | raise ValueError('At least one of scale and shift must be provided.') 17 | if scale == 0.: 18 | raise ValueError('Scale cannot be zero.') 19 | 20 | self.register_buffer('_shift', torch.tensor(shift if (shift is not None) else 0.)) 21 | self.register_buffer('_scale', torch.tensor(scale if (scale is not None) else 1.)) 22 | 23 | @property 24 | def _log_scale(self): 25 | return torch.log(torch.abs(self._scale)) 26 | 27 | def forward(self, x): 28 | batch_size = x.shape[0] 29 | num_dims = x.shape[1:].numel() 30 | z = x * self._scale + self._shift 31 | ldj = torch.full([batch_size], self._log_scale * num_dims, device=x.device, dtype=x.dtype) 32 | return z, ldj 33 | 34 | def inverse(self, z): 35 | batch_size = z.shape[0] 36 | num_dims = z.shape[1:].numel() 37 | x = (z - self._shift) / self._scale 38 | return x 39 | -------------------------------------------------------------------------------- /denseflow/nn/blocks/autoregressive/masked_residual_block_2d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from denseflow.nn.layers.autoregressive import MaskedConv2d, SpatialMaskedConv2d 5 | 6 | 7 | class MaskedResidualBlock2d(nn.Module): 8 | 9 | def __init__(self, h, kernel_size=3, data_channels=3): 10 | super(MaskedResidualBlock2d, self).__init__() 11 | 12 | self.conv1 = MaskedConv2d(2 * h, h, kernel_size=1, mask_type='B', data_channels=data_channels) 13 | self.conv2 = MaskedConv2d(h, h, kernel_size=kernel_size, padding=kernel_size//2, mask_type='B', data_channels=data_channels) 14 | self.conv3 = MaskedConv2d(h, 2 * h, kernel_size=1, mask_type='B', data_channels=data_channels) 15 | 16 | def forward(self, x): 17 | identity = x 18 | 19 | x = self.conv1(F.relu(x)) 20 | x = self.conv2(F.relu(x)) 21 | x = self.conv3(F.relu(x)) 22 | 23 | return x + identity 24 | 25 | 26 | class SpatialMaskedResidualBlock2d(nn.Module): 27 | 28 | def __init__(self, h, kernel_size=3): 29 | super(SpatialMaskedResidualBlock2d, self).__init__() 30 | 31 | self.conv1 = nn.Conv2d(2 * h, h, kernel_size=1) 32 | self.conv2 = SpatialMaskedConv2d(h, h, kernel_size=kernel_size, padding=kernel_size//2, mask_type='B') 33 | self.conv3 = nn.Conv2d(h, 2 * h, kernel_size=1) 34 | 35 | def forward(self, x): 36 | identity = x 37 | 38 | x = self.conv1(F.relu(x)) 39 | x = self.conv2(F.relu(x)) 40 | x = self.conv3(F.relu(x)) 41 | 42 | return x + identity 43 | -------------------------------------------------------------------------------- /denseflow/transforms/surjections/slice.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from denseflow.utils import sum_except_batch 3 | from denseflow.distributions import ConditionalDistribution 4 | from denseflow.transforms.surjections import Surjection 5 | 6 | 7 | class Slice(Surjection): 8 | ''' 9 | A simple slice layer which factors out some elements and returns 10 | the remaining elements for further transformation. 11 | This is useful for constructing multi-scale architectures [1].
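    Example (a sketch; `some_decoder` is a hypothetical Distribution over the
    factored-out part, here the last 2 of 4 channels):

        >>> slice_layer = Slice(decoder=some_decoder, num_keep=2, dim=1)
        >>> z, ldj = slice_layer(x)  # x: (B, 4, H, W) -> z: (B, 2, H, W)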
12 | 13 | References: 14 | [1] Density estimation using Real NVP, 15 | Dinh et al., 2017, https://arxiv.org/abs/1605.08803 16 | ''' 17 | 18 | stochastic_forward = False 19 | 20 | def __init__(self, decoder, num_keep, dim=1): 21 | super(Slice, self).__init__() 22 | assert dim >= 1 23 | self.decoder = decoder 24 | self.dim = dim 25 | self.num_keep = num_keep 26 | self.cond = isinstance(self.decoder, ConditionalDistribution) 27 | 28 | def split_input(self, input): 29 | split_proportions = (self.num_keep, input.shape[self.dim] - self.num_keep) 30 | return torch.split(input, split_proportions, dim=self.dim) 31 | 32 | def forward(self, x): 33 | z, x2 = self.split_input(x) 34 | if self.cond: ldj = self.decoder.log_prob(x2, context=z) 35 | else: ldj = self.decoder.log_prob(x2) 36 | return z, ldj 37 | 38 | def inverse(self, z): 39 | if self.cond: x2 = self.decoder.sample(context=z) 40 | else: x2 = self.decoder.sample(num_samples=z.shape[0]) 41 | x = torch.cat([z, x2], dim=self.dim) 42 | return x 43 | -------------------------------------------------------------------------------- /mask2former/modeling/pixel_decoder/ops/src/cpu/ms_deform_attn_cpu.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | /*! 12 | * Copyright (c) Facebook, Inc. and its affiliates. 13 | * Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 14 | */ 15 | 16 | #include <vector> 17 | 18 | #include <ATen/ATen.h> 19 | #include <ATen/cuda/CUDAContext.h> 20 | 21 | 22 | at::Tensor 23 | ms_deform_attn_cpu_forward( 24 | const at::Tensor &value, 25 | const at::Tensor &spatial_shapes, 26 | const at::Tensor &level_start_index, 27 | const at::Tensor &sampling_loc, 28 | const at::Tensor &attn_weight, 29 | const int im2col_step) 30 | { 31 | AT_ERROR("Not implemented on cpu"); 32 | } 33 | 34 | std::vector<at::Tensor> 35 | ms_deform_attn_cpu_backward( 36 | const at::Tensor &value, 37 | const at::Tensor &spatial_shapes, 38 | const at::Tensor &level_start_index, 39 | const at::Tensor &sampling_loc, 40 | const at::Tensor &attn_weight, 41 | const at::Tensor &grad_output, 42 | const int im2col_step) 43 | { 44 | AT_ERROR("Not implemented on cpu"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /mask2former/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . import data # register all new datasets 3 | from . 
import modeling 4 | 5 | # config 6 | from .config import add_maskformer2_config 7 | 8 | # dataset loading 9 | from .data.dataset_mappers.coco_instance_new_baseline_dataset_mapper import COCOInstanceNewBaselineDatasetMapper 10 | from .data.dataset_mappers.coco_panoptic_new_baseline_dataset_mapper import COCOPanopticNewBaselineDatasetMapper 11 | from .data.dataset_mappers.mask_former_semantic_dataset_mapper_traffic_oe import MaskFormerSemanticDatasetMapperTrafficWithOE 12 | from .data.dataset_mappers.mask_former_semantic_dataset_mapper_traffic_uno import MaskFormerSemanticDatasetMapperWithUNO 13 | from .data.dataset_mappers.mask_former_semantic_dataset_mapper_traffic import MaskFormerSemanticDatasetMapperTraffic 14 | 15 | from .data.dataset_mappers.mask_former_instance_dataset_mapper import ( 16 | MaskFormerInstanceDatasetMapper, 17 | ) 18 | from .data.dataset_mappers.mask_former_panoptic_dataset_mapper import ( 19 | MaskFormerPanopticDatasetMapper, 20 | ) 21 | from .data.dataset_mappers.mask_former_semantic_dataset_mapper import ( 22 | MaskFormerSemanticDatasetMapper, 23 | ) 24 | 25 | # models 26 | from .maskformer_model import MaskFormer 27 | from .maskformer_model_joint_flow import MaskFormerJointFlow 28 | from .test_time_augmentation import SemanticSegmentorWithTTA 29 | 30 | # evaluation 31 | from .evaluation.instance_evaluation import InstanceSegEvaluator 32 | from .evaluation.ood_detection_evaluation import DenseOODDetectionEvaluator 33 | from .evaluation.ood_detection_evaluation_UNO import DenseOODDetectionEvaluatorUNO -------------------------------------------------------------------------------- /denseflow/transforms/cond_base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from collections.abc import Iterable 4 | from denseflow.transforms import Transform 5 | 6 | 7 | class ConditionalTransform(Transform): 8 | """Base class for ConditionalTransform""" 9 | 10 | has_inverse = True 11 | 12 | @property 13 | def bijective(self): 14 | raise NotImplementedError() 15 | 16 | @property 17 | def stochastic_forward(self): 18 | raise NotImplementedError() 19 | 20 | @property 21 | def stochastic_inverse(self): 22 | raise NotImplementedError() 23 | 24 | @property 25 | def lower_bound(self): 26 | return self.stochastic_forward 27 | 28 | def forward(self, x, context): 29 | """ 30 | Forward transform. 31 | Computes `z = f(x|context)` and `log|det J|` for `J = df(x|context)/dx` 32 | such that `log p_x(x|context) = log p_z(f(x|context)) + log|det J|`. 33 | 34 | Args: 35 | x: Tensor, shape (batch_size, ...) 36 | context: Tensor, shape (batch_size, ...). 37 | 38 | Returns: 39 | z: Tensor, shape (batch_size, ...) 40 | ldj: Tensor, shape (batch_size,) 41 | """ 42 | raise NotImplementedError() 43 | 44 | def inverse(self, z, context): 45 | """ 46 | Inverse transform. 47 | Computes `x = f^{-1}(z|context)`. 48 | 49 | Args: 50 | z: Tensor, shape (batch_size, ...) 51 | context: Tensor, shape (batch_size, ...). 52 | 53 | Returns: 54 | x: Tensor, shape (batch_size, ...) 55 | """ 56 | raise NotImplementedError() 57 | -------------------------------------------------------------------------------- /tools/convert-torchvision-to-d2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
3 | 4 | import pickle as pkl 5 | import sys 6 | 7 | import torch 8 | 9 | """ 10 | Usage: 11 | # download one of the ResNet{18,34,50,101,152} models from torchvision: 12 | wget https://download.pytorch.org/models/resnet50-19c8e357.pth -O r50.pth 13 | # run the conversion 14 | ./convert-torchvision-to-d2.py r50.pth r50.pkl 15 | # Then, use r50.pkl with the following changes in config: 16 | MODEL: 17 | WEIGHTS: "/path/to/r50.pkl" 18 | PIXEL_MEAN: [123.675, 116.280, 103.530] 19 | PIXEL_STD: [58.395, 57.120, 57.375] 20 | RESNETS: 21 | DEPTH: 50 22 | STRIDE_IN_1X1: False 23 | INPUT: 24 | FORMAT: "RGB" 25 | """ 26 | 27 | if __name__ == "__main__": 28 | input = sys.argv[1] 29 | 30 | obj = torch.load(input, map_location="cpu") 31 | 32 | newmodel = {} 33 | for k in list(obj.keys()): 34 | old_k = k 35 | if "layer" not in k: 36 | k = "stem." + k 37 | for t in [1, 2, 3, 4]: 38 | k = k.replace("layer{}".format(t), "res{}".format(t + 1)) 39 | for t in [1, 2, 3]: 40 | k = k.replace("bn{}".format(t), "conv{}.norm".format(t)) 41 | k = k.replace("downsample.0", "shortcut") 42 | k = k.replace("downsample.1", "shortcut.norm") 43 | print(old_k, "->", k) 44 | newmodel[k] = obj.pop(old_k).detach().numpy() 45 | 46 | res = {"model": newmodel, "__author__": "torchvision", "matching_heuristics": True} 47 | 48 | with open(sys.argv[2], "wb") as f: 49 | pkl.dump(res, f) 50 | if obj: 51 | print("Unconverted keys:", obj.keys()) 52 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/orth_squeeze_pgd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from denseflow.transforms.bijections import Bijection 4 | from denseflow.utils import orthogonalize_tensor 5 | 6 | class OrthogonalSqueeze2dPgd(Bijection): 7 | def __init__(self, in_channels): 8 | super(OrthogonalSqueeze2dPgd, self).__init__() 9 | self.in_channels = in_channels 10 | 11 | self.weight = torch.nn.Parameter(self._initialize_kernels()) 12 | 13 | def _initialize_kernels(self): 14 | kernels = [] 15 | for _ in range(self.in_channels): 16 | w = torch.empty(4, 4) 17 | torch.nn.init.orthogonal_(w) 18 | kernels.append(w.reshape(4, 1, 2, 2)) 19 | return torch.cat(kernels, dim=0) 20 | 21 | def forward(self, x): 22 | z = F.conv2d(x, self.weight, bias=None, stride=2, groups=self.in_channels) 23 | ldj = torch.zeros(x.shape[0], device=x.device, dtype=x.dtype) 24 | return z, ldj 25 | 26 | def inverse(self, z): 27 | x = F.conv_transpose2d(z, self.weight, bias=None, stride=2, groups=self.in_channels) 28 | return x 29 | 30 | def orthogonalize_kernel(self): 31 | with torch.no_grad(): 32 | updated_kernels = [] 33 | kernels = torch.chunk(self.weight, self.in_channels, dim=0) 34 | for kernel in kernels: 35 | # a = kernel.reshape(4, 4) 36 | # print((torch.matmul())) 37 | w = orthogonalize_tensor(kernel.reshape(4, 4)) 38 | updated_kernels.append(w.reshape(4, 1, 2, 2)) 39 | 40 | self.weight.data = torch.cat(updated_kernels, dim=0).to(self.weight) -------------------------------------------------------------------------------- /denseflow/flows/inverse_flow.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from collections.abc import Iterable 4 | from denseflow.distributions import Distribution 5 | from denseflow.transforms import Transform 6 | 7 | 8 | class InverseFlow(Distribution): 9 | """ 10 | Base class for InverseFlow. 
11 | Inverse flows use the forward transforms to transform noise to samples. 12 | These are typically useful as variational distributions. 13 | Here, we are not interested in the log probability of novel samples. 14 | However, using .sample_with_log_prob(), samples can be obtained together 15 | with their log probability. 16 | """ 17 | 18 | def __init__(self, base_dist, transforms): 19 | super(InverseFlow, self).__init__() 20 | assert isinstance(base_dist, Distribution) 21 | if isinstance(transforms, Transform): transforms = [transforms] 22 | assert isinstance(transforms, Iterable) 23 | assert all(isinstance(transform, Transform) for transform in transforms) 24 | self.base_dist = base_dist 25 | self.transforms = nn.ModuleList(transforms) 26 | 27 | def log_prob(self, x): 28 | raise RuntimeError("InverseFlow does not support log_prob, see Flow instead.") 29 | 30 | def sample(self, num_samples): 31 | z = self.base_dist.sample(num_samples) 32 | for transform in self.transforms: 33 | z, _ = transform(z) 34 | return z 35 | 36 | def sample_with_log_prob(self, num_samples): 37 | z, log_prob = self.base_dist.sample_with_log_prob(num_samples) 38 | for transform in self.transforms: 39 | z, ldj = transform(z) 40 | log_prob -= ldj 41 | return z, log_prob 42 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/conditional/elementwise_linear.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from denseflow.transforms.bijections.conditional import ConditionalBijection 3 | from denseflow.utils import sum_except_batch 4 | 5 | 6 | class ConditionalAdditiveBijection(ConditionalBijection): 7 | """ 8 | Computes `z = shift + x`, where `shift = net(context)`. 9 | """ 10 | 11 | def __init__(self, context_net): 12 | super(ConditionalAdditiveBijection, self).__init__() 13 | self.context_net = context_net 14 | 15 | def forward(self, x, context): 16 | z = x + self.context_net(context) 17 | ldj = torch.zeros(x.shape[0], dtype=x.dtype, device=x.device) 18 | return z, ldj 19 | 20 | def inverse(self, z, context): 21 | x = z - self.context_net(context) 22 | return x 23 | 24 | 25 | class ConditionalAffineBijection(ConditionalBijection): 26 | """ 27 | Computes `z = shift + scale * x`, where `shift, log_scale = net(context)`. 
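    Example (a sketch; `context_net` is a hypothetical module whose output has
    twice the channels of `x`, so it can be chunked into `shift` and `log_scale`):

        >>> context_net = nn.Conv2d(16, 2 * 8, kernel_size=1)
        >>> bij = ConditionalAffineBijection(context_net)
        >>> x, context = torch.randn(4, 8, 32, 32), torch.randn(4, 16, 32, 32)
        >>> z, ldj = bij(x, context)  # z: (4, 8, 32, 32), ldj: (4,)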
28 | """ 29 | 30 | def __init__(self, context_net, param_dim=1): 31 | super(ConditionalAffineBijection, self).__init__() 32 | self.context_net = context_net 33 | self.param_dim = param_dim 34 | 35 | def forward(self, x, context): 36 | params = self.context_net(context) 37 | mean, log_std = torch.chunk(params, chunks=2, dim=self.param_dim) 38 | z = mean + log_std.exp() * x 39 | ldj = sum_except_batch(log_std) 40 | return z, ldj 41 | 42 | def inverse(self, z, context): 43 | params = self.context_net(context) 44 | mean, log_std = torch.chunk(params, chunks=2, dim=self.param_dim) 45 | x = (z - mean) * torch.exp(-log_std) 46 | return x 47 | -------------------------------------------------------------------------------- /denseflow/nn/nets/matching/resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from denseflow.nn.blocks import ResidualBlock 4 | import torch.utils.checkpoint as cp 5 | 6 | checkpoint = lambda func, inputs: cp.checkpoint(func, inputs, preserve_rng_state=True) 7 | 8 | def _checkpoint_dn(t): 9 | def func(x): 10 | return t(x) 11 | return func 12 | 13 | 14 | class ResNet(nn.Module): 15 | def __init__(self, in_channels, out_channels, num_blocks, 16 | mid_channels, dropout, 17 | gated_conv=False, zero_init=False, checkpointing=False): 18 | super(ResNet, self).__init__() 19 | 20 | layers = [nn.Conv2d(in_channels, mid_channels, kernel_size=1, padding=0)] +\ 21 | [ResidualBlock(in_channels=mid_channels, 22 | out_channels=mid_channels, 23 | dropout=dropout, 24 | gated_conv=gated_conv, 25 | zero_init=False) for _ in range(num_blocks)] +\ 26 | [nn.Conv2d(mid_channels, out_channels, kernel_size=1, padding=0)] 27 | 28 | if zero_init: 29 | nn.init.zeros_(layers[-1].weight) 30 | if hasattr(layers[-1], 'bias'): 31 | nn.init.zeros_(layers[-1].bias) 32 | 33 | self.transform = nn.Sequential(*layers) 34 | 35 | # super(DenseNet, self).__init__(*layers) 36 | self.checkpointing = checkpointing 37 | self.cp_func = _checkpoint_dn(self.transform) 38 | 39 | def forward(self, x): 40 | if self.training and self.checkpointing: 41 | return checkpoint(self.cp_func, x) 42 | else: 43 | return self.cp_func(x) -------------------------------------------------------------------------------- /tools/evaluate_coco_boundary_ap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | # Modified by Bowen Cheng from: https://github.com/bowenc0221/boundary-iou-api/blob/master/tools/coco_instance_evaluation.py 4 | 5 | """ 6 | Evaluation for COCO val2017: 7 | python ./tools/evaluate_coco_boundary_ap.py \ 8 | --gt-json-file COCO_GT_JSON \ 9 | --dt-json-file COCO_DT_JSON 10 | """ 11 | import argparse 12 | import json 13 | 14 | from boundary_iou.coco_instance_api.coco import COCO 15 | from boundary_iou.coco_instance_api.cocoeval import COCOeval 16 | 17 | 18 | def main(): 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument("--gt-json-file", default="") 21 | parser.add_argument("--dt-json-file", default="") 22 | parser.add_argument("--iou-type", default="boundary") 23 | parser.add_argument("--dilation-ratio", default="0.020", type=float) 24 | args = parser.parse_args() 25 | print(args) 26 | 27 | annFile = args.gt_json_file 28 | resFile = args.dt_json_file 29 | dilation_ratio = args.dilation_ratio 30 | if args.iou_type == "boundary": 31 | get_boundary = True 32 | else: 33 | get_boundary = False 34 | cocoGt = COCO(annFile, get_boundary=get_boundary, dilation_ratio=dilation_ratio) 35 | 36 | # remove box predictions 37 | resFile = json.load(open(resFile)) 38 | for c in resFile: 39 | c.pop("bbox", None) 40 | 41 | cocoDt = cocoGt.loadRes(resFile) 42 | cocoEval = COCOeval(cocoGt, cocoDt, iouType=args.iou_type, dilation_ratio=dilation_ratio) 43 | cocoEval.evaluate() 44 | cocoEval.accumulate() 45 | cocoEval.summarize() 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /denseflow/nn/layers/encoding/positional_encoding_image.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | class PositionalEncodingImage(nn.Module): 7 | ''' 8 | Learning positional embeddings for images. 9 | Embeddings for channel, height and width are added to form the full positional embedding. 10 | These encodings correspond to the ones from Sparse Transformers (https://arxiv.org/abs/1904.10509). 11 | 12 | Args: 13 | image_shape: Iterable, the shape of the image. 14 | embedding_dim: int, the size of each embedding vector. 15 | ''' 16 | 17 | def __init__(self, image_shape, embedding_dim): 18 | super(PositionalEncodingImage, self).__init__() 19 | assert len(image_shape) == 3, 'image_shape should have length 3: (C,H,W)' 20 | self.image_shape = image_shape 21 | self.embedding_dim = embedding_dim 22 | 23 | c, h, w = image_shape 24 | self.encode_c = nn.Parameter(torch.Tensor(1, c, 1, 1, embedding_dim)) 25 | self.encode_h = nn.Parameter(torch.Tensor(1, 1, h, 1, embedding_dim)) 26 | self.encode_w = nn.Parameter(torch.Tensor(1, 1, 1, w, embedding_dim)) 27 | self.reset_parameters() 28 | 29 | def reset_parameters(self): 30 | # Initialize position embedding layers to N(0, 0.125/sqrt(3*d)) 31 | # as described in paragraph 3 of Section "6.
Training": 32 | nn.init.normal_(self.encode_c, std=0.125/math.sqrt(3*self.embedding_dim)) 33 | nn.init.normal_(self.encode_h, std=0.125/math.sqrt(3*self.embedding_dim)) 34 | nn.init.normal_(self.encode_w, std=0.125/math.sqrt(3*self.embedding_dim)) 35 | 36 | def forward(self, x): 37 | return x + self.encode_c + self.encode_h + self.encode_w 38 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to maskformer2 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Our Development Process 6 | Minor changes and improvements will be released on an ongoing basis. Larger changes (e.g., changesets implementing a new paper) will be released on a more periodic basis. 7 | 8 | ## Pull Requests 9 | We actively welcome your pull requests. 10 | 11 | 1. Fork the repo and create your branch from `main`. 12 | 2. If you've added code that should be tested, add tests. 13 | 3. If you've changed APIs, update the documentation. 14 | 4. Ensure the test suite passes. 15 | 5. Make sure your code lints. 16 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 17 | 18 | ## Contributor License Agreement ("CLA") 19 | In order to accept your pull request, we need you to submit a CLA. You only need 20 | to do this once to work on any of Facebook's open source projects. 21 | 22 | Complete your CLA here: <https://code.facebook.com/cla> 23 | 24 | ## Issues 25 | We use GitHub issues to track public bugs. Please ensure your description is 26 | clear and has sufficient instructions to be able to reproduce the issue. 27 | 28 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 29 | disclosure of security bugs. In those cases, please go through the process 30 | outlined on that page and do not file a public issue. 31 | 32 | ## Coding Style 33 | * 4 spaces for indentation rather than tabs 34 | * 80 character line length 35 | * PEP8 formatting following [Black](https://black.readthedocs.io/en/stable/) 36 | 37 | ## License 38 | By contributing to MaskFormer, you agree that your contributions will be licensed 39 | under the LICENSE file in the root directory of this source tree. 40 | -------------------------------------------------------------------------------- /denseflow/transforms/surjections/augment.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from denseflow.utils import sum_except_batch 3 | from denseflow.distributions import ConditionalDistribution 4 | from denseflow.transforms.surjections import Surjection 5 | 6 | 7 | class Augment(Surjection): 8 | ''' 9 | A simple augmentation layer which augments the input with additional elements. 10 | This is useful for constructing augmented normalizing flows [1, 2].
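    Example (a sketch; `noise_dist` is a hypothetical Distribution over the
    augmenting noise, e.g. a ConditionalDistribution that conditions on x):

        >>> aug = Augment(encoder=noise_dist, x_size=3, split_dim=1)
        >>> z, ldj = aug(x)  # x: (B, 3, H, W) -> z: (B, 3 + extra, H, W)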
11 | 12 | References: 13 | [1] Augmented Normalizing Flows: Bridging the Gap Between Generative Flows and Latent Variable Models, 14 | Huang et al., 2020, https://arxiv.org/abs/2002.07101 15 | [2] VFlow: More Expressive Generative Flows with Variational Data Augmentation, 16 | Chen et al., 2020, https://arxiv.org/abs/2002.09741 17 | ''' 18 | stochastic_forward = True 19 | 20 | def __init__(self, encoder, x_size, split_dim=1): 21 | super(Augment, self).__init__() 22 | assert split_dim >= 1 23 | self.encoder = encoder 24 | self.split_dim = split_dim 25 | self.x_size = x_size 26 | self.cond = isinstance(self.encoder, ConditionalDistribution) 27 | 28 | def split_z(self, z): 29 | split_proportions = (self.x_size, z.shape[self.split_dim] - self.x_size) 30 | return torch.split(z, split_proportions, dim=self.split_dim) 31 | 32 | def forward(self, x): 33 | if self.cond: z2, logqz2 = self.encoder.sample_with_log_prob(context=x) 34 | else: z2, logqz2 = self.encoder.sample_with_log_prob(num_samples=x.shape[0]) 35 | z = torch.cat([x, z2], dim=self.split_dim) 36 | ldj = -logqz2 37 | return z, ldj 38 | 39 | def inverse(self, z): 40 | x, z2 = self.split_z(z) 41 | return x 42 | -------------------------------------------------------------------------------- /configs/cityscapes/semantic-segmentation/Base-Cityscapes-SemanticSegmentation.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | BACKBONE: 3 | FREEZE_AT: 0 4 | NAME: "build_resnet_backbone" 5 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 6 | PIXEL_MEAN: [123.675, 116.280, 103.530] 7 | PIXEL_STD: [58.395, 57.120, 57.375] 8 | RESNETS: 9 | DEPTH: 50 10 | STEM_TYPE: "basic" # not used 11 | STEM_OUT_CHANNELS: 64 12 | STRIDE_IN_1X1: False 13 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 14 | NORM: "SyncBN" # use syncbn for cityscapes dataset 15 | RES5_MULTI_GRID: [1, 1, 1] # not used 16 | DATASETS: 17 | TRAIN: ("cityscapes_fine_sem_seg_train",) 18 | TEST: ("cityscapes_fine_sem_seg_val",) 19 | SOLVER: 20 | IMS_PER_BATCH: 16 21 | BASE_LR: 0.0001 22 | MAX_ITER: 90000 23 | WARMUP_FACTOR: 1.0 24 | WARMUP_ITERS: 0 25 | WEIGHT_DECAY: 0.05 26 | OPTIMIZER: "ADAMW" 27 | LR_SCHEDULER_NAME: "WarmupPolyLR" 28 | BACKBONE_MULTIPLIER: 0.1 29 | CLIP_GRADIENTS: 30 | ENABLED: True 31 | CLIP_TYPE: "full_model" 32 | CLIP_VALUE: 0.01 33 | NORM_TYPE: 2.0 34 | AMP: 35 | ENABLED: True 36 | INPUT: 37 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 1024) for x in range(5, 21)]"] 38 | MIN_SIZE_TRAIN_SAMPLING: "choice" 39 | MIN_SIZE_TEST: 1024 40 | MAX_SIZE_TRAIN: 4096 41 | MAX_SIZE_TEST: 2048 42 | CROP: 43 | ENABLED: True 44 | TYPE: "absolute" 45 | SIZE: (512, 1024) 46 | SINGLE_CATEGORY_MAX_AREA: 1.0 47 | COLOR_AUG_SSD: True 48 | SIZE_DIVISIBILITY: -1 49 | FORMAT: "RGB" 50 | DATASET_MAPPER_NAME: "mask_former_semantic" 51 | TEST: 52 | EVAL_PERIOD: 5000 53 | AUG: 54 | ENABLED: False 55 | MIN_SIZES: [512, 768, 1024, 1280, 1536, 1792] 56 | MAX_SIZE: 4096 57 | FLIP: True 58 | DATALOADER: 59 | FILTER_EMPTY_ANNOTATIONS: True 60 | NUM_WORKERS: 4 61 | VERSION: 2 62 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | ### Requirements 4 | - Linux or macOS with Python ≥ 3.6 5 | - PyTorch ≥ 1.9 and [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation. 
6 | Install them together at [pytorch.org](https://pytorch.org) to make sure of this. Note: please check that 7 | the installed PyTorch version matches the one required by Detectron2. 8 | - Detectron2: follow [Detectron2 installation instructions](https://detectron2.readthedocs.io/tutorials/install.html). 9 | - OpenCV is optional, but it is needed by the demo and visualization 10 | - `pip install -r requirements.txt` 11 | 12 | ### CUDA kernel for MSDeformAttn 13 | After preparing the required environment, run the following command to compile the CUDA kernel for MSDeformAttn. 14 | 15 | `CUDA_HOME` must be defined and point to the directory of the installed CUDA toolkit. 16 | 17 | ```bash 18 | cd mask2former/modeling/pixel_decoder/ops 19 | sh make.sh 20 | ``` 21 | 22 | #### Building on another system 23 | To build on a system that does not have a GPU device but provides the drivers: 24 | ```bash 25 | TORCH_CUDA_ARCH_LIST='8.0' FORCE_CUDA=1 python setup.py build install 26 | ``` 27 | 28 | ### Example conda environment setup 29 | ```bash 30 | conda create --name mask2former python=3.8 -y 31 | conda activate mask2former 32 | conda install pytorch==1.9.0 torchvision==0.10.0 cudatoolkit=11.1 -c pytorch -c nvidia 33 | pip install -U opencv-python 34 | 35 | # under your working directory 36 | git clone git@github.com:facebookresearch/detectron2.git 37 | cd detectron2 38 | pip install -e . 39 | pip install git+https://github.com/cocodataset/panopticapi.git 40 | pip install git+https://github.com/mcordts/cityscapesScripts.git 41 | 42 | cd .. 43 | git clone git@github.com:facebookresearch/Mask2Former.git 44 | cd Open-set-M2F 45 | pip install -r requirements.txt 46 | cd mask2former/modeling/pixel_decoder/ops 47 | sh make.sh 48 | ``` 49 | -------------------------------------------------------------------------------- /mask2former/data/datasets/register_road_anomaly.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
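# Usage sketch (an illustration, not part of the original file): importing this
# module registers the dataset, after which it can be fetched through the
# standard detectron2 catalogs:
#
#   from detectron2.data import DatasetCatalog, MetadataCatalog
#   dicts = DatasetCatalog.get("road_anomaly")  # list of per-image dicts
#   meta = MetadataCatalog.get("road_anomaly")  # stuff_classes, stuff_colors, ...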
2 | import os 3 | import glob 4 | from detectron2.data import DatasetCatalog, MetadataCatalog 5 | from detectron2.data.datasets import load_sem_seg 6 | 7 | SMIYC_SEM_SEG_CATEGORIES = [ 8 | { 9 | "color": [0, 0, 0], 10 | "instances": True, 11 | "readable": "Inliers", 12 | "name": "inliers", 13 | "evaluate": True, 14 | }, 15 | { 16 | "color": [255, 255, 255], 17 | "instances": True, 18 | "readable": "Outlier", 19 | "name": "outlier", 20 | "evaluate": True, 21 | } 22 | ] 23 | 24 | def _get_ra_meta(): 25 | stuff_classes = [k["readable"] for k in SMIYC_SEM_SEG_CATEGORIES if k["evaluate"]] 26 | stuff_colors = [k["color"] for k in SMIYC_SEM_SEG_CATEGORIES if k["evaluate"]] 27 | ret = { 28 | "stuff_classes": stuff_classes, 29 | "stuff_colors": stuff_colors, 30 | } 31 | return ret 32 | 33 | def load_road_anomaly(root): 34 | 35 | image_files = list(sorted(glob.glob(f"{root}/frames/*.jpg"))) 36 | examples = [] 37 | 38 | for im_file in image_files: 39 | examples.append({ 40 | "file_name": im_file, 41 | "sem_seg_file_name": im_file.replace('.jpg', '.labels/labels_semantic.png'), 42 | "height": 720, 43 | "width": 1280, 44 | }) 45 | return examples 46 | def register_road_anomaly(root): 47 | root = os.path.join(root, "road_anomaly") 48 | meta = _get_ra_meta() 49 | 50 | DatasetCatalog.register( 51 | "road_anomaly", lambda x=root: load_road_anomaly(x) 52 | ) 53 | 54 | MetadataCatalog.get("road_anomaly").set( 55 | evaluator_type="ood_detection", 56 | ignore_label=255, 57 | **meta, 58 | ) 59 | 60 | 61 | _root = os.getenv("DETECTRON2_DATASETS", "datasets") 62 | register_road_anomaly(_root) 63 | -------------------------------------------------------------------------------- /mask2former/data/datasets/register_fs_static.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | import os 3 | import glob 4 | from detectron2.data import DatasetCatalog, MetadataCatalog 5 | from detectron2.data.datasets import load_sem_seg 6 | 7 | FS_STATIC_SEM_SEG_CATEGORIES = [ 8 | { 9 | "color": [0, 0, 0], 10 | "instances": True, 11 | "readable": "Inliers", 12 | "name": "inliers", 13 | "evaluate": True, 14 | }, 15 | { 16 | "color": [255, 255, 255], 17 | "instances": True, 18 | "readable": "Outlier", 19 | "name": "outlier", 20 | "evaluate": True, 21 | } 22 | ] 23 | 24 | def _get_fs_static_meta(): 25 | stuff_classes = [k["readable"] for k in FS_STATIC_SEM_SEG_CATEGORIES if k["evaluate"]] 26 | stuff_colors = [k["color"] for k in FS_STATIC_SEM_SEG_CATEGORIES if k["evaluate"]] 27 | ret = { 28 | "stuff_classes": stuff_classes, 29 | "stuff_colors": stuff_colors, 30 | } 31 | return ret 32 | 33 | def load_fs_static_val(root): 34 | image_files = list(sorted(glob.glob(root + '/*.jpg'))) 35 | examples = [] 36 | 37 | for im_file in image_files: 38 | examples.append({ 39 | "file_name": im_file, 40 | "sem_seg_file_name": im_file.replace('_rgb.jpg', '_labels.png'), 41 | "height": 1024, 42 | "width": 2048, 43 | }) 44 | return examples 45 | 46 | 47 | def register_all_fs_static(root): 48 | root = os.path.join(root, "fs_static_val") 49 | meta = _get_fs_static_meta() 50 | 51 | DatasetCatalog.register( 52 | "fs_static_val", lambda x=root: load_fs_static_val(x) 53 | ) 54 | MetadataCatalog.get("fs_static_val").set( 55 | evaluator_type="ood_detection", 56 | ignore_label=255, 57 | **meta, 58 | ) 59 | 60 | 61 | _root = os.getenv("DETECTRON2_DATASETS", "datasets") 62 | register_all_fs_static(_root) 63 | -------------------------------------------------------------------------------- /datasets/ade20k_instance_catid_mapping.txt: -------------------------------------------------------------------------------- 1 | Instance100 SceneParse150 FullADE20K 2 | 1 8 165 3 | 2 9 3055 4 | 3 11 350 5 | 4 13 1831 6 | 5 15 774 7 | 5 15 783 8 | 6 16 2684 9 | 7 19 687 10 | 8 20 471 11 | 9 21 401 12 | 10 23 1735 13 | 11 24 2473 14 | 12 25 2329 15 | 13 28 1564 16 | 14 31 57 17 | 15 32 2272 18 | 16 33 907 19 | 17 34 724 20 | 18 36 2985 21 | 18 36 533 22 | 19 37 1395 23 | 20 38 155 24 | 21 39 2053 25 | 22 40 689 26 | 23 42 266 27 | 24 43 581 28 | 25 44 2380 29 | 26 45 491 30 | 27 46 627 31 | 28 48 2388 32 | 29 50 943 33 | 30 51 2096 34 | 31 54 2530 35 | 32 56 420 36 | 33 57 1948 37 | 34 58 1869 38 | 35 59 2251 39 | 36 63 239 40 | 37 65 571 41 | 38 66 2793 42 | 39 67 978 43 | 40 68 236 44 | 41 70 181 45 | 42 71 629 46 | 43 72 2598 47 | 44 73 1744 48 | 45 74 1374 49 | 46 75 591 50 | 47 76 2679 51 | 48 77 223 52 | 49 79 47 53 | 50 81 327 54 | 51 82 2821 55 | 52 83 1451 56 | 53 84 2880 57 | 54 86 480 58 | 55 87 77 59 | 56 88 2616 60 | 57 89 246 61 | 57 89 247 62 | 58 90 2733 63 | 59 91 14 64 | 60 93 38 65 | 61 94 1936 66 | 62 96 120 67 | 63 98 1702 68 | 64 99 249 69 | 65 103 2928 70 | 66 104 2337 71 | 67 105 1023 72 | 68 108 2989 73 | 69 109 1930 74 | 70 111 2586 75 | 71 112 131 76 | 72 113 146 77 | 73 116 95 78 | 74 117 1563 79 | 75 119 1708 80 | 76 120 103 81 | 77 121 1002 82 | 78 122 2569 83 | 79 124 2833 84 | 80 125 1551 85 | 81 126 1981 86 | 82 127 29 87 | 83 128 187 88 | 84 130 747 89 | 85 131 2254 90 | 86 133 2262 91 | 87 134 1260 92 | 88 135 2243 93 | 89 136 2932 94 | 90 137 2836 95 | 91 138 2850 96 | 92 139 64 97 | 93 140 894 98 | 94 143 1919 99 | 95 144 1583 100 | 96 145 318 101 | 97 147 2046 102 | 98 148 1098 103 | 99 149 530 104 | 100 150 954 105 | 
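A minimal sketch (not part of the repository) of how the three-column mapping above can be parsed; note that some Instance100 ids (e.g. 5, 18, 57) map to several FullADE20K ids while sharing a single SceneParse150 id:

def load_catid_mapping(path="datasets/ade20k_instance_catid_mapping.txt"):
    """Parse the mapping into {instance100_id: sceneparse150_id}."""
    mapping = {}
    with open(path) as f:
        next(f)  # skip the header row
        for line in f:
            inst100, sp150, _full_ade = (int(v) for v in line.split())
            mapping[inst100] = sp150  # duplicate ids share one sp150 id, so overwriting is harmless
    return mapping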
-------------------------------------------------------------------------------- /denseflow/utils/tensors.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def sum_except_batch(x, num_dims=1): 4 | ''' 5 | Sums all dimensions except the first. 6 | 7 | Args: 8 | x: Tensor, shape (batch_size, ...) 9 | num_dims: int, number of batch dims (default=1) 10 | 11 | Returns: 12 | x_sum: Tensor, shape (batch_size,) 13 | ''' 14 | return x.reshape(*x.shape[:num_dims], -1).sum(-1) 15 | 16 | 17 | def mean_except_batch(x, num_dims=1): 18 | ''' 19 | Averages all dimensions except the first. 20 | 21 | Args: 22 | x: Tensor, shape (batch_size, ...) 23 | num_dims: int, number of batch dims (default=1) 24 | 25 | Returns: 26 | x_mean: Tensor, shape (batch_size,) 27 | ''' 28 | return x.reshape(*x.shape[:num_dims], -1).mean(-1) 29 | 30 | 31 | def split_leading_dim(x, shape): 32 | """Reshapes the leading dim of `x` to have the given shape.""" 33 | new_shape = torch.Size(shape) + x.shape[1:] 34 | return torch.reshape(x, new_shape) 35 | 36 | 37 | def merge_leading_dims(x, num_dims=2): 38 | """Reshapes the tensor `x` such that the first `num_dims` dimensions are merged to one.""" 39 | new_shape = torch.Size([-1]) + x.shape[num_dims:] 40 | return torch.reshape(x, new_shape) 41 | 42 | 43 | def repeat_rows(x, num_reps): 44 | """Each row of tensor `x` is repeated `num_reps` times along leading dimension.""" 45 | shape = x.shape 46 | x = x.unsqueeze(1) 47 | x = x.expand(shape[0], num_reps, *shape[1:]) 48 | return merge_leading_dims(x, num_dims=2) 49 | 50 | def orthogonalize_tensor(tensor): 51 | assert len(tensor.shape) == 2 52 | # flattened = tensor.new(rows, cols).normal_(0, 1) 53 | 54 | # Compute the qr factorization 55 | q, r = torch.qr(tensor) 56 | # Make Q uniform according to https://arxiv.org/pdf/math-ph/0609050.pdf 57 | d = torch.diag(r, 0) 58 | ph = d.sign() 59 | q *= ph 60 | tensor.view_as(q).copy_(q) 61 | return tensor -------------------------------------------------------------------------------- /mask2former/data/datasets/register_fs_laf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | import os 3 | import glob 4 | from detectron2.data import DatasetCatalog, MetadataCatalog 5 | from detectron2.data.datasets import load_sem_seg 6 | 7 | FS_LAF_SEM_SEG_CATEGORIES = [ 8 | { 9 | "color": [0, 0, 0], 10 | "instances": True, 11 | "readable": "Inliers", 12 | "name": "inliers", 13 | "evaluate": True, 14 | }, 15 | { 16 | "color": [255, 255, 255], 17 | "instances": True, 18 | "readable": "Outlier", 19 | "name": "outlier", 20 | "evaluate": True, 21 | } 22 | ] 23 | 24 | def _get_fs_laf_meta(): 25 | stuff_classes = [k["readable"] for k in FS_LAF_SEM_SEG_CATEGORIES if k["evaluate"]] 26 | stuff_colors = [k["color"] for k in FS_LAF_SEM_SEG_CATEGORIES if k["evaluate"]] 27 | ret = { 28 | "stuff_classes": stuff_classes, 29 | "stuff_colors": stuff_colors, 30 | } 31 | return ret 32 | 33 | def load_fs_laf_val(root): 34 | image_files = list(sorted(glob.glob(root + '/validation/leftImg8bit/*.png'))) 35 | examples = [] 36 | 37 | for im_file in image_files: 38 | examples.append({ 39 | "file_name": im_file, 40 | "sem_seg_file_name": im_file.replace('_leftImg8bit.png', '_labels.png').replace('leftImg8bit', 'gtFine'), 41 | "height": 1024, 42 | "width": 2048, 43 | }) 44 | return examples 45 | 46 | 47 | def register_all_fs_laf(root): 48 | root = os.path.join(root, "fs_lost_found") 49 | meta = _get_fs_laf_meta() 50 | 51 | DatasetCatalog.register( 52 | "fs_laf_val", lambda x=root: load_fs_laf_val(x) 53 | ) 54 | MetadataCatalog.get("fs_laf_val").set( 55 | evaluator_type="ood_detection", 56 | ignore_label=255, 57 | **meta, 58 | ) 59 | 60 | 61 | _root = os.getenv("DETECTRON2_DATASETS", "datasets") 62 | register_all_fs_laf(_root) 63 | -------------------------------------------------------------------------------- /denseflow/flows/flow.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from collections.abc import Iterable 5 | from denseflow.distributions import Distribution 6 | from denseflow.transforms import Transform 7 | 8 | 9 | class Flow(Distribution): 10 | """ 11 | Base class for Flow. 12 | Flows use the forward transforms to transform data to noise. 13 | The inverse transforms can subsequently be used for sampling. 14 | These are typically useful as generative models of data. 
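    Example (a sketch; `StandardNormal` is assumed to be the base distribution
    defined in denseflow/distributions/normal.py, paired with any Transforms):

        >>> flow = Flow(base_dist=StandardNormal((3, 32, 32)),
        ...             transforms=[ScalarAffineBijection(shift=-0.5, scale=1./256)])
        >>> log_px = flow.log_prob(x)  # (batch_size,) for a batch x
        >>> imgs = flow.sample(16)     # (16, 3, 32, 32)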
15 | """ 16 | 17 | def __init__(self, base_dist, transforms, coef=1.): 18 | super(Flow, self).__init__() 19 | assert isinstance(base_dist, Distribution) 20 | if isinstance(transforms, Transform): transforms = [transforms] 21 | assert isinstance(transforms, Iterable) 22 | assert all(isinstance(transform, Transform) for transform in transforms) 23 | self.base_dist = base_dist 24 | self.transforms = nn.ModuleList(transforms) 25 | self.lower_bound = any(transform.lower_bound for transform in transforms) 26 | self.coef = coef 27 | 28 | 29 | def log_prob(self, x, return_z=False): 30 | 31 | log_prob = torch.zeros(x.shape[0], device=x.device) 32 | for transform in self.transforms: 33 | x, ldj = transform(x) 34 | log_prob += ldj 35 | # log_prob = log_prob / self.base_dist.scale 36 | log_prob += self.base_dist.log_prob(x) 37 | log_prob = log_prob / self.coef 38 | 39 | if return_z: 40 | return x, log_prob 41 | return log_prob 42 | 43 | def sample(self, num_samples): 44 | z = self.base_dist.sample(num_samples) 45 | for transform in reversed(self.transforms): 46 | z = transform.inverse(z) 47 | return z 48 | 49 | def sample_with_log_prob(self, num_samples): 50 | raise RuntimeError("Flow does not support sample_with_log_prob, see InverseFlow instead.") 51 | -------------------------------------------------------------------------------- /denseflow/distributions/base.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch import nn 4 | 5 | 6 | class Distribution(nn.Module): 7 | """Distribution base class.""" 8 | 9 | def log_prob(self, x): 10 | """Calculate log probability under the distribution. 11 | 12 | Args: 13 | x: Tensor, shape (batch_size, ...) 14 | 15 | Returns: 16 | log_prob: Tensor, shape (batch_size,) 17 | """ 18 | raise NotImplementedError() 19 | 20 | def sample(self, num_samples): 21 | """Generates samples from the distribution. 22 | 23 | Args: 24 | num_samples: int, number of samples to generate. 25 | 26 | Returns: 27 | samples: Tensor, shape (num_samples, ...) 28 | """ 29 | raise NotImplementedError() 30 | 31 | def sample_with_log_prob(self, num_samples): 32 | """Generates samples from the distribution together with their log probability. 33 | 34 | Args: 35 | num_samples: int, number of samples to generate. 36 | 37 | Returns: 38 | samples: Tensor, shape (num_samples, ...) 39 | log_prob: Tensor, shape (num_samples,) 40 | """ 41 | samples = self.sample(num_samples) 42 | log_prob = self.log_prob(samples) 43 | return samples, log_prob 44 | 45 | def forward(self, *args, mode, **kwargs): 46 | ''' 47 | To allow Distribution objects to be wrapped by DataParallelDistribution, 48 | which parallelizes .forward() of replicas on subsets of data. 49 | 50 | DataParallelDistribution.log_prob() calls DataParallel.forward(). 51 | DataParallel.forward() calls Distribution.forward() for different 52 | data subsets on each device and returns the combined outputs. 
53 | ''' 54 | if mode == 'log_prob': 55 | return self.log_prob(*args, **kwargs) 56 | else: 57 | raise RuntimeError("Mode {} not supported.".format(mode)) 58 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/functional/mixtures/gaussian_mixture.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.distributions import Normal 4 | from denseflow.transforms.bijections.functional.iterative_inversion import bisection_inverse 5 | 6 | 7 | def gaussian_mixture_transform(inputs, logit_weights, means, log_scales, eps=1e-10, max_iters=100, inverse=False): 8 | ''' 9 | Univariate mixture of Gaussians transform. 10 | 11 | Args: 12 | inputs: torch.Tensor, shape (shape,) 13 | logit_weights: torch.Tensor, shape (shape, num_mixtures) 14 | means: torch.Tensor, shape (shape, num_mixtures) 15 | log_scales: torch.Tensor, shape (shape, num_mixtures) 16 | eps: float, tolerance for bisection |f(x) - z_est| < eps 17 | max_iters: int, maximum iterations for bisection 18 | inverse: bool, if True, return inverse 19 | ''' 20 | 21 | log_weights = F.log_softmax(logit_weights, dim=-1) 22 | dist = Normal(means, log_scales.exp()) 23 | 24 | def mix_cdf(x): 25 | return torch.sum(log_weights.exp() * dist.cdf(x.unsqueeze(-1)), dim=-1) 26 | 27 | def mix_log_pdf(x): 28 | return torch.logsumexp(log_weights + dist.log_prob(x.unsqueeze(-1)), dim=-1) 29 | 30 | if inverse: 31 | max_scales = torch.sum(torch.exp(log_scales), dim=-1, keepdim=True) 32 | init_lower, _ = (means - 20 * max_scales).min(dim=-1) 33 | init_upper, _ = (means + 20 * max_scales).max(dim=-1) 34 | return bisection_inverse(fn=lambda x: mix_cdf(x), 35 | z=inputs, 36 | init_x=torch.zeros_like(inputs), 37 | init_lower=init_lower, 38 | init_upper=init_upper, 39 | eps=eps, 40 | max_iters=max_iters) 41 | else: 42 | z = mix_cdf(inputs) 43 | ldj = mix_log_pdf(inputs) 44 | return z, ldj 45 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/functional/mixtures/logistic_mixture_censored.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from denseflow.transforms.bijections.functional.iterative_inversion import bisection_inverse 4 | from denseflow.transforms.bijections.functional.mixtures.utils_logistic_censored import cmol_cdf, cmol_log_pdf 5 | 6 | 7 | def censored_logistic_mixture_transform(inputs, logit_weights, means, log_scales, num_bins=256, eps=1e-10, max_iters=100, inverse=False): 8 | ''' 9 | Censored univariate mixture of logistics transform. 
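    Unlike the plain logistic mixture, probability mass falling outside the
    K bins is censored onto the interval boundaries, following the discretized
    logistic of PixelCNN++ referenced in utils_logistic_censored.py.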
10 | 11 | Args: 12 | inputs: torch.Tensor, shape (shape,) 13 | logit_weights: torch.Tensor, shape (shape, num_mixtures) 14 | means: torch.Tensor, shape (shape, num_mixtures) 15 | log_scales: torch.Tensor, shape (shape, num_mixtures) 16 | num_bins: int, the number of bins 17 | eps: float, tolerance for bisection |f(x) - z_est| < eps 18 | max_iters: int, maximum iterations for bisection 19 | inverse: bool, if True, return inverse 20 | ''' 21 | 22 | log_weights = F.log_softmax(logit_weights, dim=-1) 23 | log_scales = log_scales.clamp(min=-7.0) 24 | 25 | def mix_cdf(x): 26 | return torch.sum(log_weights.exp() * cmol_cdf(x, means, log_scales, num_bins), dim=-1) 27 | 28 | def mix_log_pdf(x): 29 | return torch.logsumexp(log_weights + cmol_log_pdf(x, means, log_scales, num_bins), dim=-1) 30 | 31 | if inverse: 32 | return bisection_inverse(fn=lambda x: mix_cdf(x), 33 | z=inputs, 34 | init_x=torch.ones_like(inputs) * 0.5, 35 | init_lower=torch.zeros_like(inputs), 36 | init_upper=torch.ones_like(inputs), 37 | eps=eps, 38 | max_iters=max_iters) 39 | else: 40 | z = mix_cdf(inputs) 41 | ldj = mix_log_pdf(inputs) 42 | return z, ldj 43 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/linear.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from denseflow.transforms.bijections import Bijection 6 | 7 | 8 | class Linear(Bijection): 9 | """ 10 | Linear bijection y=Wx. 11 | 12 | Costs: 13 | forward = O(BD^2) 14 | inverse = O(BD^2 + D^3) 15 | ldj = O(D^3) 16 | where: 17 | B = batch size 18 | D = number of features 19 | 20 | Args: 21 | num_features: int, Number of features in the input and output. 22 | orthogonal_init: bool, if True initialize weights to be a random orthogonal matrix (default=True). 23 | bias: bool, if True a bias is included (default=False). 24 | """ 25 | def __init__(self, num_features, orthogonal_init=True, bias=False): 26 | super(Linear, self).__init__() 27 | self.num_features = num_features 28 | self.weight = nn.Parameter(torch.Tensor(num_features, num_features)) 29 | if bias: 30 | self.bias = nn.Parameter(torch.Tensor(num_features)) 31 | else: 32 | self.register_parameter('bias', None) 33 | self.reset_parameters(orthogonal_init) 34 | 35 | def reset_parameters(self, orthogonal_init): 36 | self.orthogonal_init = orthogonal_init 37 | 38 | if self.orthogonal_init: 39 | nn.init.orthogonal_(self.weight) 40 | else: 41 | bound = 1.0 / np.sqrt(self.num_features) 42 | nn.init.uniform_(self.weight, -bound, bound) 43 | 44 | if self.bias is not None: 45 | nn.init.zeros_(self.bias) 46 | 47 | def forward(self, x): 48 | z = F.linear(x, self.weight, self.bias) 49 | _, ldj = torch.slogdet(self.weight) 50 | ldj = ldj.expand([x.shape[0]]) 51 | return z, ldj 52 | 53 | def inverse(self, z): 54 | weight_inv = torch.inverse(self.weight) 55 | if self.bias is not None: z = z - self.bias 56 | x = F.linear(z, weight_inv) 57 | return x 58 | -------------------------------------------------------------------------------- /denseflow/transforms/surjections/dequantization_uniform.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from denseflow.transforms.surjections import Surjection 3 | 4 | 5 | class UniformDequantization(Surjection): 6 | ''' 7 | A uniform dequantization layer. 8 | This is useful for converting discrete variables to continuous [1, 2]. 
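    Each of the D input dimensions contributes -log(K) to the log-det-Jacobian
    (see _ldj below); this constant is what converts the discrete
    log-likelihood into a continuous density on [0,1]^D.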
9 | 10 | Forward: 11 | `z = (x+u)/K, u~Unif(0,1)^D` 12 | where `x` is discrete, `x \in {0,1,2,...,K-1}^D`. 13 | Inverse: 14 | `x = Quantize(z, K)` 15 | 16 | Args: 17 | num_bits: int, number of bits in quantization, 18 | i.e. 8 for `x \in {0,1,2,...,255}^D` 19 | or 5 for `x \in {0,1,2,...,31}^D`. 20 | 21 | References: 22 | [1] RNADE: The real-valued neural autoregressive density-estimator, 23 | Uria et al., 2013, https://arxiv.org/abs/1306.0186 24 | [2] Flow++: Improving Flow-Based Generative Models with Variational Dequantization and Architecture Design, 25 | Ho et al., 2019, https://arxiv.org/abs/1902.00275 26 | ''' 27 | 28 | stochastic_forward = True 29 | 30 | def __init__(self, num_bits=8, coef=1): 31 | super(UniformDequantization, self).__init__() 32 | self.num_bits = num_bits 33 | self.quantization_bins = 2**num_bits 34 | self.register_buffer('ldj_per_dim', -torch.log(torch.tensor(self.quantization_bins, dtype=torch.float))) 35 | self.coef = coef 36 | 37 | def _ldj(self, shape): 38 | batch_size = shape[0] 39 | num_dims = shape[1:].numel() 40 | ldj = self.ldj_per_dim * num_dims * self.coef 41 | return ldj.repeat(batch_size) 42 | 43 | def forward(self, x): 44 | u = torch.rand(x.shape, device=self.ldj_per_dim.device, dtype=self.ldj_per_dim.dtype) 45 | z = (x.type(u.dtype) + u) / self.quantization_bins 46 | ldj = self._ldj(z.shape) 47 | return z, ldj 48 | 49 | def inverse(self, z): 50 | z = self.quantization_bins * z 51 | return z.floor().clamp(min=0, max=self.quantization_bins-1).long() 52 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/functional/mixtures/logistic_mixture.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from denseflow.transforms.bijections.functional.iterative_inversion import bisection_inverse 4 | from denseflow.transforms.bijections.functional.mixtures.utils_logistic import logistic_log_cdf, logistic_log_pdf 5 | 6 | 7 | def logistic_mixture_transform(inputs, logit_weights, means, log_scales, eps=1e-10, max_iters=100, inverse=False): 8 | ''' 9 | Univariate mixture of logistics transform. 
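    Forward computes the mixture CDF, z = sum_k w_k * sigmoid((x - mu_k) / s_k),
    so z lies in [0,1], and returns the mixture log-pdf as the log-det-Jacobian.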
10 | 
11 |     Args:
12 |         inputs: torch.Tensor, shape (shape,)
13 |         logit_weights: torch.Tensor, shape (shape, num_mixtures)
14 |         means: torch.Tensor, shape (shape, num_mixtures)
15 |         log_scales: torch.Tensor, shape (shape, num_mixtures)
16 |         eps: float, tolerance for bisection |f(x) - z_est| < eps
17 |         max_iters: int, maximum iterations for bisection
18 |         inverse: bool, if True, return inverse
19 |     '''
20 | 
21 |     log_weights = F.log_softmax(logit_weights, dim=-1)
22 | 
23 |     def mix_cdf(x):
24 |         return torch.logsumexp(log_weights + logistic_log_cdf(x.unsqueeze(-1), means, log_scales), dim=-1).exp()
25 | 
26 |     def mix_log_pdf(x):
27 |         return torch.logsumexp(log_weights + logistic_log_pdf(x.unsqueeze(-1), means, log_scales), dim=-1)
28 | 
29 |     if inverse:
30 |         max_scales = torch.sum(torch.exp(log_scales), dim=-1, keepdim=True)
31 |         init_lower, _ = (means - 20 * max_scales).min(dim=-1)
32 |         init_upper, _ = (means + 20 * max_scales).max(dim=-1)
33 |         return bisection_inverse(fn=lambda x: mix_cdf(x),
34 |                                  z=inputs,
35 |                                  init_x=torch.zeros_like(inputs),
36 |                                  init_lower=init_lower,
37 |                                  init_upper=init_upper,
38 |                                  eps=eps,
39 |                                  max_iters=max_iters)
40 |     else:
41 |         z = mix_cdf(inputs)
42 |         ldj = mix_log_pdf(inputs)
43 |         return z, ldj
--------------------------------------------------------------------------------
/denseflow/transforms/bijections/rotate.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from denseflow.transforms.bijections import Bijection
3 | 
4 | 
5 | class Rotate(Bijection):
6 |     """
7 |     Rotates inputs 90, 180 or 270 degrees around given dimensions dim1 and dim2.
8 |     For input with shape (B,C,H,W), dim1=2, dim2=3 will rotate on (H,W).
9 | 
10 |     Args:
11 |         degrees: int, one of {90, 180, 270}
12 |         dim1: int, first dimension of the rotation plane
13 |         dim2: int, second dimension of the rotation plane
14 |     """
15 | 
16 |     def __init__(self, degrees, dim1, dim2):
17 |         super(Rotate, self).__init__()
18 |         assert isinstance(degrees, int), 'degrees must be an integer'
19 |         assert isinstance(dim1, int), 'dim1 must be an integer'
20 |         assert isinstance(dim2, int), 'dim2 must be an integer'
21 |         assert degrees in {90,180,270}
22 |         assert dim1 != 0
23 |         assert dim2 != 0
24 |         assert dim1 != dim2
25 | 
26 |         self.degrees = degrees
27 |         self.dim1 = dim1
28 |         self.dim2 = dim2
29 | 
30 |     def _rotate90(self, x):
31 |         return x.transpose(self.dim1, self.dim2).flip(self.dim1)
32 | 
33 |     def _rotate90_inv(self, z):
34 |         return z.flip(self.dim1).transpose(self.dim1, self.dim2)
35 | 
36 |     def _rotate180(self, x):
37 |         return x.flip(self.dim1).flip(self.dim2)
38 | 
39 |     def _rotate180_inv(self, z):
40 |         return z.flip(self.dim2).flip(self.dim1)
41 | 
42 |     def _rotate270(self, x):
43 |         return x.transpose(self.dim1, self.dim2).flip(self.dim2)
44 | 
45 |     def _rotate270_inv(self, z):
46 |         return z.flip(self.dim2).transpose(self.dim1, self.dim2)
47 | 
48 |     def forward(self, x):
49 |         if self.degrees == 90: x = self._rotate90(x)
50 |         elif self.degrees == 180: x = self._rotate180(x)
51 |         elif self.degrees == 270: x = self._rotate270(x)
52 |         return x, torch.zeros(x.shape[0], device=x.device, dtype=x.dtype)
53 | 
54 |     def inverse(self, z):
55 |         if self.degrees == 90: z = self._rotate90_inv(z)
56 |         elif self.degrees == 180: z = self._rotate180_inv(z)
57 |         elif self.degrees == 270: z = self._rotate270_inv(z)
58 |         return z
59 | 
--------------------------------------------------------------------------------
/denseflow/dense_distribution.py:
--------------------------------------------------------------------------------
1 | from denseflow.distributions import Distribution
2 | from denseflow.utils import sum_except_batch
3 | import torch
4 | import math
5 | 
6 | class StandardNormal(Distribution):
7 |     """A multivariate Normal with zero mean and unit covariance."""
8 | 
9 |     def __init__(self, shape, scale):
10 |         super(StandardNormal, self).__init__()
11 |         self.shape = torch.Size(shape)
12 |         self.register_buffer('buffer', torch.zeros(1))
13 |         self.scale = scale
14 | 
15 |     def log_prob(self, x):
16 |         log_base = - 0.5 * math.log(2 * math.pi)
17 |         log_inner = - 0.5 * x**2
18 |         return sum_except_batch(log_base+log_inner) / self.scale
19 | 
20 |     def sample(self, num_samples):
21 |         if type(num_samples) == int:
22 |             return torch.randn(num_samples, *self.shape, device=self.buffer.device, dtype=self.buffer.dtype)
23 |         else:
24 |             return torch.randn(num_samples, device=self.buffer.device, dtype=self.buffer.dtype)
25 | 
26 | class DiagonalNormal(Distribution):
27 |     """A multivariate Normal with diagonal covariance."""
28 | 
29 |     def __init__(self, shape):
30 |         super(DiagonalNormal, self).__init__()
31 |         self.shape = torch.Size(shape)
32 |         self.loc = torch.nn.Parameter(torch.zeros(shape))
33 |         self.log_scale = torch.nn.Parameter(torch.zeros(shape))
34 |         self.scale = 1.
35 | 
36 |     def log_prob(self, x):
37 |         log_base = - 0.5 * math.log(2 * math.pi) - self.log_scale
38 |         log_inner = - 0.5 * torch.exp(-2 * self.log_scale) * ((x - self.loc) ** 2)
39 |         return sum_except_batch(log_base+log_inner) / self.scale
40 | 
41 |     def sample(self, num_samples):
42 |         eps = torch.randn(num_samples, *self.shape, device=self.loc.device, dtype=self.loc.dtype)
43 |         return self.loc + self.log_scale.exp() * eps
44 | 
45 | 
46 | class ConvNormal2d(DiagonalNormal):
47 |     def __init__(self, shape, scale):
48 |         super(DiagonalNormal, self).__init__()
49 |         assert len(shape) == 3
50 |         self.shape = torch.Size(shape)
51 |         self.loc = torch.nn.Parameter(torch.zeros(1, shape[0], 1, 1))
52 |         self.log_scale = torch.nn.Parameter(torch.zeros(1, shape[0], 1, 1))
53 |         self.scale = scale
--------------------------------------------------------------------------------
/denseflow/transforms/bijections/autoregressive/autoregressive.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from collections.abc import Iterable
3 | from denseflow.transforms.bijections import Bijection
4 | 
5 | 
6 | class AutoregressiveBijection(Bijection):
7 |     """Transforms each input variable with an invertible elementwise bijection.
8 | 
9 |     The parameters of each invertible elementwise bijection can be functions of previous input
10 |     variables, but they must not depend on the current or any following input variables.
11 | 
12 |     NOTE: Calculating the inverse transform is D times slower than calculating the
13 |     forward transform, where D is the dimensionality of the input to the transform.
14 | 
15 |     Args:
16 |         autoregressive_net: nn.Module, an autoregressive network such that
17 |             elementwise_params = autoregressive_net(x)
18 |         autoregressive_order: str or Iterable, the order in which to sample.
19 | One of `{'ltr'}` 20 | """ 21 | def __init__(self, autoregressive_net, autoregressive_order='ltr'): 22 | super(AutoregressiveBijection, self).__init__() 23 | assert isinstance(autoregressive_order, str) or isinstance(autoregressive_order, Iterable) 24 | assert autoregressive_order in {'ltr'} 25 | self.autoregressive_net = autoregressive_net 26 | self.autoregressive_order = autoregressive_order 27 | 28 | def forward(self, x): 29 | elementwise_params = self.autoregressive_net(x) 30 | z, ldj = self._elementwise_forward(x, elementwise_params) 31 | return z, ldj 32 | 33 | def inverse(self, z): 34 | with torch.no_grad(): 35 | if self.autoregressive_order == 'ltr': return self._inverse_ltr(z) 36 | 37 | def _inverse_ltr(self, z): 38 | x = torch.zeros_like(z) 39 | for d in range(x.shape[1]): 40 | elementwise_params = self.autoregressive_net(x) 41 | x[:,d] = self._elementwise_inverse(z[:,d], elementwise_params[:,d]) 42 | return x 43 | 44 | def _output_dim_multiplier(self): 45 | raise NotImplementedError() 46 | 47 | def _elementwise_forward(self, x, elementwise_params): 48 | raise NotImplementedError() 49 | 50 | def _elementwise_inverse(self, z, elementwise_params): 51 | raise NotImplementedError() 52 | -------------------------------------------------------------------------------- /mask2former/modeling/pixel_decoder/ops/src/ms_deform_attn.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | /*! 12 | * Copyright (c) Facebook, Inc. and its affiliates. 
13 |  * Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR
14 |  */
15 | 
16 | #pragma once
17 | 
18 | #include "cpu/ms_deform_attn_cpu.h"
19 | 
20 | #ifdef WITH_CUDA
21 | #include "cuda/ms_deform_attn_cuda.h"
22 | #endif
23 | 
24 | 
25 | at::Tensor
26 | ms_deform_attn_forward(
27 |     const at::Tensor &value,
28 |     const at::Tensor &spatial_shapes,
29 |     const at::Tensor &level_start_index,
30 |     const at::Tensor &sampling_loc,
31 |     const at::Tensor &attn_weight,
32 |     const int im2col_step)
33 | {
34 |     if (value.type().is_cuda())
35 |     {
36 | #ifdef WITH_CUDA
37 |         return ms_deform_attn_cuda_forward(
38 |             value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step);
39 | #else
40 |         AT_ERROR("Not compiled with GPU support");
41 | #endif
42 |     }
43 |     AT_ERROR("Not implemented on the CPU");
44 | }
45 | 
46 | std::vector<at::Tensor>
47 | ms_deform_attn_backward(
48 |     const at::Tensor &value,
49 |     const at::Tensor &spatial_shapes,
50 |     const at::Tensor &level_start_index,
51 |     const at::Tensor &sampling_loc,
52 |     const at::Tensor &attn_weight,
53 |     const at::Tensor &grad_output,
54 |     const int im2col_step)
55 | {
56 |     if (value.type().is_cuda())
57 |     {
58 | #ifdef WITH_CUDA
59 |         return ms_deform_attn_cuda_backward(
60 |             value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step);
61 | #else
62 |         AT_ERROR("Not compiled with GPU support");
63 | #endif
64 |     }
65 |     AT_ERROR("Not implemented on the CPU");
66 | }
67 | 
68 | 
--------------------------------------------------------------------------------
/denseflow/nn/nets/matching/multiscale_densenet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from denseflow.nn.nets.matching import DenseNet
4 | 
5 | 
6 | class MultiscaleDenseNet(nn.Module):
7 |     def __init__(self, in_channels, out_channels, num_scales,
8 |                  num_blocks, mid_channels, depth, growth, dropout,
9 |                  gated_conv=False, zero_init=False):
10 |         super(MultiscaleDenseNet, self).__init__()
11 |         assert num_scales > 1
12 |         self.num_scales = num_scales
13 | 
14 |         def get_densenet(cin, cout, zinit=False):
15 |             return DenseNet(in_channels=cin,
16 |                             out_channels=cout,
17 |                             num_blocks=num_blocks,
18 |                             mid_channels=mid_channels,
19 |                             depth=depth,
20 |                             growth=growth,
21 |                             dropout=dropout,
22 |                             gated_conv=gated_conv,
23 |                             zero_init=zinit)
24 | 
25 |         # Down in
26 |         self.down_in = get_densenet(in_channels, mid_channels)
27 | 
28 |         # Down
29 |         down = []
30 |         for i in range(num_scales - 1):
31 |             down.append(nn.Sequential(nn.Conv2d(mid_channels, mid_channels, kernel_size=2, padding=0, stride=2),
32 |                                       get_densenet(mid_channels, mid_channels)))
33 |         self.down = nn.ModuleList(down)
34 | 
35 |         # Up
36 |         up = []
37 |         for i in range(num_scales - 1):
38 |             up.append(nn.Sequential(get_densenet(mid_channels, mid_channels),
39 |                                     nn.ConvTranspose2d(mid_channels, mid_channels, kernel_size=2, padding=0, stride=2)))
40 |         self.up = nn.ModuleList(up)
41 | 
42 |         # Up out
43 |         self.up_out = get_densenet(mid_channels, out_channels, zinit=zero_init)
44 | 
45 |     def forward(self, x):
46 | 
47 |         # Down in
48 |         d = [self.down_in(x)]
49 | 
50 |         # Down
51 |         for down_layer in self.down:
52 |             d.append(down_layer(d[-1]))
53 | 
54 |         # Up
55 |         u = [d[-1]]
56 |         for i, up_layer in enumerate(self.up):
57 |             u.append(up_layer(u[-1]) + d[self.num_scales - 2 - i])  # skip connection from the matching down scale
58 | 
59 |         # Up out
60 |         return self.up_out(u[-1])
61 | 
--------------------------------------------------------------------------------
/denseflow/transforms/bijections/permute.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from collections.abc import Iterable 3 | from denseflow.transforms.bijections import Bijection 4 | 5 | 6 | class Permute(Bijection): 7 | """ 8 | Permutes inputs on a given dimension using a given permutation. 9 | 10 | Args: 11 | permutation: Tensor or Iterable, shape (dim_size) 12 | dim: int, dimension to permute (excluding batch_dimension) 13 | """ 14 | 15 | def __init__(self, permutation, dim=1): 16 | super(Permute, self).__init__() 17 | assert isinstance(dim, int), 'dim must be an integer' 18 | assert dim >= 1, 'dim must be >= 1 (0 corresponds to batch dimension)' 19 | assert isinstance(permutation, torch.Tensor) or isinstance(permutation, Iterable), 'permutation must be a torch.Tensor or Iterable' 20 | if isinstance(permutation, torch.Tensor): 21 | assert permutation.ndimension() == 1, 'permutation must be a 1D tensor, but was of shape {}'.format(permutation.shape) 22 | else: 23 | permutation = torch.tensor(permutation) 24 | 25 | self.dim = dim 26 | self.register_buffer('permutation', permutation) 27 | 28 | @property 29 | def inverse_permutation(self): 30 | return torch.argsort(self.permutation) 31 | 32 | def forward(self, x): 33 | return torch.index_select(x, self.dim, self.permutation), torch.zeros(x.shape[0], device=x.device, dtype=x.dtype) 34 | 35 | def inverse(self, z): 36 | return torch.index_select(z, self.dim, self.inverse_permutation) 37 | 38 | 39 | class Shuffle(Permute): 40 | """ 41 | Permutes inputs on a given dimension using a random, but fixed, permutation. 42 | 43 | Args: 44 | dim_size: int, number of elements on dimension dim 45 | dim: int, dimension to permute (excluding batch_dimension) 46 | """ 47 | 48 | def __init__(self, dim_size, dim=1): 49 | super(Shuffle, self).__init__(torch.randperm(dim_size), dim) 50 | 51 | 52 | class Reverse(Permute): 53 | """ 54 | Reverses inputs on a given dimension. 55 | 56 | Args: 57 | dim_size: int, number of elements on dimension dim 58 | dim: int, dimension to permute (excluding batch_dimension) 59 | """ 60 | 61 | def __init__(self, dim_size, dim=1): 62 | super(Reverse, self).__init__(torch.arange(dim_size - 1, -1, -1), dim) 63 | -------------------------------------------------------------------------------- /denseflow/transforms/surjections/dequantization_variational.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from denseflow.distributions import ConditionalDistribution 3 | from denseflow.transforms.surjections import Surjection 4 | 5 | 6 | class VariationalDequantization(Surjection): 7 | ''' 8 | A variational dequantization layer. 9 | This is useful for converting discrete variables to continuous [1, 2]. 10 | 11 | Forward: 12 | `z = (x+u)/K, u~encoder(x)` 13 | where `x` is discrete, `x \in {0,1,2,...,K-1}^D` 14 | and `encoder` is a conditional distribution. 15 | Inverse: 16 | `x = Quantize(z, K)` 17 | 18 | Args: 19 | encoder: ConditionalDistribution, a conditional distribution/flow which 20 | outputs samples in `[0,1]^D` conditioned on `x`. 21 | num_bits: int, number of bits in quantization, 22 | i.e. 8 for `x \in {0,1,2,...,255}^D` 23 | or 5 for `x \in {0,1,2,...,31}^D`. 
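
    Note:
        Relative to UniformDequantization, forward() subtracts the encoder's
        log-density q(u|x) from the log-det-Jacobian, turning the objective
        into a variational lower bound that tightens as the encoder improves.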
24 | 25 | References: 26 | [1] RNADE: The real-valued neural autoregressive density-estimator, 27 | Uria et al., 2013, https://arxiv.org/abs/1306.0186 28 | [2] Flow++: Improving Flow-Based Generative Models with Variational Dequantization and Architecture Design, 29 | Ho et al., 2019, https://arxiv.org/abs/1902.00275 30 | ''' 31 | 32 | stochastic_forward = True 33 | 34 | def __init__(self, encoder, num_bits=8, coef=1.): 35 | super(VariationalDequantization, self).__init__() 36 | assert isinstance(encoder, ConditionalDistribution) 37 | self.num_bits = num_bits 38 | self.quantization_bins = 2**num_bits 39 | self.register_buffer('ldj_per_dim', -torch.log(torch.tensor(self.quantization_bins, dtype=torch.float))) 40 | self.encoder = encoder 41 | self.coef = coef 42 | 43 | def _ldj(self, shape): 44 | batch_size = shape[0] 45 | num_dims = shape[1:].numel() 46 | ldj = self.ldj_per_dim * num_dims * self.coef 47 | return ldj.repeat(batch_size) 48 | 49 | def forward(self, x): 50 | u, qu = self.encoder.sample_with_log_prob(context=x) 51 | z = (x.type(u.dtype) + u) / self.quantization_bins 52 | ldj = self._ldj(z.shape) - qu 53 | return z, ldj 54 | 55 | def inverse(self, z): 56 | z = self.quantization_bins * z 57 | return z.floor().clamp(min=0, max=self.quantization_bins-1).long() 58 | -------------------------------------------------------------------------------- /denseflow/distributions/normal.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from denseflow.distributions import Distribution 6 | from denseflow.utils import sum_except_batch 7 | 8 | 9 | class StandardNormal(Distribution): 10 | """A multivariate Normal with zero mean and unit covariance.""" 11 | 12 | def __init__(self, shape): 13 | super(StandardNormal, self).__init__() 14 | self.shape = torch.Size(shape) 15 | self.register_buffer('buffer', torch.zeros(1)) 16 | 17 | def log_prob(self, x): 18 | log_base = - 0.5 * math.log(2 * math.pi) 19 | log_inner = - 0.5 * x**2 20 | return sum_except_batch(log_base+log_inner) 21 | 22 | def sample(self, num_samples): 23 | if type(num_samples) == int: 24 | return torch.randn(num_samples, *self.shape, device=self.buffer.device, dtype=self.buffer.dtype) 25 | else: 26 | N, _, H, W = num_samples 27 | shape = (N, self.shape[0], H, W) 28 | return torch.randn(shape, device=self.buffer.device, dtype=self.buffer.dtype) 29 | 30 | class DiagonalNormal(Distribution): 31 | """A multivariate Normal with diagonal covariance.""" 32 | 33 | def __init__(self, shape): 34 | super(DiagonalNormal, self).__init__() 35 | self.shape = torch.Size(shape) 36 | self.loc = nn.Parameter(torch.zeros(shape)) 37 | self.log_scale = nn.Parameter(torch.zeros(shape)) 38 | 39 | def log_prob(self, x): 40 | log_base = - 0.5 * math.log(2 * math.pi) - self.log_scale 41 | log_inner = - 0.5 * torch.exp(-2 * self.log_scale) * ((x - self.loc) ** 2) 42 | return sum_except_batch(log_base+log_inner) 43 | 44 | def sample(self, num_samples): 45 | if type(num_samples) == int: 46 | eps = torch.randn(num_samples, *self.shape, device=self.loc.device, dtype=self.loc.dtype) 47 | else: 48 | N, _, H, W = num_samples 49 | shape = (N, self.shape[0], H, W) 50 | eps = torch.randn(*shape, device=self.loc.device, dtype=self.loc.dtype) 51 | return self.loc + self.log_scale.exp() * eps 52 | 53 | 54 | class ConvNormal2d(DiagonalNormal): 55 | def __init__(self, shape): 56 | super(DiagonalNormal, self).__init__() 57 | assert len(shape) == 3 
58 |         self.shape = torch.Size(shape)
59 |         self.loc = torch.nn.Parameter(torch.zeros(1, shape[0], 1, 1))
60 |         self.log_scale = torch.nn.Parameter(torch.zeros(1, shape[0], 1, 1))
61 | 
--------------------------------------------------------------------------------
/denseflow/transforms/bijections/autoregressive/autoregressive_linear.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from denseflow.utils import sum_except_batch
3 | from denseflow.transforms.bijections.autoregressive import AutoregressiveBijection
4 | 
5 | 
6 | class AdditiveAutoregressiveBijection(AutoregressiveBijection):
7 |     '''Additive autoregressive bijection.'''
8 | 
9 |     def _output_dim_multiplier(self):
10 |         return 1
11 | 
12 |     def _elementwise_forward(self, x, elementwise_params):
13 |         return x + elementwise_params, torch.zeros(x.shape[0], device=x.device, dtype=x.dtype)
14 | 
15 |     def _elementwise_inverse(self, z, elementwise_params):
16 |         return z - elementwise_params
17 | 
18 | 
19 | class AffineAutoregressiveBijection(AutoregressiveBijection):
20 |     '''
21 |     Affine autoregressive bijection.
22 | 
23 |     Args:
24 |         autoregressive_net: nn.Module, an autoregressive network such that
25 |             elementwise_params = autoregressive_net(x)
26 |         autoregressive_order: str or Iterable, the order in which to sample.
27 |             One of `{'ltr'}`
28 |         scale_fn: callable, the transform to obtain the scale.
29 |     '''
30 | 
31 |     def __init__(self, autoregressive_net, autoregressive_order='ltr', scale_fn=lambda s: torch.exp(s)):
32 |         super(AffineAutoregressiveBijection, self).__init__(autoregressive_net=autoregressive_net, autoregressive_order=autoregressive_order)
33 |         assert callable(scale_fn)
34 |         self.scale_fn = scale_fn
35 | 
36 |     def _output_dim_multiplier(self):
37 |         return 2
38 | 
39 |     def _elementwise_forward(self, x, elementwise_params):
40 |         assert elementwise_params.shape[-1] == self._output_dim_multiplier()
41 |         unconstrained_scale, shift = self._unconstrained_scale_and_shift(elementwise_params)
42 |         scale = self.scale_fn(unconstrained_scale)
43 |         z = scale * x + shift
44 |         ldj = sum_except_batch(torch.log(scale))
45 |         return z, ldj
46 | 
47 |     def _elementwise_inverse(self, z, elementwise_params):
48 |         assert elementwise_params.shape[-1] == self._output_dim_multiplier()
49 |         unconstrained_scale, shift = self._unconstrained_scale_and_shift(elementwise_params)
50 |         scale = self.scale_fn(unconstrained_scale)
51 |         x = (z - shift) / scale
52 |         return x
53 | 
54 |     def _unconstrained_scale_and_shift(self, elementwise_params):
55 |         unconstrained_scale = elementwise_params[..., 0]
56 |         shift = elementwise_params[..., 1]
57 |         return unconstrained_scale, shift
58 | 
--------------------------------------------------------------------------------
/denseflow/transforms/bijections/coupling/coupling.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from denseflow.transforms.bijections import Bijection
3 | 
4 | 
5 | class CouplingBijection(Bijection):
6 |     """Transforms each input variable with an invertible elementwise bijection.
7 | 
8 |     The input variables are split in two parts. The second part is transformed conditioned on the first part.
9 |     The coupling network takes the first part as input and outputs transformations for the second part.
10 | 
11 |     Args:
12 |         coupling_net: nn.Module, a coupling network such that for x = [x1,x2]
13 |             elementwise_params = coupling_net(x1)
14 |         split_dim: int, dimension to split the input (default=1).
15 |         num_condition: int or None, number of parameters to condition on.
16 |             If None, the first half is conditioned on:
17 |             - For even inputs (1,2,3,4), (1,2) will be conditioned on.
18 |             - For odd inputs (1,2,3,4,5), (1,2,3) will be conditioned on.
19 |     """
20 | 
21 |     def __init__(self, coupling_net, split_dim=1, num_condition=None):
22 |         super(CouplingBijection, self).__init__()
23 |         assert split_dim >= 1
24 |         self.coupling_net = coupling_net
25 |         self.split_dim = split_dim
26 |         self.num_condition = num_condition
27 | 
28 |     def split_input(self, input):
29 |         if self.num_condition:
30 |             split_proportions = (self.num_condition, input.shape[self.split_dim] - self.num_condition)
31 |             return torch.split(input, split_proportions, dim=self.split_dim)
32 |         else:
33 |             return torch.chunk(input, 2, dim=self.split_dim)
34 | 
35 |     def forward(self, x):
36 |         id, x2 = self.split_input(x)
37 |         elementwise_params = self.coupling_net(id)
38 |         z2, ldj = self._elementwise_forward(x2, elementwise_params)
39 |         z = torch.cat([id, z2], dim=self.split_dim)
40 |         return z, ldj
41 | 
42 |     def inverse(self, z):
43 |         with torch.no_grad():
44 |             id, z2 = self.split_input(z)
45 |             elementwise_params = self.coupling_net(id)
46 |             x2 = self._elementwise_inverse(z2, elementwise_params)
47 |             x = torch.cat([id, x2], dim=self.split_dim)
48 |         return x
49 | 
50 |     def _output_dim_multiplier(self):
51 |         raise NotImplementedError()
52 | 
53 |     def _elementwise_forward(self, x, elementwise_params):
54 |         raise NotImplementedError()
55 | 
56 |     def _elementwise_inverse(self, z, elementwise_params):
57 |         raise NotImplementedError()
58 | 
--------------------------------------------------------------------------------
/denseflow/transforms/surjections/maxpool2d.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | from denseflow.distributions import Distribution
4 | from denseflow.transforms.surjections import Surjection
5 | 
6 | 
7 | class SimpleMaxPoolSurjection2d(Surjection):
8 |     '''
9 |     A max pooling layer.
10 | 
11 |     Args:
12 |         decoder: Distribution, a distribution of shape (3*c, h//2, w//2) with non-negative elements.
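
    The forward pass keeps only the per-window maxima z; the three displacements
    between the maximum and the remaining entries, together with the argmax
    position k (uniform over the 4 window slots, hence the -log(4) per element
    in forward()), constitute the discarded information, and the decoder models
    the displacements so they can be resampled in inverse().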
13 | ''' 14 | 15 | stochastic_forward = False 16 | 17 | def __init__(self, decoder): 18 | super(SimpleMaxPoolSurjection2d, self).__init__() 19 | assert isinstance(decoder, Distribution) 20 | self.decoder = decoder 21 | 22 | def _squeeze(self, x): 23 | b,c,h,w = x.shape 24 | t = x.view(b, c, h // 2, 2, w // 2, 2) 25 | t = t.permute(0, 1, 2, 4, 3, 5).contiguous() 26 | xr = t.view(b, c, h // 2, w // 2, 4) 27 | return xr 28 | 29 | def _unsqueeze(self, xr): 30 | b,c,h,w,_ = xr.shape 31 | t = xr.view(b, c, h, w, 2, 2) 32 | t = t.permute(0, 1, 2, 4, 3, 5).contiguous() 33 | x = t.view(b, c, h * 2, w * 2) 34 | return x 35 | 36 | def _k_mask(self, k): 37 | idx_all = torch.arange(4).view(1,1,4).expand(k.shape+(4,)).to(k.device) 38 | mask=k.unsqueeze(-1).expand_as(idx_all)==idx_all 39 | return mask 40 | 41 | def _deconstruct_x(self, x): 42 | xs = self._squeeze(x) 43 | z, k = xs.max(-1) 44 | mask = self._k_mask(k) 45 | xr = xs[~mask].view(k.shape+(3,)) 46 | xds = z.unsqueeze(-1)-xr 47 | b,c,h,w,_ = xds.shape 48 | xd = xds.permute(0,1,4,2,3).reshape(b,3*c,h,w) # (B,C,H,W,3)->(B,3*C,H,W) 49 | return z, xd, k 50 | 51 | def _construct_x(self, z, xd, k): 52 | b,c,h,w = xd.shape 53 | xds = xd.reshape(b,c//3,3,h,w).permute(0,1,3,4,2) # (B,3*C,H,W)->(B,C,H,W,3) 54 | xr = z.unsqueeze(-1)-xds 55 | mask = self._k_mask(k) 56 | xs = z.new_zeros(z.shape+(4,)) 57 | xs.masked_scatter_(mask, z) 58 | xs.masked_scatter_(~mask, xr) 59 | x = self._unsqueeze(xs) 60 | return x 61 | 62 | def forward(self, x): 63 | z, xd, k = self._deconstruct_x(x) 64 | ldj_k = - math.log(4) * z.shape[1:].numel() 65 | ldj = self.decoder.log_prob(xd) + ldj_k 66 | return z, ldj 67 | 68 | def inverse(self, z): 69 | k = torch.randint(0, 4, z.shape, device=z.device) 70 | xd = self.decoder.sample(z.shape[0]) 71 | x = self._construct_x(z, xd, k) 72 | return x 73 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/autoregressive/autoregressive_linear_2d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from denseflow.utils import sum_except_batch 3 | from denseflow.transforms.bijections.autoregressive import AutoregressiveBijection2d 4 | 5 | 6 | class AdditiveAutoregressiveBijection2d(AutoregressiveBijection2d): 7 | '''Additive autoregressive bijection.''' 8 | 9 | def _output_dim_multiplier(self): 10 | return 1 11 | 12 | def _elementwise_forward(self, x, elementwise_params): 13 | return x + elementwise_params, torch.zeros(x.shape[0], device=x.device, dtype=x.dtype) 14 | 15 | def _elementwise_inverse(self, z, elementwise_params): 16 | return z - elementwise_params 17 | 18 | 19 | class AffineAutoregressiveBijection2d(AutoregressiveBijection2d): 20 | ''' 21 | Affine autoregressive bijection. 22 | 23 | Args: 24 | autoregressive_net: nn.Module, an autoregressive network such that 25 | elementwise_params = autoregressive_net(x) 26 | autoregressive_order: str or Iterable, the order in which to sample. 27 | One of `{'raster_cwh', 'raster_wh'}` 28 | scale_fn: callable, the transform to obtain the scale. 
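            torch.exp is the default; a soft-clamped positive map such as
            lambda s: torch.sigmoid(s + 2.) + 1e-3 is a common stabilizing
            alternative (a suggestion, not something this module ships).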
29 | ''' 30 | 31 | def __init__(self, autoregressive_net, autoregressive_order='raster_cwh', scale_fn=lambda s: torch.exp(s)): 32 | super(AffineAutoregressiveBijection2d, self).__init__(autoregressive_net=autoregressive_net, autoregressive_order=autoregressive_order) 33 | assert callable(scale_fn) 34 | self.scale_fn = scale_fn 35 | 36 | def _output_dim_multiplier(self): 37 | return 2 38 | 39 | def _elementwise_forward(self, x, elementwise_params): 40 | assert elementwise_params.shape[-1] == self._output_dim_multiplier() 41 | unconstrained_scale, shift = self._unconstrained_scale_and_shift(elementwise_params) 42 | scale = self.scale_fn(unconstrained_scale) 43 | z = scale * x + shift 44 | ldj = sum_except_batch(torch.log(scale)) 45 | return z, ldj 46 | 47 | def _elementwise_inverse(self, z, elementwise_params): 48 | assert elementwise_params.shape[-1] == self._output_dim_multiplier() 49 | unconstrained_scale, shift = self._unconstrained_scale_and_shift(elementwise_params) 50 | scale = self.scale_fn(unconstrained_scale) 51 | x = (z - shift) / scale 52 | return x 53 | 54 | def _unconstrained_scale_and_shift(self, elementwise_params): 55 | unconstrained_scale = elementwise_params[..., 0] 56 | shift = elementwise_params[..., 1] 57 | return unconstrained_scale, shift 58 | -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, "Mask2Former") 3 | import tempfile 4 | from pathlib import Path 5 | import numpy as np 6 | import cv2 7 | import cog 8 | 9 | # import some common detectron2 utilities 10 | from detectron2.config import CfgNode as CN 11 | from detectron2.engine import DefaultPredictor 12 | from detectron2.config import get_cfg 13 | from detectron2.utils.visualizer import Visualizer, ColorMode 14 | from detectron2.data import MetadataCatalog 15 | from detectron2.projects.deeplab import add_deeplab_config 16 | 17 | # import Mask2Former project 18 | from mask2former import add_maskformer2_config 19 | 20 | 21 | class Predictor(cog.Predictor): 22 | def setup(self): 23 | cfg = get_cfg() 24 | add_deeplab_config(cfg) 25 | add_maskformer2_config(cfg) 26 | cfg.merge_from_file("Mask2Former/configs/coco/panoptic-segmentation/swin/maskformer2_swin_large_IN21k_384_bs16_100ep.yaml") 27 | cfg.MODEL.WEIGHTS = 'model_final_f07440.pkl' 28 | cfg.MODEL.MASK_FORMER.TEST.SEMANTIC_ON = True 29 | cfg.MODEL.MASK_FORMER.TEST.INSTANCE_ON = True 30 | cfg.MODEL.MASK_FORMER.TEST.PANOPTIC_ON = True 31 | self.predictor = DefaultPredictor(cfg) 32 | self.coco_metadata = MetadataCatalog.get("coco_2017_val_panoptic") 33 | 34 | 35 | @cog.input( 36 | "image", 37 | type=Path, 38 | help="Input image for segmentation. 
Output will be the concatenation of Panoptic segmentation (top), " 39 | "instance segmentation (middle), and semantic segmentation (bottom).", 40 | ) 41 | def predict(self, image): 42 | im = cv2.imread(str(image)) 43 | outputs = self.predictor(im) 44 | v = Visualizer(im[:, :, ::-1], self.coco_metadata, scale=1.2, instance_mode=ColorMode.IMAGE_BW) 45 | panoptic_result = v.draw_panoptic_seg(outputs["panoptic_seg"][0].to("cpu"), 46 | outputs["panoptic_seg"][1]).get_image() 47 | v = Visualizer(im[:, :, ::-1], self.coco_metadata, scale=1.2, instance_mode=ColorMode.IMAGE_BW) 48 | instance_result = v.draw_instance_predictions(outputs["instances"].to("cpu")).get_image() 49 | v = Visualizer(im[:, :, ::-1], self.coco_metadata, scale=1.2, instance_mode=ColorMode.IMAGE_BW) 50 | semantic_result = v.draw_sem_seg(outputs["sem_seg"].argmax(0).to("cpu")).get_image() 51 | result = np.concatenate((panoptic_result, instance_result, semantic_result), axis=0)[:, :, ::-1] 52 | out_path = Path(tempfile.mkdtemp()) / "out.png" 53 | cv2.imwrite(str(out_path), result) 54 | return out_path 55 | -------------------------------------------------------------------------------- /denseflow/flows/cond_flow.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from collections.abc import Iterable 4 | from denseflow.utils import context_size 5 | from denseflow.distributions import Distribution, ConditionalDistribution 6 | from denseflow.transforms import Transform, ConditionalTransform 7 | 8 | 9 | class ConditionalFlow(ConditionalDistribution): 10 | """ 11 | Base class for ConditionalFlow. 12 | Flows use the forward transforms to transform data to noise. 13 | The inverse transforms can subsequently be used for sampling. 14 | These are typically useful as generative models of data. 
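
    A minimal sketch (illustrative; `context_encoder` is any module mapping raw
    context to the features the conditional transforms expect):

        flow = ConditionalFlow(base_dist, transforms, context_init=context_encoder)
        log_px = flow.log_prob(x, context)   # context-dependent density of x
        x_new = flow.sample(context)         # one sample per context element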
15 | """ 16 | 17 | def __init__(self, base_dist, transforms, context_init=None): 18 | super(ConditionalFlow, self).__init__() 19 | assert isinstance(base_dist, Distribution) 20 | if isinstance(transforms, Transform): transforms = [transforms] 21 | assert isinstance(transforms, Iterable) 22 | assert all(isinstance(transform, Transform) for transform in transforms) 23 | self.base_dist = base_dist 24 | self.transforms = nn.ModuleList(transforms) 25 | self.context_init = context_init 26 | self.lower_bound = any(transform.lower_bound for transform in transforms) 27 | 28 | def log_prob(self, x, context): 29 | if self.context_init: context = self.context_init(context) 30 | log_prob = torch.zeros(x.shape[0], device=x.device) 31 | for transform in self.transforms: 32 | if isinstance(transform, ConditionalTransform): 33 | x, ldj = transform(x, context) 34 | else: 35 | x, ldj = transform(x) 36 | log_prob += ldj 37 | if isinstance(self.base_dist, ConditionalDistribution): 38 | log_prob += self.base_dist.log_prob(x, context) 39 | else: 40 | log_prob += self.base_dist.log_prob(x) 41 | return log_prob 42 | 43 | def sample(self, context): 44 | if self.context_init: context = self.context_init(context) 45 | if isinstance(self.base_dist, ConditionalDistribution): 46 | z = self.base_dist.sample(context) 47 | else: 48 | z = self.base_dist.sample(context_size(context)) 49 | for transform in reversed(self.transforms): 50 | if isinstance(transform, ConditionalTransform): 51 | z = transform.inverse(z, context) 52 | else: 53 | z = transform.inverse(z) 54 | return z 55 | 56 | def sample_with_log_prob(self, context): 57 | raise RuntimeError("ConditionalFlow does not support sample_with_log_prob, see ConditionalInverseFlow instead.") 58 | -------------------------------------------------------------------------------- /denseflow/flows/cond_inverse_flow.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from collections.abc import Iterable 4 | from denseflow.utils import context_size 5 | from denseflow.distributions import Distribution, ConditionalDistribution 6 | from denseflow.transforms import Transform, ConditionalTransform 7 | 8 | 9 | class ConditionalInverseFlow(ConditionalDistribution): 10 | """ 11 | Base class for ConditionalFlow. 12 | Inverse flows use the forward transforms to transform noise to samples. 13 | These are typically useful as variational distributions. 14 | Here, we are not interested in the log probability of novel samples. 15 | However, using .sample_with_log_prob(), samples can be obtained together 16 | with their log probability. 
17 | """ 18 | 19 | def __init__(self, base_dist, transforms, context_init=None): 20 | super(ConditionalInverseFlow, self).__init__() 21 | assert isinstance(base_dist, Distribution) 22 | if isinstance(transforms, Transform): transforms = [transforms] 23 | assert isinstance(transforms, Iterable) 24 | assert all(isinstance(transform, Transform) for transform in transforms) 25 | self.base_dist = base_dist 26 | self.transforms = nn.ModuleList(transforms) 27 | self.context_init = context_init 28 | 29 | def log_prob(self, x, context): 30 | raise RuntimeError("ConditionalInverseFlow does not support log_prob, see ConditionalFlow instead.") 31 | 32 | def sample(self, context): 33 | if self.context_init: context = self.context_init(context) 34 | if isinstance(self.base_dist, ConditionalDistribution): 35 | z = self.base_dist.sample(context) 36 | else: 37 | z = self.base_dist.sample(context_size(context)) 38 | for transform in self.transforms: 39 | if isinstance(transform, ConditionalTransform): 40 | z, _ = transform(z, context) 41 | else: 42 | z, _ = transform(z) 43 | return z 44 | 45 | def sample_with_log_prob(self, context): 46 | if self.context_init: context = self.context_init(context) 47 | if isinstance(self.base_dist, ConditionalDistribution): 48 | z, log_prob = self.base_dist.sample_with_log_prob(context) 49 | else: 50 | z, log_prob = self.base_dist.sample_with_log_prob(context_size(context)) 51 | for transform in self.transforms: 52 | if isinstance(transform, ConditionalTransform): 53 | z, ldj = transform(z, context) 54 | else: 55 | z, ldj = transform(z) 56 | log_prob -= ldj 57 | return z, log_prob 58 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/conditional/autoregressive/autoregressive_linear_2d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from denseflow.utils import sum_except_batch 3 | from denseflow.transforms.bijections.conditional.autoregressive import ConditionalAutoregressiveBijection2d 4 | 5 | 6 | class ConditionalAdditiveAutoregressiveBijection2d(ConditionalAutoregressiveBijection2d): 7 | '''Additive autoregressive bijection.''' 8 | 9 | def _output_dim_multiplier(self): 10 | return 1 11 | 12 | def _elementwise_forward(self, x, elementwise_params): 13 | return x + elementwise_params, torch.zeros(x.shape[0], device=x.device, dtype=x.dtype) 14 | 15 | def _elementwise_inverse(self, z, elementwise_params): 16 | return z - elementwise_params 17 | 18 | 19 | class ConditionalAffineAutoregressiveBijection2d(ConditionalAutoregressiveBijection2d): 20 | ''' 21 | Affine autoregressive bijection. 22 | 23 | Args: 24 | autoregressive_net: nn.Module, an autoregressive network such that 25 | elementwise_params = autoregressive_net(x) 26 | autoregressive_order: str or Iterable, the order in which to sample. 27 | One of `{'raster_cwh', 'raster_wh'}` 28 | scale_fn: callable, the transform to obtain the scale. 
29 |     '''
30 | 
31 |     def __init__(self, autoregressive_net, autoregressive_order='raster_cwh', scale_fn=lambda s: torch.exp(s)):
32 |         super(ConditionalAffineAutoregressiveBijection2d, self).__init__(autoregressive_net=autoregressive_net, autoregressive_order=autoregressive_order)
33 |         assert callable(scale_fn)
34 |         self.scale_fn = scale_fn
35 | 
36 |     def _output_dim_multiplier(self):
37 |         return 2
38 | 
39 |     def _elementwise_forward(self, x, elementwise_params):
40 |         assert elementwise_params.shape[-1] == self._output_dim_multiplier()
41 |         unconstrained_scale, shift = self._unconstrained_scale_and_shift(elementwise_params)
42 |         scale = self.scale_fn(unconstrained_scale)
43 |         z = scale * x + shift
44 |         ldj = sum_except_batch(torch.log(scale))
45 |         return z, ldj
46 | 
47 |     def _elementwise_inverse(self, z, elementwise_params):
48 |         assert elementwise_params.shape[-1] == self._output_dim_multiplier()
49 |         unconstrained_scale, shift = self._unconstrained_scale_and_shift(elementwise_params)
50 |         scale = self.scale_fn(unconstrained_scale)
51 |         x = (z - shift) / scale
52 |         return x
53 | 
54 |     def _unconstrained_scale_and_shift(self, elementwise_params):
55 |         unconstrained_scale = elementwise_params[..., 0]
56 |         shift = elementwise_params[..., 1]
57 |         return unconstrained_scale, shift
58 | 
--------------------------------------------------------------------------------
/denseflow/nn/blocks/resblock.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | 
4 | class ResidualLayer(nn.Module):
5 |     def __init__(self, in_channels, dropout):
6 |         super(ResidualLayer, self).__init__()
7 | 
8 |         layers = []
9 | 
10 |         layers.extend([
11 |             nn.Conv2d(in_channels, in_channels, kernel_size=3,
12 |                       stride=1, padding=1, bias=True),
13 |             nn.ReLU(inplace=True),
14 |         ])
15 | 
16 |         if dropout > 0.:
17 |             layers.append(nn.Dropout(p=dropout))
18 | 
19 |         layers.extend([
20 |             nn.Conv2d(in_channels, in_channels, kernel_size=3,
21 |                       stride=1, padding=1, bias=True),
22 |             nn.ReLU(inplace=True)
23 |         ])
24 | 
25 |         self.nn = nn.Sequential(*layers)
26 | 
27 |     def forward(self, x):
28 |         h = self.nn(x)
29 |         return h
30 | 
31 | 
32 | class GatedConv2d(nn.Module):
33 |     def __init__(self, in_channels, out_channels, kernel_size, padding):
34 |         super(GatedConv2d, self).__init__()
35 |         self.in_channels = in_channels
36 |         self.conv = nn.Conv2d(in_channels, out_channels * 3,
37 |                               kernel_size=kernel_size, padding=padding)
38 | 
39 |     def forward(self, x):
40 |         h = self.conv(x)
41 |         a, b, c = torch.chunk(h, chunks=3, dim=1)
42 |         return a + b * torch.sigmoid(c)
43 | 
44 | 
45 | class Residual(nn.Sequential):
46 |     def __init__(self, in_channels, out_channels,
47 |                  dropout=0.0, gated_conv=False, zero_init=False):
48 | 
49 |         layers = [ResidualLayer(in_channels, dropout)]
50 | 
51 |         if gated_conv:
52 |             layers.append(GatedConv2d(in_channels, out_channels, kernel_size=1, padding=0))
53 |         else:
54 |             layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=1, padding=0))
55 | 
56 |         if zero_init:
57 |             last_conv = layers[-1].conv if gated_conv else layers[-1]  # GatedConv2d wraps its convolution
58 |             nn.init.zeros_(last_conv.weight)
59 |             if last_conv.bias is not None: nn.init.zeros_(last_conv.bias)
60 | 
61 |         super(Residual, self).__init__(*layers)
62 | 
63 | 
64 | class ResidualBlock(nn.Module):
65 |     def __init__(self, in_channels, out_channels,
66 |                  dropout=0.0, gated_conv=False, zero_init=False):
67 |         super(ResidualBlock, self).__init__()
68 | 
69 |         self.rb = Residual(in_channels=in_channels,
70 |                            out_channels=out_channels,
71 |                            dropout=dropout,
72 |                            gated_conv=gated_conv,
73 |                            zero_init=zero_init)
74 | 
75 |     def forward(self, x):
76 |         return x + self.rb(x)
77 | 
--------------------------------------------------------------------------------
/denseflow/transforms/bijections/linear_lowrank.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from denseflow.transforms.bijections import Bijection
6 | 
7 | 
8 | class LinearLowRank(Bijection):
9 |     """
10 |     Linear bijection z = (D + UV)x, where D is diagonal,
11 |     U.shape = (num_features, rank) and V.shape = (rank, num_features).
12 | 
13 |     Args:
14 |         num_features: int, Number of features in the input and output.
15 |         rank: int, the rank of the low-rank matrix.
16 |         bias: bool, if True a bias is included (default=False).
17 |     """
18 |     def __init__(self, num_features, rank, bias=False):
19 |         super(LinearLowRank, self).__init__()
20 |         assert rank >= 1 and rank <= num_features, 'rank should be 1 <= rank <= num_features, but got rank {}'.format(rank)
21 |         self.num_features = num_features
22 |         self.rank = rank
23 |         self.d = nn.Parameter(torch.Tensor(num_features))
24 |         self.U = nn.Parameter(torch.Tensor(num_features, rank))
25 |         self.V = nn.Parameter(torch.Tensor(rank, num_features))
26 |         if bias:
27 |             self.bias = nn.Parameter(torch.Tensor(num_features))
28 |         else:
29 |             self.register_parameter('bias', None)
30 |         self.reset_parameters()
31 | 
32 |     def reset_parameters(self):
33 |         nn.init.uniform_(self.d, 1-0.001, 1+0.001)
34 |         nn.init.uniform_(self.U, -0.001, 0.001)
35 |         nn.init.uniform_(self.V, -0.001, 0.001)
36 |         if self.bias is not None:
37 |             nn.init.zeros_(self.bias)
38 | 
39 |     @property
40 |     def K(self):
41 |         '''K = I + VD^{-1}U'''
42 |         I = torch.eye(self.rank, dtype=self.d.dtype, device=self.d.device)
43 |         VDinvU = torch.einsum('vd,d,du->vu', self.V, 1/self.d, self.U)
44 |         return I + VDinvU
45 | 
46 |     def forward(self, x):
47 |         '''
48 |         z = Dx + UVx
49 |         ldj = sum(log(abs(D))) + log(abs(det(K)))
50 |         '''
51 |         z = self.d * x + torch.einsum('dr,br->bd', self.U, torch.einsum('rd,bd->br', self.V, x))
52 |         if self.bias is not None: z = z + self.bias
53 |         ldj = self.d.abs().log().sum() + torch.slogdet(self.K)[1]
54 |         ldj = ldj.expand([x.shape[0]])
55 |         return z, ldj
56 | 
57 |     def inverse(self, z):
58 |         '''x = D^{-1}z - D^{-1}UK^{-1}VD^{-1}z'''
59 |         if self.bias is not None: z = z - self.bias
60 |         VDiz = torch.einsum('rd,bd->br', self.V, z / self.d)
61 |         KiVDiz = torch.solve(VDiz.t(), self.K)[0].t()
62 |         UKiVDiz = torch.einsum('dr,br->bd', self.U, KiVDiz)
63 |         x = (z - UKiVDiz) / self.d
64 |         return x
65 | 
--------------------------------------------------------------------------------
/denseflow/transforms/bijections/coupling/coupling_linear.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from denseflow.utils import sum_except_batch
3 | from denseflow.transforms.bijections.coupling import CouplingBijection
4 | 
5 | 
6 | class AdditiveCouplingBijection(CouplingBijection):
7 |     '''Additive coupling bijection.'''
8 | 
9 |     def _output_dim_multiplier(self):
10 |         return 1
11 | 
12 |     def _elementwise_forward(self, x, elementwise_params):
13 |         return x + elementwise_params, torch.zeros(x.shape[0], device=x.device, dtype=x.dtype)
14 | 
15 |     def _elementwise_inverse(self, z, elementwise_params):
16 |         return z - elementwise_params
17 | 
18 | 
19 | class AffineCouplingBijection(CouplingBijection):
20 |     '''
21 |     Affine coupling bijection.
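    Computes z2 = s(x1) * x2 + t(x1) on the non-conditioned part, with
    ldj = sum(log s(x1)); scale and shift are read from the last dimension of
    the coupling network output (see _unconstrained_scale_and_shift).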
22 | 23 | Args: 24 | coupling_net: nn.Module, a coupling network such that for x = [x1,x2] 25 | elementwise_params = coupling_net(x1) 26 | split_dim: int, dimension to split the input (default=1). 27 | num_condition: int or None, number of parameters to condition on. 28 | If None, the first half is conditioned on: 29 | - For even inputs (1,2,3,4), (1,2) will be conditioned on. 30 | - For odd inputs (1,2,3,4,5), (1,2,3) will be conditioned on. 31 | scale_fn: callable, the transform to obtain the scale. 32 | ''' 33 | 34 | def __init__(self, coupling_net, split_dim=1, num_condition=None, scale_fn=lambda s: torch.exp(s)): 35 | super(AffineCouplingBijection, self).__init__(coupling_net=coupling_net, split_dim=split_dim, num_condition=num_condition) 36 | assert callable(scale_fn) 37 | self.scale_fn = scale_fn 38 | 39 | def _output_dim_multiplier(self): 40 | return 2 41 | 42 | def _elementwise_forward(self, x, elementwise_params): 43 | assert elementwise_params.shape[-1] == self._output_dim_multiplier() 44 | unconstrained_scale, shift = self._unconstrained_scale_and_shift(elementwise_params) 45 | scale = self.scale_fn(unconstrained_scale) 46 | z = scale * x + shift 47 | ldj = sum_except_batch(torch.log(scale)) 48 | return z, ldj 49 | 50 | def _elementwise_inverse(self, z, elementwise_params): 51 | assert elementwise_params.shape[-1] == self._output_dim_multiplier() 52 | unconstrained_scale, shift = self._unconstrained_scale_and_shift(elementwise_params) 53 | scale = self.scale_fn(unconstrained_scale) 54 | x = (z - shift) / scale 55 | return x 56 | 57 | def _unconstrained_scale_and_shift(self, elementwise_params): 58 | unconstrained_scale = elementwise_params[..., 0] 59 | shift = elementwise_params[..., 1] 60 | return unconstrained_scale, shift 61 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/functional/mixtures/utils_logistic_censored.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from denseflow.transforms.bijections.functional.mixtures.utils_logistic import logistic_log_cdf, logistic_log_one_minus_cdf, logistic_log_pdf 4 | 5 | 6 | def cmol_cdf(x, means, log_scales, K): 7 | x = x.unsqueeze(-1) 8 | # For logistic_eval_x: 9 | # Matching the eval. locations of the logistic distribution from the PixelCNN++ code base: 10 | # Lower: From {0,1,...,255}/256 -> {0,1,...,255}/255 -> 2 * {0,1,...,255}/255 - 1 -> 2 * {0,1,...,255}/255 - 1 - 1/255 (min = -1-1/255) 11 | # Upper: From {1,2,...,256}/256 -> {1,2,...,256}/255 -> 2 * {1,2,...,256}/255 - 1 -> 2 * {1,2,...,256}/255 - 1 - 1/255 (max = 1+1/255) 12 | logistic_eval_x = K/(K-1) *(2 * x - 1) # From [0,1] -> [-1-1/255,1+1/255] 13 | logistic_eval_lower = - (1 + 1/(K-1)) * torch.ones(torch.Size(), dtype=x.dtype, device=x.device)[(None,)*x.dim()] 14 | logistic_eval_upper = (1 + 1/(K-1)) * torch.ones(torch.Size(), dtype=x.dtype, device=x.device)[(None,)*x.dim()] 15 | cdf_mid = logistic_log_cdf(logistic_eval_x, means, log_scales).exp() 16 | cdf_lower = (1-K*x) * logistic_log_cdf(logistic_eval_lower, means, log_scales).exp() * torch.lt(x, 1/K).type(x.dtype) 17 | cdf_upper = (K*x-(K-1)) * logistic_log_one_minus_cdf(logistic_eval_upper, means, log_scales).exp() * torch.gt(x, (K-1)/K).type(x.dtype) 18 | return cdf_mid + cdf_upper - cdf_lower 19 | 20 | 21 | def cmol_log_pdf(x, means, log_scales, K): 22 | x = x.unsqueeze(-1) 23 | # For logistic_eval_x: 24 | # Matching the eval. 
locations of the logistic distribution from the PixelCNN++ code base: 25 | # Lower: From {0,1,...,255}/256 -> {0,1,...,255}/255 -> 2 * {0,1,...,255}/255 - 1 -> 2 * {0,1,...,255}/255 - 1 - 1/255 (min = -1-1/255) 26 | # Upper: From {1,2,...,256}/256 -> {1,2,...,256}/255 -> 2 * {1,2,...,256}/255 - 1 -> 2 * {1,2,...,256}/255 - 1 - 1/255 (max = 1+1/255) 27 | logistic_eval_x = K/(K-1) *(2 * x - 1) # From [0,1] -> [-1-1/255,1+1/255] 28 | logistic_eval_lower = - (1 + 1/(K-1)) * torch.ones(torch.Size(), dtype=x.dtype, device=x.device)[(None,)*x.dim()] 29 | logistic_eval_upper = (1 + 1/(K-1)) * torch.ones(torch.Size(), dtype=x.dtype, device=x.device)[(None,)*x.dim()] 30 | log_pdf_mid = math.log(2) + math.log(K) - math.log(K-1) + logistic_log_pdf(logistic_eval_x, means, log_scales) 31 | log_pdf_lower = math.log(K) + logistic_log_cdf(logistic_eval_lower, means, log_scales) 32 | log_pdf_upper = math.log(K) + logistic_log_one_minus_cdf(logistic_eval_upper, means, log_scales) 33 | log_pdf_lower.masked_fill_(~torch.lt(x, 1/K), value=-float('inf')) 34 | log_pdf_upper.masked_fill_(~torch.gt(x, (K-1)/K), value=-float('inf')) 35 | log_pdf_stack = torch.stack([log_pdf_lower, log_pdf_mid, log_pdf_upper], dim=-1) 36 | return torch.logsumexp(log_pdf_stack, dim=-1) 37 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/autoregressive/autoregressive_2d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from denseflow.transforms.bijections import Bijection 3 | 4 | 5 | class AutoregressiveBijection2d(Bijection): 6 | """Transforms each input variable with an invertible elementwise bijection. 7 | 8 | The parameters of each invertible elementwise bijection can be functions of previous input 9 | variables, but they must not depend on the current or any following input variables. 10 | 11 | NOTE: Calculating the inverse transform is D times slower than calculating the 12 | forward transform, where D is the dimensionality of the input to the transform. 13 | 14 | Args: 15 | autoregressive_net: nn.Module, an autoregressive network such that 16 | elementwise_params = autoregressive_net(x) 17 | autoregressive_order: str or Iterable, the order in which to sample. 
18 | One of `{'raster_cwh', 'raster_wh'}` 19 | """ 20 | def __init__(self, autoregressive_net, autoregressive_order='raster_cwh'): 21 | super(AutoregressiveBijection2d, self).__init__() 22 | assert isinstance(autoregressive_order, str) # non-string orders are not supported 23 | assert autoregressive_order in {'raster_cwh', 'raster_wh'} 24 | self.autoregressive_net = autoregressive_net 25 | self.autoregressive_order = autoregressive_order 26 | 27 | def forward(self, x): 28 | elementwise_params = self.autoregressive_net(x) 29 | z, ldj = self._elementwise_forward(x, elementwise_params) 30 | return z, ldj 31 | 32 | def inverse(self, z): 33 | with torch.no_grad(): 34 | if self.autoregressive_order == 'raster_cwh': return self._inverse_raster_cwh(z) 35 | if self.autoregressive_order == 'raster_wh': return self._inverse_raster_wh(z) 36 | 37 | def _inverse_raster_cwh(self, z): 38 | x = torch.zeros_like(z) 39 | for h in range(x.shape[2]): 40 | for w in range(x.shape[3]): 41 | for c in range(x.shape[1]): 42 | elementwise_params = self.autoregressive_net(x) 43 | x[:,c,h,w] = self._elementwise_inverse(z[:,c,h,w], elementwise_params[:,c,h,w]) 44 | return x 45 | 46 | def _inverse_raster_wh(self, z): 47 | x = torch.zeros_like(z) 48 | for h in range(x.shape[2]): 49 | for w in range(x.shape[3]): 50 | elementwise_params = self.autoregressive_net(x) 51 | x[:,:,h,w] = self._elementwise_inverse(z[:,:,h,w], elementwise_params[:,:,h,w]) 52 | return x 53 | 54 | def _output_dim_multiplier(self): 55 | raise NotImplementedError() 56 | 57 | def _elementwise_forward(self, x, elementwise_params): 58 | raise NotImplementedError() 59 | 60 | def _elementwise_inverse(self, z, elementwise_params): 61 | raise NotImplementedError() 62 | -------------------------------------------------------------------------------- /mask2former/modeling/transformer_decoder/position_encoding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # Modified by Bowen Cheng from: https://github.com/facebookresearch/detr/blob/master/models/position_encoding.py 3 | """ 4 | Various positional encodings for the transformer. 5 | """ 6 | import math 7 | 8 | import torch 9 | from torch import nn 10 | 11 | 12 | class PositionEmbeddingSine(nn.Module): 13 | """ 14 | This is a more standard version of the position embedding, very similar to the one 15 | used by the Attention is all you need paper, generalized to work on images.
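Each spatial position receives num_pos_feats sine/cosine features along each of the y and x axes, concatenated to an output of shape (batch, 2 * num_pos_feats, height, width).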
16 | """ 17 | 18 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): 19 | super().__init__() 20 | self.num_pos_feats = num_pos_feats 21 | self.temperature = temperature 22 | self.normalize = normalize 23 | if scale is not None and normalize is False: 24 | raise ValueError("normalize should be True if scale is passed") 25 | if scale is None: 26 | scale = 2 * math.pi 27 | self.scale = scale 28 | 29 | def forward(self, x, mask=None): 30 | if mask is None: 31 | mask = torch.zeros((x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool) 32 | not_mask = ~mask 33 | y_embed = not_mask.cumsum(1, dtype=torch.float32) 34 | x_embed = not_mask.cumsum(2, dtype=torch.float32) 35 | if self.normalize: 36 | eps = 1e-6 37 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale 38 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale 39 | 40 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) 41 | # dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) 42 | dim_t = self.temperature ** (2 * torch.div(dim_t, 2, rounding_mode='trunc') / self.num_pos_feats) 43 | 44 | pos_x = x_embed[:, :, :, None] / dim_t 45 | pos_y = y_embed[:, :, :, None] / dim_t 46 | pos_x = torch.stack( 47 | (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4 48 | ).flatten(3) 49 | pos_y = torch.stack( 50 | (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4 51 | ).flatten(3) 52 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) 53 | return pos 54 | 55 | def __repr__(self, _repr_indent=4): 56 | head = "Positional encoding " + self.__class__.__name__ 57 | body = [ 58 | "num_pos_feats: {}".format(self.num_pos_feats), 59 | "temperature: {}".format(self.temperature), 60 | "normalize: {}".format(self.normalize), 61 | "scale: {}".format(self.scale), 62 | ] 63 | # _repr_indent = 4 64 | lines = [head] + [" " * _repr_indent + line for line in body] 65 | return "\n".join(lines) 66 | -------------------------------------------------------------------------------- /denseflow/transforms/base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from collections.abc import Iterable 4 | 5 | 6 | class Transform(nn.Module): 7 | """Base class for Transform""" 8 | 9 | has_inverse = True 10 | 11 | @property 12 | def bijective(self): 13 | raise NotImplementedError() 14 | 15 | @property 16 | def stochastic_forward(self): 17 | raise NotImplementedError() 18 | 19 | @property 20 | def stochastic_inverse(self): 21 | raise NotImplementedError() 22 | 23 | @property 24 | def lower_bound(self): 25 | return self.stochastic_forward 26 | 27 | def forward(self, x): 28 | """ 29 | Forward transform. 30 | Computes `z <- x` and the log-likelihood contribution term `log C` 31 | such that `log p(x) = log p(z) + log C`. 32 | 33 | Args: 34 | x: Tensor, shape (batch_size, ...) 35 | 36 | Returns: 37 | z: Tensor, shape (batch_size, ...) 38 | ldj: Tensor, shape (batch_size,) 39 | """ 40 | raise NotImplementedError() 41 | 42 | def inverse(self, z): 43 | """ 44 | Inverse transform. 45 | Computes `x <- z`. 46 | 47 | Args: 48 | z: Tensor, shape (batch_size, ...) 49 | 50 | Returns: 51 | x: Tensor, shape (batch_size, ...) 52 | """ 53 | raise NotImplementedError() 54 | 55 | 56 | class SequentialTransform(Transform): 57 | """ 58 | Chains multiple Transform objects sequentially. 
59 | 60 | Args: 61 | transforms: Transform or iterable with each element being a Transform object 62 | """ 63 | 64 | def __init__(self, transforms): 65 | super(SequentialTransform, self).__init__() 66 | if isinstance(transforms, Transform): transforms = [transforms] 67 | assert isinstance(transforms, Iterable) 68 | assert all(isinstance(transform, Transform) for transform in transforms) 69 | self.has_inverse = all(transform.has_inverse for transform in transforms) 70 | self.transforms = nn.ModuleList(transforms) 71 | 72 | @property 73 | def bijective(self): 74 | return all(transform.bijective for transform in self.transforms) 75 | 76 | @property 77 | def stochastic_forward(self): 78 | return any(transform.stochastic_forward for transform in self.transforms) 79 | 80 | @property 81 | def stochastic_inverse(self): 82 | return any(transform.stochastic_inverse for transform in self.transforms) 83 | 84 | def forward(self, x): 85 | batch_size = x.shape[0] 86 | x, ldj = self.transforms[0].forward(x) 87 | for transform in self.transforms[1:]: 88 | x, l = transform.forward(x) 89 | ldj += l 90 | return x, ldj 91 | 92 | def inverse(self, z): 93 | for transform in reversed(self.transforms): 94 | z = transform.inverse(z) 95 | return z 96 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/conditional/coupling/coupling_linear.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from denseflow.utils import sum_except_batch 3 | from denseflow.transforms.bijections.conditional.coupling import ConditionalCouplingBijection 4 | 5 | 6 | class ConditionalAdditiveCouplingBijection(ConditionalCouplingBijection): 7 | '''Conditional additive coupling bijection.''' 8 | 9 | def _output_dim_multiplier(self): 10 | return 1 11 | 12 | def _elementwise_forward(self, x, elementwise_params): 13 | return x + elementwise_params, torch.zeros(x.shape[0], device=x.device, dtype=x.dtype) 14 | 15 | def _elementwise_inverse(self, z, elementwise_params): 16 | return z - elementwise_params 17 | 18 | 19 | class ConditionalAffineCouplingBijection(ConditionalCouplingBijection): 20 | ''' 21 | Conditional affine coupling bijection. 22 | 23 | Args: 24 | coupling_net: nn.Module, a coupling network such that for x = [x1,x2] 25 | elementwise_params = coupling_net([x1,context]) 26 | context_net: nn.Module or None, a network to process the context. 27 | split_dim: int, dimension to split the input (default=1). 28 | num_condition: int or None, number of parameters to condition on. 29 | If None, the first half is conditioned on: 30 | - For even inputs (1,2,3,4), (1,2) will be conditioned on. 31 | - For odd inputs (1,2,3,4,5), (1,2,3) will be conditioned on. 32 | scale_fn: callable, the transform to obtain the scale.
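Example (an illustrative sketch; `net` is a hypothetical coupling network
producing the two elementwise parameters, scale and shift, for x2):
    bijection = ConditionalAffineCouplingBijection(net)
    z, ldj = bijection(x, context)
    x_reconstructed = bijection.inverse(z, context)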
33 | ''' 34 | 35 | def __init__(self, coupling_net, context_net=None, split_dim=1, num_condition=None, scale_fn=lambda s: torch.exp(s)): 36 | super(ConditionalAffineCouplingBijection, self).__init__(coupling_net=coupling_net, context_net=context_net, split_dim=split_dim, num_condition=num_condition) 37 | assert callable(scale_fn) 38 | self.scale_fn = scale_fn 39 | 40 | def _output_dim_multiplier(self): 41 | return 2 42 | 43 | def _elementwise_forward(self, x, elementwise_params): 44 | assert elementwise_params.shape[-1] == self._output_dim_multiplier() 45 | unconstrained_scale, shift = self._unconstrained_scale_and_shift(elementwise_params) 46 | scale = self.scale_fn(unconstrained_scale) 47 | z = scale * x + shift 48 | ldj = sum_except_batch(torch.log(scale)) 49 | return z, ldj 50 | 51 | def _elementwise_inverse(self, z, elementwise_params): 52 | assert elementwise_params.shape[-1] == self._output_dim_multiplier() 53 | unconstrained_scale, shift = self._unconstrained_scale_and_shift(elementwise_params) 54 | scale = self.scale_fn(unconstrained_scale) 55 | x = (z - shift) / scale 56 | return x 57 | 58 | def _unconstrained_scale_and_shift(self, elementwise_params): 59 | unconstrained_scale = elementwise_params[..., 0] 60 | shift = elementwise_params[..., 1] 61 | return unconstrained_scale, shift 62 | -------------------------------------------------------------------------------- /denseflow/transforms/surjections/sort.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from denseflow.transforms.surjections import Surjection 3 | 4 | 5 | class SimpleSortSurjection(Surjection): 6 | ''' 7 | A sorting layer. Sorts along `dim` by the keys extracted using `lambd`. 8 | The inverse is a random permutation. 9 | 10 | Args: 11 | dim: int, the dimension along which the tensor is sorted. 12 | lambd: callable, a callable which extracts the subset of x used to determine the sorting order.
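The forward pass contributes ldj = -log(D!), where D = x.shape[dim], since D! distinct input orderings collapse onto each sorted output.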
13 | 14 | Example for (1,4) tensor using (dim=1, lambd=lambda x: x): 15 | # Input x, output z 16 | tensor([[0.6268, 0.0913, 0.8587, 0.2548]]) 17 | tensor([[0.0913, 0.2548, 0.6268, 0.8587]]) 18 | 19 | Example for (1,4,2) tensor using (dim=1, lambd=lambda x: x[:,:,0]): 20 | # Input x, output z 21 | tensor([[[0.6601, 0.0948], 22 | [0.9293, 0.1715], 23 | [0.5511, 0.7153], 24 | [0.3567, 0.7232]]]) 25 | tensor([[[0.3567, 0.7232], 26 | [0.5511, 0.7153], 27 | [0.6601, 0.0948], 28 | [0.9293, 0.1715]]]) 29 | 30 | ''' 31 | stochastic_forward = False 32 | 33 | def __init__(self, dim=1, lambd=lambda x: x): 34 | super(SimpleSortSurjection, self).__init__() 35 | self.register_buffer('buffer', torch.zeros(1)) 36 | self.dim = dim 37 | self.lambd = lambd 38 | 39 | def forward(self, x): 40 | x_order = self.lambd(x) 41 | assert x_order.dim() == 2, 'lambd should return a tensor of shape (batch_size, dim_size) = ({}, {}), not {}'.format(x.shape[0], x.shape[self.dim], x_order.shape) 42 | assert x_order.shape[0] == x.shape[0], 'lambd should return a tensor of shape (batch_size, dim_size) = ({}, {}), not {}'.format(x.shape[0], x.shape[self.dim], x_order.shape) 43 | assert x_order.shape[1] == x.shape[self.dim], 'lambd should return a tensor of shape (batch_size, dim_size) = ({}, {}), not {}'.format(x.shape[0], x.shape[self.dim], x_order.shape) 44 | permutation = torch.argsort(x_order, dim=1) 45 | for d in range(1, self.dim): 46 | permutation = permutation.unsqueeze(1) 47 | for d in range(self.dim+1, x.dim()): 48 | permutation = permutation.unsqueeze(-1) 49 | permutation = permutation.expand_as(x) 50 | z = torch.gather(x, self.dim, permutation) 51 | ldj = - self.buffer.new_ones(x.shape[0]) * torch.arange(1, 1+x.shape[self.dim]).type(self.buffer.dtype).log().sum() 52 | return z, ldj 53 | 54 | def inverse(self, z): 55 | rand = torch.rand(z.shape[0], z.shape[self.dim], device=z.device) 56 | permutation = rand.argsort(dim=1) 57 | for d in range(1, self.dim): 58 | permutation = permutation.unsqueeze(1) 59 | for d in range(self.dim+1, z.dim()): 60 | permutation = permutation.unsqueeze(-1) 61 | permutation = permutation.expand_as(z) 62 | x = torch.gather(z, self.dim, permutation) 63 | return x 64 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/conditional/autoregressive/autoregressive_2d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from denseflow.transforms.bijections.conditional import ConditionalBijection 3 | 4 | 5 | class ConditionalAutoregressiveBijection2d(ConditionalBijection): 6 | """Transforms each input variable with an invertible elementwise bijection. 7 | 8 | The parameters of each invertible elementwise bijection can be functions of previous input 9 | variables, but they must not depend on the current or any following input variables. 10 | 11 | NOTE: Calculating the inverse transform is D times slower than calculating the 12 | forward transform, where D is the dimensionality of the input to the transform. 13 | 14 | Args: 15 | autoregressive_net: nn.Module, an autoregressive network such that 16 | elementwise_params = autoregressive_net(x) 17 | autoregressive_order: str or Iterable, the order in which to sample. 
18 | One of `{'raster_cwh', 'raster_wh'}` 19 | """ 20 | def __init__(self, autoregressive_net, autoregressive_order='raster_cwh'): 21 | super(ConditionalAutoregressiveBijection2d, self).__init__() 22 | assert isinstance(autoregressive_order, str) # non-string orders are not supported 23 | assert autoregressive_order in {'raster_cwh', 'raster_wh'} 24 | self.autoregressive_net = autoregressive_net 25 | self.autoregressive_order = autoregressive_order 26 | 27 | def forward(self, x, context): 28 | elementwise_params = self.autoregressive_net(x, context=context) 29 | z, ldj = self._elementwise_forward(x, elementwise_params) 30 | return z, ldj 31 | 32 | def inverse(self, z, context): 33 | with torch.no_grad(): 34 | if self.autoregressive_order == 'raster_cwh': return self._inverse_raster_cwh(z, context=context) 35 | if self.autoregressive_order == 'raster_wh': return self._inverse_raster_wh(z, context=context) 36 | 37 | def _inverse_raster_cwh(self, z, context): 38 | x = torch.zeros_like(z) 39 | for h in range(x.shape[2]): 40 | for w in range(x.shape[3]): 41 | for c in range(x.shape[1]): 42 | elementwise_params = self.autoregressive_net(x, context=context) 43 | x[:,c,h,w] = self._elementwise_inverse(z[:,c,h,w], elementwise_params[:,c,h,w]) 44 | return x 45 | 46 | def _inverse_raster_wh(self, z, context): 47 | x = torch.zeros_like(z) 48 | for h in range(x.shape[2]): 49 | for w in range(x.shape[3]): 50 | elementwise_params = self.autoregressive_net(x, context=context) 51 | x[:,:,h,w] = self._elementwise_inverse(z[:,:,h,w], elementwise_params[:,:,h,w]) 52 | return x 53 | 54 | def _output_dim_multiplier(self): 55 | raise NotImplementedError() 56 | 57 | def _elementwise_forward(self, x, elementwise_params): 58 | raise NotImplementedError() 59 | 60 | def _elementwise_inverse(self, z, elementwise_params): 61 | raise NotImplementedError() 62 | -------------------------------------------------------------------------------- /tools/README.md: -------------------------------------------------------------------------------- 1 | This directory contains a few tools for MaskFormer. 2 | 3 | * `convert-torchvision-to-d2.py` 4 | 5 | Tool to convert torchvision pre-trained weights for D2. 6 | 7 | ``` 8 | wget https://download.pytorch.org/models/resnet101-63fe2227.pth 9 | python tools/convert-torchvision-to-d2.py resnet101-63fe2227.pth R-101.pkl 10 | ``` 11 | 12 | * `convert-pretrained-swin-model-to-d2.py` 13 | 14 | Tool to convert Swin Transformer pre-trained weights for D2.
15 | 16 | ``` 17 | pip install timm 18 | 19 | wget https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth 20 | python tools/convert-pretrained-swin-model-to-d2.py swin_tiny_patch4_window7_224.pth swin_tiny_patch4_window7_224.pkl 21 | 22 | wget https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth 23 | python tools/convert-pretrained-swin-model-to-d2.py swin_small_patch4_window7_224.pth swin_small_patch4_window7_224.pkl 24 | 25 | wget https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth 26 | python tools/convert-pretrained-swin-model-to-d2.py swin_base_patch4_window12_384_22k.pth swin_base_patch4_window12_384_22k.pkl 27 | 28 | wget https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth 29 | python tools/convert-pretrained-swin-model-to-d2.py swin_large_patch4_window12_384_22k.pth swin_large_patch4_window12_384_22k.pkl 30 | ``` 31 | 32 | * `evaluate_pq_for_semantic_segmentation.py` 33 | 34 | Tool to evaluate PQ (PQ-stuff) for semantic segmentation predictions. 35 | 36 | Usage: 37 | 38 | ``` 39 | python tools/evaluate_pq_for_semantic_segmentation.py --dataset-name ade20k_sem_seg_val --json-file OUTPUT_DIR/inference/sem_seg_predictions.json 40 | ``` 41 | 42 | where `OUTPUT_DIR` is set in the config file. 43 | 44 | * `evaluate_coco_boundary_ap.py` 45 | 46 | Tool to evaluate Boundary AP for instance segmentation predictions. 47 | 48 | Usage: 49 | 50 | ``` 51 | python tools/evaluate_coco_boundary_ap.py --gt-json-file COCO_GT_JSON --dt-json-file COCO_DT_JSON 52 | ``` 53 | 54 | To install Boundary IoU API, run: 55 | 56 | ``` 57 | pip install git+https://github.com/bowenc0221/boundary-iou-api.git 58 | ``` 59 | 60 | * `analyze_model.py` 61 | 62 | Tool to analyze model parameters and FLOPs. 63 | 64 | Usage for semantic segmentation (ADE20K only, use with caution!): 65 | 66 | ``` 67 | python tools/analyze_model.py --num-inputs 1 --tasks flop --use-fixed-input-size --config-file CONFIG_FILE 68 | ``` 69 | 70 | Note that, for semantic segmentation (ADE20K only), we use a dummy image with fixed size equal to `cfg.INPUT.CROP.SIZE[0] x cfg.INPUT.CROP.SIZE[0]`. 71 | Please do not use `--use-fixed-input-size` for calculating FLOPs on other datasets like Cityscapes! 72 | 73 | Usage for panoptic and instance segmentation: 74 | 75 | ``` 76 | python tools/analyze_model.py --num-inputs 100 --tasks flop --config-file CONFIG_FILE 77 | ``` 78 | 79 | Note that, for panoptic and instance segmentation, we compute the average FLOPs over 100 real validation images.
80 | -------------------------------------------------------------------------------- /denseflow/dense_flow.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from denseflow.flows import Flow 4 | from denseflow.transforms import UniformDequantization, VariationalDequantization, ScalarAffineBijection, Squeeze2d, Conv1x1, Slice, SimpleMaxPoolSurjection2d, ActNormBijection2d, WaveletSqueeze2d 5 | from denseflow.distributions import StandardNormal, ConvNormal2d 6 | from .flow_modules import InvertibleDenseBlock, InvertibleTransition 7 | from .dequantization import DequantizationFlow 8 | 9 | 10 | def parameter_count(module): 11 | trainable, non_trainable = 0, 0 12 | for p in module.parameters(): 13 | if p.requires_grad: 14 | trainable += p.numel() 15 | else: 16 | non_trainable += p.numel() 17 | return trainable, non_trainable 18 | 19 | def dim_from_shape(x): 20 | return x[0] * x[1] * x[2] 21 | 22 | class DenseFlow(Flow): 23 | 24 | def __init__(self, data_shape=(3, 64, 64), block_config=[3, 2, 1], layers_config=[3, 4, 8], layer_mid_chnls=[32, 32, 32], growth_rate=6, num_bits=8, checkpointing=True): 25 | 26 | transforms = [] 27 | current_shape = data_shape 28 | 29 | # Change range from [0,1]^D to [-0.5, 0.5]^D 30 | transforms.append(ScalarAffineBijection(shift=-0.5)) 31 | 32 | # Initial squeeze 33 | transforms.append(Squeeze2d()) 34 | current_shape = (current_shape[0] * 4, 35 | current_shape[1] // 2, 36 | current_shape[2] // 2) 37 | 38 | # scale = 1 39 | dim_initial = dim_from_shape(data_shape) 40 | dim_output = 0 41 | for i, num_layers in enumerate(block_config): 42 | idbt = InvertibleDenseBlock(current_shape[0], num_layers, layers_config[i], layer_mid_chnls[i], 43 | growth_rate=growth_rate, checkpointing=checkpointing) 44 | transforms.append(idbt) 45 | 46 | chnls = current_shape[0] + growth_rate * (num_layers - 1) 47 | current_shape = (chnls, 48 | current_shape[1], 49 | current_shape[2]) 50 | 51 | if i != len(block_config) - 1: 52 | transforms.append(InvertibleTransition(current_shape[0])) 53 | 54 | d0 = dim_from_shape(current_shape) 55 | 56 | current_shape = (current_shape[0] * 2, 57 | current_shape[1] // 2, 58 | current_shape[2] // 2) 59 | d1 = dim_from_shape(current_shape) 60 | dim_output += (d0 - d1) 61 | 62 | dim_output += dim_from_shape(current_shape) 63 | coef = 1. 
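# Prepend dequantization: UniformDequantization adds uniform noise to the
# discrete {0, ..., 2^num_bits - 1} inputs and rescales them to [0, 1], so
# that a continuous density can be fit. The commented-out
# VariationalDequantization variant below would learn the noise distribution
# instead of using fixed uniform noise.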
64 | transforms = [UniformDequantization(num_bits=num_bits, coef=coef), *transforms] 65 | # transforms = [VariationalDequantization(encoder=DequantizationFlow(data_shape, num_bits=num_bits), num_bits=num_bits, coef=coef), *transforms] 66 | 67 | super(DenseFlow, self).__init__(base_dist=ConvNormal2d(current_shape), 68 | transforms=transforms, coef=coef) 69 | self.out_shape = current_shape 70 | 71 | -------------------------------------------------------------------------------- /denseflow/nn/nets/autoregressive/transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from denseflow.nn.layers import LambdaLayer 6 | from denseflow.nn.layers.encoding import PositionalEncodingImage 7 | from denseflow.nn.layers.autoregressive import AutoregressiveShift, Image2Seq, Seq2Image 8 | from denseflow.nn.blocks.autoregressive import DecoderOnlyTransformer 9 | 10 | 11 | class DecoderOnlyTransformer2d(nn.Module): 12 | '''An implementation of Decoder-only Transformers.''' 13 | 14 | def __init__(self, image_shape, output_dim, num_bits, 15 | autoregressive_order='cwh', d_model=512, nhead=8, 16 | num_layers=6, dim_feedforward=2048, dropout=0.1, 17 | activation="relu", kdim=None, vdim=None, 18 | attn_bias=True, output_bias=True, 19 | checkpoint_blocks=False, 20 | in_lambda=lambda x: x, 21 | out_lambda=lambda x: x): 22 | super(DecoderOnlyTransformer2d, self).__init__() 23 | self.image_shape = torch.Size(image_shape) 24 | self.autoregressive_order = autoregressive_order 25 | self.d_model = d_model 26 | self.num_layers = num_layers 27 | 28 | # Encoding layers 29 | self.encode = nn.Sequential(LambdaLayer(in_lambda), 30 | nn.Embedding(2**num_bits, d_model), 31 | PositionalEncodingImage(image_shape=image_shape, embedding_dim=d_model)) 32 | 33 | self.im2seq = Image2Seq(autoregressive_order, image_shape) 34 | self.seq2im = Seq2Image(autoregressive_order, image_shape) 35 | self.ar_shift = AutoregressiveShift(d_model) 36 | 37 | self.transformer = DecoderOnlyTransformer(d_model=d_model, 38 | nhead=nhead, 39 | num_layers=num_layers, 40 | dim_feedforward=dim_feedforward, 41 | dropout=dropout, 42 | activation=activation, 43 | kdim=kdim, 44 | vdim=vdim, 45 | attn_bias=attn_bias, 46 | checkpoint_blocks=checkpoint_blocks) 47 | 48 | self.out_linear = nn.Linear(d_model, output_dim, bias=output_bias) 49 | self.out_lambda = LambdaLayer(out_lambda) 50 | 51 | self._reset_parameters() 52 | 53 | def _reset_parameters(self): 54 | r"""Initiate parameters in the transformer model.""" 55 | 56 | for p in self.parameters(): 57 | if p.dim() > 1: 58 | nn.init.xavier_uniform_(p) 59 | 60 | def forward(self, x): 61 | x = self.encode(x) 62 | x = self.im2seq(x) 63 | x = self.ar_shift(x) 64 | x = self.transformer(x) 65 | x = self.out_linear(x) 66 | x = self.seq2im(x) 67 | return self.out_lambda(x) 68 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/conditional/coupling/coupling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from denseflow.transforms.bijections.conditional import ConditionalBijection 3 | 4 | 5 | class ConditionalCouplingBijection(ConditionalBijection): 6 | """Transforms each input variable with an invertible elementwise bijection. 7 | 8 | The input variables are split into two parts. The second part is transformed conditioned on the first part.
9 | The coupling network takes the first part as input and outputs transformations for the second part. 10 | 11 | Args: 12 | coupling_net: nn.Module, a coupling network such that for x = [x1,x2] 13 | elementwise_params = coupling_net([x1,context]) 14 | context_net: nn.Module or None, a network to process the context. 15 | split_dim: int, dimension to split the input (default=1). 16 | num_condition: int or None, number of parameters to condition on. 17 | If None, the first half is conditioned on: 18 | - For even inputs (1,2,3,4), (1,2) will be conditioned on. 19 | - For odd inputs (1,2,3,4,5), (1,2,3) will be conditioned on. 20 | """ 21 | 22 | def __init__(self, coupling_net, context_net=None, split_dim=1, num_condition=None): 23 | super(ConditionalCouplingBijection, self).__init__() 24 | assert split_dim >= 1 25 | self.coupling_net = coupling_net 26 | self.context_net = context_net 27 | self.split_dim = split_dim 28 | self.num_condition = num_condition 29 | 30 | def split_input(self, input): 31 | if self.num_condition: 32 | split_proportions = (self.num_condition, input.shape[self.split_dim] - self.num_condition) 33 | return torch.split(input, split_proportions, dim=self.split_dim) 34 | else: 35 | return torch.chunk(input, 2, dim=self.split_dim) 36 | 37 | def forward(self, x, context): 38 | # if not x.requires_grad: 39 | # x.requires_grad = True 40 | # if not context.requires_grad: 41 | # context.requires_grad = True 42 | if self.context_net: context = self.context_net(context) 43 | id, x2 = self.split_input(x) 44 | context = torch.cat([id, context], dim=self.split_dim) 45 | elementwise_params = self.coupling_net(context) 46 | z2, ldj = self._elementwise_forward(x2, elementwise_params) 47 | z = torch.cat([id, z2], dim=self.split_dim) 48 | return z, ldj 49 | 50 | def inverse(self, z, context): 51 | if self.context_net: context = self.context_net(context) 52 | id, z2 = self.split_input(z) 53 | context = torch.cat([id, context], dim=self.split_dim) 54 | elementwise_params = self.coupling_net(context) 55 | x2 = self._elementwise_inverse(z2, elementwise_params) 56 | x = torch.cat([id, x2], dim=self.split_dim) 57 | return x 58 | 59 | def _output_dim_multiplier(self): 60 | raise NotImplementedError() 61 | 62 | def _elementwise_forward(self, x, elementwise_params): 63 | raise NotImplementedError() 64 | 65 | def _elementwise_inverse(self, z, elementwise_params): 66 | raise NotImplementedError() 67 | -------------------------------------------------------------------------------- /denseflow/transforms/bijections/conv1x1.py: -------------------------------------------------------------------------------- 1 | from functools import reduce 2 | from operator import mul 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from denseflow.transforms.bijections import Bijection 8 | 9 | 10 | class Conv1x1(Bijection): 11 | """ 12 | Invertible 1x1 Convolution [1]. 13 | The weight matrix is initialized as a random rotation matrix 14 | as described in Section 3.2 of [1]. 15 | 16 | Args: 17 | num_channels (int): Number of channels in the input and output. 18 | orthogonal_init (bool): If True, initialize weights to be a random orthogonal matrix (default=True). 19 | slogdet_cpu (bool): If True, compute slogdet on cpu (default=True). 20 | trainable (bool): If True, the weight is a learnable parameter; otherwise a fixed buffer (default=True). 21 | 22 | Note: 23 | torch.slogdet appears to run faster on CPU than on GPU. 24 | slogdet_cpu is thus set to True by default.
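For an input of shape (batch, channels, *spatial), the log-det contribution is log|det(weight)| multiplied by the number of spatial positions.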
24 | 25 | References: 26 | [1] Glow: Generative Flow with Invertible 1×1 Convolutions, 27 | Kingma & Dhariwal, 2018, https://arxiv.org/abs/1807.03039 28 | """ 29 | def __init__(self, num_channels, orthogonal_init=True, slogdet_cpu=True, trainable=True): 30 | super(Conv1x1, self).__init__() 31 | self.num_channels = num_channels 32 | self.slogdet_cpu = slogdet_cpu 33 | if trainable: 34 | self.weight = nn.Parameter(torch.Tensor(num_channels, num_channels)) 35 | else: 36 | self.register_buffer('weight', torch.Tensor(num_channels, num_channels)) 37 | self.reset_parameters(orthogonal_init) 38 | 39 | def reset_parameters(self, orthogonal_init): 40 | self.orthogonal_init = orthogonal_init 41 | 42 | if self.orthogonal_init: 43 | nn.init.orthogonal_(self.weight) 44 | else: 45 | bound = 1.0 / np.sqrt(self.num_channels) 46 | nn.init.uniform_(self.weight, -bound, bound) 47 | 48 | def _conv(self, weight, v): 49 | 50 | # Get tensor dimensions 51 | _, channel, *features = v.shape 52 | n_feature_dims = len(features) 53 | 54 | # expand weight matrix 55 | fill = (1,) * n_feature_dims 56 | weight = weight.view(channel, channel, *fill) 57 | 58 | if n_feature_dims == 1: 59 | return F.conv1d(v, weight) 60 | elif n_feature_dims == 2: 61 | return F.conv2d(v, weight) 62 | elif n_feature_dims == 3: 63 | return F.conv3d(v, weight) 64 | else: 65 | raise ValueError(f'Got {n_feature_dims}d tensor, expected 1d, 2d, or 3d') 66 | 67 | def _logdet(self, x_shape): 68 | b, c, *dims = x_shape 69 | if self.slogdet_cpu: 70 | _, ldj_per_pixel = torch.slogdet(self.weight.to('cpu')) 71 | else: 72 | _, ldj_per_pixel = torch.slogdet(self.weight) 73 | ldj = ldj_per_pixel * reduce(mul, dims) 74 | return ldj.expand([b]).to(self.weight.device) 75 | 76 | def forward(self, x): 77 | z = self._conv(self.weight, x) 78 | ldj = self._logdet(x.shape) 79 | return z, ldj 80 | 81 | def inverse(self, z): 82 | weight_inv = torch.inverse(self.weight) 83 | x = self._conv(weight_inv, z) 84 | return x 85 | -------------------------------------------------------------------------------- /mask2former/modeling/pixel_decoder/ops/setup.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | # Copyright (c) Facebook, Inc. and its affiliates. 
10 | # Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 11 | 12 | import os 13 | import glob 14 | 15 | import torch 16 | 17 | from torch.utils.cpp_extension import CUDA_HOME 18 | from torch.utils.cpp_extension import CppExtension 19 | from torch.utils.cpp_extension import CUDAExtension 20 | 21 | from setuptools import find_packages 22 | from setuptools import setup 23 | 24 | requirements = ["torch", "torchvision"] 25 | 26 | def get_extensions(): 27 | this_dir = os.path.dirname(os.path.abspath(__file__)) 28 | extensions_dir = os.path.join(this_dir, "src") 29 | 30 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 31 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 32 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 33 | 34 | sources = main_file + source_cpu 35 | extension = CppExtension 36 | extra_compile_args = {"cxx": []} 37 | define_macros = [] 38 | 39 | # FORCE_CUDA allows forcing a CUDA build even when torch.cuda.is_available() is False, e.g. when building on a machine without a visible GPU. 40 | if (os.environ.get('FORCE_CUDA') or torch.cuda.is_available()) and CUDA_HOME is not None: 41 | extension = CUDAExtension 42 | sources += source_cuda 43 | define_macros += [("WITH_CUDA", None)] 44 | extra_compile_args["nvcc"] = [ 45 | "-DCUDA_HAS_FP16=1", 46 | "-D__CUDA_NO_HALF_OPERATORS__", 47 | "-D__CUDA_NO_HALF_CONVERSIONS__", 48 | "-D__CUDA_NO_HALF2_OPERATORS__", 49 | ] 50 | else: 51 | if CUDA_HOME is None: 52 | raise NotImplementedError('CUDA_HOME is None. Please set environment variable CUDA_HOME.') 53 | else: 54 | raise NotImplementedError('No CUDA runtime found. Set FORCE_CUDA=1 or check torch.cuda.is_available().') 55 | 56 | sources = [os.path.join(extensions_dir, s) for s in sources] 57 | include_dirs = [extensions_dir] 58 | ext_modules = [ 59 | extension( 60 | "MultiScaleDeformableAttention", 61 | sources, 62 | include_dirs=include_dirs, 63 | define_macros=define_macros, 64 | extra_compile_args=extra_compile_args, 65 | ) 66 | ] 67 | return ext_modules 68 | 69 | setup( 70 | name="MultiScaleDeformableAttention", 71 | version="1.0", 72 | author="Weijie Su", 73 | url="https://github.com/fundamentalvision/Deformable-DETR", 74 | description="PyTorch Wrapper for CUDA Functions of Multi-Scale Deformable Attention", 75 | packages=find_packages(exclude=("configs", "tests",)), 76 | ext_modules=get_extensions(), 77 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 78 | ) 79 | -------------------------------------------------------------------------------- /denseflow/nn/layers/autoregressive/masked_conv_2d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from denseflow.nn.layers.autoregressive.utils import mask_conv2d_spatial, mask_conv2d 5 | 6 | 7 | class _MaskedConv2d(nn.Conv2d): 8 | """ 9 | A masked version of nn.Conv2d. 10 | """ 11 | 12 | def register_mask(self, mask): 13 | """ 14 | Registers mask to be used in forward pass. 15 | 16 | Input: 17 | mask: torch.FloatTensor 18 | Shape needs to be broadcastable with self.weight. 19 | """ 20 | self.register_buffer('mask', mask) 21 | 22 | def forward(self, x): 23 | self.weight.data *= self.mask 24 | return super(_MaskedConv2d, self).forward(x) 25 | 26 | 27 | class SpatialMaskedConv2d(_MaskedConv2d): 28 | """ 29 | A version of nn.Conv2d masked to be autoregressive in the spatial dimensions. 30 | Uses mask of shape (1, 1, height, width).
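With mask type 'A' the center pixel itself is masked; with type 'B' it is visible. All positions to the right of and below the center are masked in both cases.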
31 | 32 | Input: 33 | *args: Arguments passed to the constructor of nn.Conv2d. 34 | mask_type: str 35 | Either 'A' or 'B'. 'A' for first layer of network, 'B' for all others. 36 | **kwargs: Keyword arguments passed to the constructor of nn.Conv2d. 37 | """ 38 | 39 | def __init__(self, *args, mask_type, **kwargs): 40 | super(SpatialMaskedConv2d, self).__init__(*args, **kwargs) 41 | assert mask_type in {'A', 'B'} 42 | _, _, height, width = self.weight.size() 43 | mask = mask_conv2d_spatial(mask_type, height, width) 44 | self.register_mask(mask) 45 | 46 | 47 | class MaskedConv2d(_MaskedConv2d): 48 | """ 49 | A version of nn.Conv2d masked to be autoregressive in 50 | the spatial dimensions and in the channel dimension. 51 | This is constructed specifically for data that 52 | has any number of input channels. 53 | Uses mask of shape (out_channels, in_channels, height, width). 54 | 55 | Input: 56 | *args: Arguments passed to the constructor of nn.Conv2d. 57 | mask_type: str 58 | Either 'A' or 'B'. 'A' for first layer of network, 'B' for all others. 59 | data_channels: int 60 | Number of channels in the input data, e.g. 3 for RGB images. Default: 3. 61 | This will be used to mask channels throughout the network such that 62 | all feature maps will have order (R, G, B, R, G, B, ...). 63 | In the case of mask_type B, for the central pixel: 64 | Outputs in position R can only access inputs in position R. 65 | Outputs in position G can access inputs in position R and G. 66 | Outputs in position B can access inputs in position R, G and B. 67 | In the case of mask_type A, for the central pixel: 68 | Outputs in position G can only access inputs in position R. 69 | Outputs in position B can access inputs in position R and G. 70 | **kwargs: Keyword arguments passed to the constructor of nn.Conv2d. 71 | """ 72 | 73 | def __init__(self, *args, mask_type, data_channels=3, **kwargs): 74 | super(MaskedConv2d, self).__init__(*args, **kwargs) 75 | assert mask_type in {'A', 'B'} 76 | out_channels, in_channels, height, width = self.weight.size() 77 | mask = mask_conv2d(mask_type, in_channels, out_channels, height, width, data_channels) 78 | self.register_mask(mask) 79 | -------------------------------------------------------------------------------- /datasets/prepare_coco_semantic_annos_from_panoptic_annos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | import functools 6 | import json 7 | import multiprocessing as mp 8 | import numpy as np 9 | import os 10 | import time 11 | from fvcore.common.download import download 12 | from panopticapi.utils import rgb2id 13 | from PIL import Image 14 | 15 | from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES 16 | 17 | 18 | def _process_panoptic_to_semantic(input_panoptic, output_semantic, segments, id_map): 19 | panoptic = np.asarray(Image.open(input_panoptic), dtype=np.uint32) 20 | panoptic = rgb2id(panoptic) 21 | output = np.zeros_like(panoptic, dtype=np.uint8) + 255 22 | for seg in segments: 23 | cat_id = seg["category_id"] 24 | new_cat_id = id_map[cat_id] 25 | output[panoptic == seg["id"]] = new_cat_id 26 | Image.fromarray(output).save(output_semantic) 27 | 28 | 29 | def separate_coco_semantic_from_panoptic(panoptic_json, panoptic_root, sem_seg_root, categories): 30 | """ 31 | Create semantic segmentation annotations from panoptic segmentation 32 | annotations, to be used by PanopticFPN.
33 | It maps all thing categories to class 0, and maps all unlabeled pixels to class 255. 34 | It maps all stuff categories to contiguous ids starting from 1. 35 | Args: 36 | panoptic_json (str): path to the panoptic json file, in COCO's format. 37 | panoptic_root (str): a directory with panoptic annotation files, in COCO's format. 38 | sem_seg_root (str): a directory to output semantic annotation files 39 | categories (list[dict]): category metadata. Each dict needs to have: 40 | "id": corresponds to the "category_id" in the json annotations 41 | "isthing": 0 or 1 42 | """ 43 | os.makedirs(sem_seg_root, exist_ok=True) 44 | 45 | id_map = {} # map from category id to id in the output semantic annotation 46 | assert len(categories) <= 254 47 | for i, k in enumerate(categories): 48 | id_map[k["id"]] = i 49 | # what is id = 0? 50 | # id_map[0] = 255 51 | print(id_map) 52 | 53 | with open(panoptic_json) as f: 54 | obj = json.load(f) 55 | 56 | pool = mp.Pool(processes=max(mp.cpu_count() // 2, 4)) 57 | 58 | def iter_annotations(): 59 | for anno in obj["annotations"]: 60 | file_name = anno["file_name"] 61 | segments = anno["segments_info"] 62 | input = os.path.join(panoptic_root, file_name) 63 | output = os.path.join(sem_seg_root, file_name) 64 | yield input, output, segments 65 | 66 | print("Start writing to {} ...".format(sem_seg_root)) 67 | start = time.time() 68 | pool.starmap( 69 | functools.partial(_process_panoptic_to_semantic, id_map=id_map), 70 | iter_annotations(), 71 | chunksize=100, 72 | ) 73 | print("Finished. time: {:.2f}s".format(time.time() - start)) 74 | 75 | 76 | if __name__ == "__main__": 77 | dataset_dir = os.path.join(os.getenv("DETECTRON2_DATASETS", "datasets"), "coco") 78 | for s in ["val2017", "train2017"]: 79 | separate_coco_semantic_from_panoptic( 80 | os.path.join(dataset_dir, "annotations/panoptic_{}.json".format(s)), 81 | os.path.join(dataset_dir, "panoptic_{}".format(s)), 82 | os.path.join(dataset_dir, "panoptic_semseg_{}".format(s)), 83 | COCO_CATEGORIES, 84 | ) 85 | -------------------------------------------------------------------------------- /denseflow/nn/layers/autoregressive/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def mask_conv2d_spatial(mask_type, height, width): 5 | """ 6 | Creates a mask for Conv2d such that it becomes autoregressive in 7 | the spatial dimensions. 8 | 9 | Input: 10 | mask_type: str 11 | Either 'A' or 'B'. 'A' for first layer of network, 'B' for all others. 12 | height: int 13 | Kernel height for layer. 14 | width: int 15 | Kernel width for layer. 16 | Output: 17 | mask: torch.FloatTensor 18 | Shape (1, 1, height, width). 19 | A mask with 0 in places for masked elements. 20 | """ 21 | mask = torch.ones([1, 1, height, width]) 22 | mask[:, :, height // 2, width // 2 + (mask_type == 'B'):] = 0 23 | mask[:, :, height // 2 + 1:] = 0 24 | return mask 25 | 26 | 27 | def mask_channels(mask_type, in_channels, out_channels, data_channels=3): 28 | """ 29 | Creates an autoregressive channel mask. 30 | 31 | Input: 32 | mask_type: str 33 | Either 'A' or 'B'. 'A' for first layer of network, 'B' for all others. 34 | in_channels: int 35 | Number of input channels to layer. 36 | out_channels: int 37 | Number of output channels of layer. 38 | data_channels: int 39 | Number of channels in the input data, e.g. 3 for RGB images. (default = 3). 40 | Output: 41 | mask: torch.FloatTensor 42 | Shape (out_channels, in_channels). 43 | A mask with 0 in places for masked elements. 
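Example: for mask_type='A' with in_channels = out_channels = data_channels = 3, the returned mask equals torch.ones(3, 3).tril(-1): R sees nothing, G sees R, and B sees R and G.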
44 | """ 45 | in_factor = in_channels // data_channels + 1 46 | out_factor = out_channels // data_channels + 1 47 | 48 | base_mask = torch.ones([data_channels,data_channels]) 49 | if mask_type == 'A': 50 | base_mask = base_mask.tril(-1) 51 | else: 52 | base_mask = base_mask.tril(0) 53 | 54 | mask_p1 = torch.cat([base_mask]*in_factor, dim=1) 55 | mask_p2 = torch.cat([mask_p1]*out_factor, dim=0) 56 | 57 | mask = mask_p2[0:out_channels,0:in_channels] 58 | return mask 59 | 60 | 61 | def mask_conv2d(mask_type, in_channels, out_channels, height, width, data_channels=3): 62 | r""" 63 | Creates a mask for Conv2d such that it becomes autoregressive in both 64 | the spatial dimensions and the channel dimension. 65 | 66 | Input: 67 | mask_type: str 68 | Either 'A' or 'B'. 'A' for first layer of network, 'B' for all others. 69 | in_channels: int 70 | Number of input channels to layer. 71 | out_channels: int 72 | Number of output channels of layer. 73 | height: int 74 | Kernel height for layer. 75 | width: int 76 | Kernel width for layer. 77 | data_channels: int 78 | Number of channels in the input data, e.g. 3 for RGB images. (default = 3). 79 | Output: 80 | mask: torch.FloatTensor 81 | Shape (out_channels, in_channels, height, width). 82 | A mask with 0 in places for masked elements. 83 | """ 84 | mask = torch.ones([out_channels,in_channels,height,width]) 85 | # RGB masking in central pixel 86 | mask[:, :, height // 2, width // 2] = mask_channels(mask_type, in_channels, out_channels, data_channels) 87 | # Masking all pixels to the right of the central pixel 88 | mask[:, :, height // 2, width // 2 + 1:] = 0 89 | # Masking all pixels below the central pixel 90 | mask[:, :, height // 2 + 1:] = 0 91 | return mask 92 | -------------------------------------------------------------------------------- /denseflow/distributions/conditional/normal.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | from torch.distributions import Normal 5 | from denseflow.distributions.conditional import ConditionalDistribution 6 | from denseflow.utils import sum_except_batch 7 | 8 | 9 | class ConditionalMeanNormal(ConditionalDistribution): 10 | """A multivariate Normal with conditional mean and fixed std.""" 11 | 12 | def __init__(self, net, scale=1.0): 13 | super(ConditionalMeanNormal, self).__init__() 14 | self.net = net 15 | self.scale = scale 16 | 17 | def cond_dist(self, context): 18 | mean = self.net(context) 19 | return Normal(loc=mean, scale=self.scale) 20 | 21 | def log_prob(self, x, context): 22 | dist = self.cond_dist(context) 23 | return sum_except_batch(dist.log_prob(x)) 24 | 25 | def sample(self, context): 26 | dist = self.cond_dist(context) 27 | return dist.rsample() 28 | 29 | def sample_with_log_prob(self, context): 30 | dist = self.cond_dist(context) 31 | z = dist.rsample() 32 | log_prob = dist.log_prob(z) 33 | log_prob = sum_except_batch(log_prob) 34 | return z, log_prob 35 | 36 | def mean(self, context): 37 | return self.cond_dist(context).mean 38 | 39 | 40 | class ConditionalMeanStdNormal(ConditionalDistribution): 41 | """A multivariate Normal with conditional mean and learned std.""" 42 | 43 | def __init__(self, net, scale_shape): 44 | super(ConditionalMeanStdNormal, self).__init__() 45 | self.net = net 46 | self.log_scale = nn.Parameter(torch.zeros(scale_shape)) 47 | 48 | def cond_dist(self, context): 49 | mean = self.net(context) 50 | return Normal(loc=mean, scale=self.log_scale.exp()) 51 | 52 | def 
log_prob(self, x, context): 53 | dist = self.cond_dist(context) 54 | return sum_except_batch(dist.log_prob(x)) 55 | 56 | def sample(self, context): 57 | dist = self.cond_dist(context) 58 | return dist.rsample() 59 | 60 | def sample_with_log_prob(self, context): 61 | dist = self.cond_dist(context) 62 | z = dist.rsample() 63 | log_prob = dist.log_prob(z) 64 | log_prob = sum_except_batch(log_prob) 65 | return z, log_prob 66 | 67 | def mean(self, context): 68 | return self.cond_dist(context).mean 69 | 70 | 71 | class ConditionalNormal(ConditionalDistribution): 72 | """A multivariate Normal with conditional mean and log_std.""" 73 | 74 | def __init__(self, net, split_dim=-1): 75 | super(ConditionalNormal, self).__init__() 76 | self.net = net 77 | self.split_dim = split_dim 78 | 79 | def cond_dist(self, context): 80 | params = self.net(context) 81 | mean, log_std = torch.chunk(params, chunks=2, dim=self.split_dim) 82 | return Normal(loc=mean, scale=log_std.exp()) 83 | 84 | def log_prob(self, x, context): 85 | dist = self.cond_dist(context) 86 | return sum_except_batch(dist.log_prob(x)) 87 | 88 | def sample(self, context): 89 | dist = self.cond_dist(context) 90 | return dist.rsample() 91 | 92 | def sample_with_log_prob(self, context): 93 | dist = self.cond_dist(context) 94 | z = dist.rsample() 95 | log_prob = dist.log_prob(z) 96 | log_prob = sum_except_batch(log_prob) 97 | return z, log_prob 98 | 99 | def mean(self, context): 100 | return self.cond_dist(context).mean 101 | 102 | def mean_stddev(self, context): 103 | dist = self.cond_dist(context) 104 | return dist.mean, dist.stddev 105 | --------------------------------------------------------------------------------
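A minimal usage sketch for the `ConditionalNormal` head defined above (not part of the repository; the sizes are illustrative and the import targets the module file directly):

import torch
import torch.nn as nn
from denseflow.distributions.conditional.normal import ConditionalNormal

# Hypothetical sizes: a 16-dim context conditions an 8-dim variable. The net
# must output 2 * 8 values, which cond_dist() chunks into mean and log_std.
net = nn.Linear(16, 2 * 8)
dist = ConditionalNormal(net, split_dim=-1)

context = torch.randn(4, 16)
z, log_prob = dist.sample_with_log_prob(context)  # z: (4, 8), log_prob: (4,)
assert torch.allclose(log_prob, dist.log_prob(z, context))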