17 |
18 | ## Steps to reproduce
19 |
20 | ### Install requirements
21 | * Python 3.7+
22 | ```bash
23 | pip install -r requirements.txt
24 | ```
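Optionally, create an isolated environment first; this is only a sketch and the environment name `venv` is an arbitrary choice:
```bash
python3 -m venv venv          # optional: isolated environment
source venv/bin/activate
pip install -r requirements.txt
```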
25 |
26 | ### Download Cityscapes
27 |
28 | From https://www.cityscapes-dataset.com/downloads/ download:
29 | * leftImg8bit_trainvaltest.zip (11GB)
30 | * gtFine_trainvaltest.zip (241MB)
31 |
32 | Either extract the downloaded archives to `datasets/`, or create a symbolic link `datasets/Cityscapes` pointing to the extracted dataset (see the note after the listing below).
33 | The expected dataset structure for Cityscapes is:
34 | ```
35 | labels/
36 | train/
37 | aachen/
38 | aachen_000000_000019.png
39 | ...
40 | ...
41 | val/
42 | ...
43 | rgb/
44 | train/
45 | aachen/
46 | aachen_000000_000019.png
47 | ...
48 | ...
49 | val/
50 | ...
51 | ```
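Note that the provided configs resolve the dataset root as `Path.home() / 'datasets/Cityscapes'`, so either place the data there or adjust `root` in the config. A minimal sketch using a symbolic link (the source path is a placeholder):
```bash
mkdir -p ~/datasets
ln -s /path/to/extracted/Cityscapes ~/datasets/Cityscapes
```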
52 |
53 |
54 | ### Evaluate
55 | ##### Pre-trained Cityscapes models [available](https://drive.google.com/drive/folders/1DqX-N-nMtGG9QfMY_cKtULCKTfEuV4WT?usp=sharing)
56 | * Download and extract them into the `weights` directory (expected layout below).
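
After extraction, the provided configs load checkpoints from paths such as:
```
weights/
    rn18_single_scale/
        model_best.pt
    rn18_pyramid/
        model_best.pt
```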
57 |
58 | Set `evaluating = True` inside the config file (e.g. `configs/rn18_single_scale.py`) and run:
59 | ```bash
60 | python eval.py configs/rn18_single_scale.py
61 | ```
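
With `evaluating = True`, the provided configs also create an `out/` directory next to the config file and attach a `StorePreds` observer that writes results into it:
```
configs/out/
    train/
    val/
```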
62 |
63 | ### Train
64 | ```bash
65 | python train.py configs/rn18_single_scale.py --store_dir=/path/to/store/experiments
66 | ```
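
The other provided configs use the same entry point; e.g. the pyramid model:
```bash
python train.py configs/rn18_pyramid.py --store_dir=/path/to/store/experiments
```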
67 |
--------------------------------------------------------------------------------
/configs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orsic/swiftnet/2b88990e1ab674e8ef7cb533a1d8d49ef34ac93d/configs/__init__.py
--------------------------------------------------------------------------------
/configs/rn18_pyramid.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.utils.data import DataLoader
4 | from torchvision.transforms import Compose
5 | import torch.optim as optim
6 | from pathlib import Path
7 | import os
8 | import numpy as np
9 |
10 | from models.semseg import SemsegModel
11 | from models.resnet.resnet_pyramid import *
12 | from models.loss import BoundaryAwareFocalLoss
13 | from data.transform import *
14 | from data.cityscapes import Cityscapes
15 | from evaluation import StorePreds
16 |
17 | from models.util import get_n_params
18 |
19 | path = os.path.abspath(__file__)
20 | dir_path = os.path.dirname(path)
21 | root = Path.home() / Path('datasets/Cityscapes')
22 |
23 | evaluating = False
24 | random_crop_size = 768
25 |
26 | scale = 1
27 | mean = [73.15, 82.90, 72.3]
28 | std = [47.67, 48.49, 47.73]
29 | mean_rgb = tuple(np.uint8(scale * np.array(mean)))
30 |
31 | num_classes = Cityscapes.num_classes
32 | ignore_id = Cityscapes.num_classes
33 | class_info = Cityscapes.class_info
34 | color_info = Cityscapes.color_info
35 |
36 | num_levels = 3
37 | ostride = 4
38 | target_size_crops = (random_crop_size, random_crop_size)
39 | target_size_crops_feats = (random_crop_size // ostride, random_crop_size // ostride)
40 |
41 | eval_each = 4
42 | dist_trans_bins = (16, 64, 128)
43 | dist_trans_alphas = (8., 4., 2., 1.)
44 | target_size = (2048, 1024)
45 | target_size_feats = (2048 // ostride, 1024 // ostride)
46 |
47 | trans_val = Compose(
48 | [Open(),
49 | SetTargetSize(target_size=target_size, target_size_feats=target_size_feats),
50 | Tensor(),
51 | ]
52 | )
53 |
54 | if evaluating:
55 | trans_train = trans_train_val = trans_val
56 | else:
57 | trans_train = Compose(
58 | [Open(),
59 | RandomFlip(),
60 | RandomSquareCropAndScale(random_crop_size, ignore_id=ignore_id, mean=mean_rgb),
61 | SetTargetSize(target_size=target_size_crops, target_size_feats=target_size_crops_feats),
62 | LabelDistanceTransform(num_classes=num_classes, reduce=True, bins=dist_trans_bins, alphas=dist_trans_alphas),
63 | Tensor(),
64 | ])
65 |
66 | dataset_train = Cityscapes(root, transforms=trans_train, subset='train')
67 | dataset_val = Cityscapes(root, transforms=trans_val, subset='val')
68 |
69 | backbone = resnet18(pretrained=True,
70 | pyramid_levels=num_levels,
71 | k_upsample=3,
72 | scale=scale,
73 | mean=mean,
74 | std=std,
75 | k_bneck=1,
76 | output_stride=ostride,
77 | efficient=True)
78 | model = SemsegModel(backbone, num_classes, k=1, bias=True)
79 | if evaluating:
80 | model.load_state_dict(torch.load('weights/rn18_pyramid/model_best.pt'), strict=False)
81 | else:
82 | model.criterion = BoundaryAwareFocalLoss(gamma=.5, num_classes=num_classes, ignore_id=ignore_id)
83 |
84 | bn_count = 0
85 | for m in model.modules():
86 | if isinstance(m, nn.BatchNorm2d):
87 | bn_count += 1
88 | print(f'Num BN layers: {bn_count}')
89 |
90 | if not evaluating:
91 | lr = 4e-4
92 | lr_min = 1e-6
93 | fine_tune_factor = 4
94 | weight_decay = 1e-4
95 | epochs = 250
96 |
97 | optim_params = [
98 | {'params': model.random_init_params(), 'lr': lr, 'weight_decay': weight_decay},
99 | {'params': model.fine_tune_params(), 'lr': lr / fine_tune_factor,
100 | 'weight_decay': weight_decay / fine_tune_factor},
101 | ]
102 |
103 | optimizer = optim.Adam(optim_params, betas=(0.9, 0.99))
104 | lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs, lr_min)
105 |
106 | batch_size = bs = 14
107 | print(f'Batch size: {bs}')
108 | nw = 4
109 |
110 | loader_val = DataLoader(dataset_val, batch_size=1, collate_fn=custom_collate, num_workers=nw)
111 | if evaluating:
112 | loader_train = DataLoader(dataset_train, batch_size=1, collate_fn=custom_collate, num_workers=nw)
113 | else:
114 | loader_train = DataLoader(dataset_train, batch_size=batch_size, num_workers=nw, pin_memory=True,
115 | drop_last=True, collate_fn=custom_collate, shuffle=True)
116 |
117 | total_params = get_n_params(model.parameters())
118 | ft_params = get_n_params(model.fine_tune_params())
119 | ran_params = get_n_params(model.random_init_params())
120 | assert total_params == (ft_params + ran_params)
121 | print(f'Num params: {total_params:,} = {ran_params:,}(random init) + {ft_params:,}(fine tune)')
122 |
123 | if evaluating:
124 | eval_loaders = [(loader_val, 'val'), (loader_train, 'train')]
125 | store_dir = f'{dir_path}/out/'
126 | for d in ['', 'val', 'train']:
127 | os.makedirs(store_dir + d, exist_ok=True)
128 | to_color = ColorizeLabels(color_info)
129 | to_image = Compose([Numpy(), to_color])
130 | eval_observers = [StorePreds(store_dir, to_image, to_color)]
131 |
--------------------------------------------------------------------------------
/configs/rn18_single_scale.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.utils.data import DataLoader
3 | from torchvision.transforms import Compose
4 | import torch.optim as optim
5 | from pathlib import Path
6 | import numpy as np
7 | import os
8 |
9 | from models.semseg import SemsegModel
10 | from models.resnet.resnet_single_scale import *
11 | from models.loss import SemsegCrossEntropy
12 | from data.transform import *
13 | from data.cityscapes import Cityscapes
14 | from evaluation import StorePreds
15 |
16 | from models.util import get_n_params
17 |
18 | root = Path.home() / Path('datasets/Cityscapes')
19 | path = os.path.abspath(__file__)
20 | dir_path = os.path.dirname(path)
21 |
22 | evaluating = False
23 | random_crop_size = 768
24 |
25 | scale = 1
26 | mean = [73.15, 82.90, 72.3]
27 | std = [47.67, 48.49, 47.73]
28 | mean_rgb = tuple(np.uint8(scale * np.array(mean)))
29 |
30 | num_classes = Cityscapes.num_classes
31 | ignore_id = Cityscapes.num_classes
32 | class_info = Cityscapes.class_info
33 | color_info = Cityscapes.color_info
34 |
35 | target_size_crops = (random_crop_size, random_crop_size)
36 | target_size_crops_feats = (random_crop_size // 4, random_crop_size // 4)
37 | target_size = (2048, 1024)
38 | target_size_feats = (2048 // 4, 1024 // 4)
39 |
40 | eval_each = 4
41 |
42 |
43 | trans_val = Compose(
44 | [Open(),
45 | SetTargetSize(target_size=target_size, target_size_feats=target_size_feats),
46 | Tensor(),
47 | ]
48 | )
49 |
50 | if evaluating:
51 | trans_train = trans_val
52 | else:
53 | trans_train = Compose(
54 | [Open(),
55 | RandomFlip(),
56 | RandomSquareCropAndScale(random_crop_size, ignore_id=num_classes, mean=mean_rgb),
57 | SetTargetSize(target_size=target_size_crops, target_size_feats=target_size_crops_feats),
58 | Tensor(),
59 | ]
60 | )
61 |
62 | dataset_train = Cityscapes(root, transforms=trans_train, subset='train')
63 | dataset_val = Cityscapes(root, transforms=trans_val, subset='val')
64 |
65 | resnet = resnet18(pretrained=True, efficient=False, mean=mean, std=std, scale=scale)
66 | model = SemsegModel(resnet, num_classes)
67 | if evaluating:
68 | model.load_state_dict(torch.load('weights/rn18_single_scale/model_best.pt'))
69 | else:
70 | model.criterion = SemsegCrossEntropy(num_classes=num_classes, ignore_id=ignore_id)
71 | lr = 4e-4
72 | lr_min = 1e-6
73 | fine_tune_factor = 4
74 | weight_decay = 1e-4
75 | epochs = 250
76 |
77 | optim_params = [
78 | {'params': model.random_init_params(), 'lr': lr, 'weight_decay': weight_decay},
79 | {'params': model.fine_tune_params(), 'lr': lr / fine_tune_factor,
80 | 'weight_decay': weight_decay / fine_tune_factor},
81 | ]
82 |
83 | optimizer = optim.Adam(optim_params, betas=(0.9, 0.99))
84 | lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs, lr_min)
85 |
86 | batch_size = 14
87 | print(f'Batch size: {batch_size}')
88 |
89 | if evaluating:
90 | loader_train = DataLoader(dataset_train, batch_size=1, collate_fn=custom_collate)
91 | else:
92 | loader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True, num_workers=4,
93 | pin_memory=True,
94 | drop_last=True, collate_fn=custom_collate)
95 | loader_val = DataLoader(dataset_val, batch_size=1, collate_fn=custom_collate)
96 |
97 | total_params = get_n_params(model.parameters())
98 | ft_params = get_n_params(model.fine_tune_params())
99 | ran_params = get_n_params(model.random_init_params())
100 | spp_params = get_n_params(model.backbone.spp.parameters())
101 | assert total_params == (ft_params + ran_params)
102 | print(f'Num params: {total_params:,} = {ran_params:,}(random init) + {ft_params:,}(fine tune)')
103 | print(f'SPP params: {spp_params:,}')
104 |
105 | if evaluating:
106 | eval_loaders = [(loader_val, 'val'), (loader_train, 'train')]
107 | store_dir = f'{dir_path}/out/'
108 | for d in ['', 'val', 'train', 'training']:
109 | os.makedirs(store_dir + d, exist_ok=True)
110 | to_color = ColorizeLabels(color_info)
111 | to_image = Compose([DenormalizeTh(scale, mean, std), Numpy(), to_color])
112 | eval_observers = [StorePreds(store_dir, to_image, to_color)]
113 |
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .cityscapes import Cityscapes
2 | from .mux import *
--------------------------------------------------------------------------------
/data/ade20k/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orsic/swiftnet/2b88990e1ab674e8ef7cb533a1d8d49ef34ac93d/data/ade20k/__init__.py
--------------------------------------------------------------------------------
/data/ade20k/ade20k.py:
--------------------------------------------------------------------------------
1 | from torch.utils.data import Dataset
2 | from pathlib import Path
3 | from scipy.io import loadmat
4 | import numpy as np
5 |
6 |
7 | def init_ade20k_class_color_info(path: Path):
8 | colors = loadmat(str(path / 'color150.mat'))['colors']
9 | classes = []
10 | with (path / 'object150_info.csv').open('r') as f:
11 | for i, line in enumerate(f.readlines()):
12 | if bool(i):
13 | classes += [line.rstrip().split(',')[-1]]
14 | return classes + ['void'], np.concatenate([colors, np.array([[0, 0, 0]], dtype=colors.dtype)])
15 |
16 |
17 | class_info, color_info = init_ade20k_class_color_info(Path('/home/morsic/datasets/ADE20k'))
18 |
19 |
20 | class ADE20k(Dataset):
21 | class_info = class_info
22 | color_info = color_info
23 | num_classes = 150
24 |
25 | def __init__(self, root: Path, transforms: lambda x: x, subset='training', open_images=True, epoch=None):
26 | self.root = root
27 | self.open_images = open_images
28 | self.images_dir = root / 'ADEChallengeData2016/images/' / subset
29 | self.labels_dir = root / 'ADEChallengeData2016/annotations/' / subset
30 |
31 | self.images = list(sorted(self.images_dir.glob('*.jpg')))
32 | self.labels = list(sorted(self.labels_dir.glob('*.png')))
33 |
34 | self.transforms = transforms
35 | self.subset = subset
36 | self.epoch = epoch
37 |
38 | print(f'Num images: {len(self)}')
39 |
40 | def __len__(self):
41 | return len(self.images)
42 |
43 | def __getitem__(self, item):
44 | ret_dict = {
45 | 'name': self.images[item].stem,
46 | 'subset': self.subset,
47 | 'labels': self.labels[item]
48 | }
49 | if self.open_images:
50 | ret_dict['image'] = self.images[item]
51 | if self.epoch is not None:
52 | ret_dict['epoch'] = int(self.epoch.value)
53 | return self.transforms(ret_dict)
54 |
--------------------------------------------------------------------------------
/data/camvid/__init__.py:
--------------------------------------------------------------------------------
1 | from .camvid import CamVid
--------------------------------------------------------------------------------
/data/camvid/camvid.py:
--------------------------------------------------------------------------------
1 | from torch.utils.data import Dataset
2 | from pathlib import Path
3 |
4 | class_info = ['building', 'tree', 'sky', 'car', 'sign', 'road', 'pedestrian', 'fence', 'column pole', 'sidewalk',
5 | 'bicyclist']
6 | color_info = [(128, 0, 0), (128, 128, 0), (128, 128, 128), (64, 0, 128), (192, 128, 128), (128, 64, 128), (64, 64, 0),
7 | (64, 74, 128), (192, 192, 128), (0, 0, 192), (0, 128, 192)]
8 |
9 | color_info += [[0, 0, 0]]
10 |
11 |
12 | class CamVid(Dataset):
13 | class_info = class_info
14 | color_info = color_info
15 | num_classes = len(class_info)
16 |
17 | mean = [111.376, 63.110, 83.670]
18 | std = [41.608, 54.237, 68.889]
19 |
20 | def __init__(self, root: Path, transforms: lambda x: x, subset='train'):
21 | self.root = root
22 | self.subset = subset
23 | self.image_names = [line.rstrip() for line in (root / f'{subset}.txt').open('r').readlines()]
24 | name_filter = lambda x: x.name in self.image_names
25 | self.images = list(filter(name_filter, (self.root / 'rgb').iterdir()))
26 | self.labels = list(filter(name_filter, (self.root / 'labels/ids').iterdir()))
27 | self.transforms = transforms
28 | print(f'Num images: {len(self)}')
29 |
30 | def __len__(self):
31 | return len(self.images)
32 |
33 | def __getitem__(self, item):
34 | ret_dict = {
35 | 'image': self.images[item],
36 | 'name': self.images[item].stem,
37 | 'subset': self.subset,
38 | 'labels': self.labels[item]
39 | }
40 | return self.transforms(ret_dict)
41 |
--------------------------------------------------------------------------------
/data/cityscapes/__init__.py:
--------------------------------------------------------------------------------
1 | from .cityscapes import Cityscapes
2 |
--------------------------------------------------------------------------------
/data/cityscapes/cityscapes.py:
--------------------------------------------------------------------------------
1 | from torch.utils.data import Dataset
2 | from pathlib import Path
3 |
4 | from .labels import labels
5 |
6 | class_info = [label.name for label in labels if label.ignoreInEval is False]
7 | color_info = [label.color for label in labels if label.ignoreInEval is False]
8 |
9 | color_info += [[0, 0, 0]]
10 |
11 | map_to_id = {}
12 | inst_map_to_id = {}
13 | i, j = 0, 0
14 | for label in labels:
15 | if label.ignoreInEval is False:
16 | map_to_id[label.id] = i
17 | i += 1
18 | if label.hasInstances is True:
19 | inst_map_to_id[label.id] = j
20 | j += 1
21 |
22 | id_to_map = {id: i for i, id in map_to_id.items()}
23 | inst_id_to_map = {id: i for i, id in inst_map_to_id.items()}
24 |
25 |
26 | class Cityscapes(Dataset):
27 | class_info = class_info
28 | color_info = color_info
29 | num_classes = 19
30 |
31 | map_to_id = map_to_id
32 | id_to_map = id_to_map
33 |
34 | inst_map_to_id = inst_map_to_id
35 | inst_id_to_map = inst_id_to_map
36 |
37 | mean = [0.485, 0.456, 0.406]
38 | std = [0.229, 0.224, 0.225]
39 |
40 | def __init__(self, root: Path, transforms: lambda x: x, subset='train', open_depth=False, labels_dir='labels', epoch=None):
41 | self.root = root
42 | self.images_dir = self.root / 'rgb' / subset
43 | self.labels_dir = self.root / labels_dir / subset
44 | self.depth_dir = self.root / 'depth' / subset
45 | self.subset = subset
46 | self.has_labels = subset != 'test'
47 | self.open_depth = open_depth
48 | self.images = list(sorted(self.images_dir.glob('*/*.ppm')))
49 | if self.has_labels:
50 | self.labels = list(sorted(self.labels_dir.glob('*/*.png')))
51 | self.transforms = transforms
52 | self.epoch = epoch
53 |
54 | print(f'Num images: {len(self)}')
55 |
56 | def __len__(self):
57 | return len(self.images)
58 |
59 | def __getitem__(self, item):
60 | ret_dict = {
61 | 'image': self.images[item],
62 | 'name': self.images[item].stem,
63 | 'subset': self.subset,
64 | }
65 | if self.has_labels:
66 | ret_dict['labels'] = self.labels[item]
67 | if self.epoch is not None:
68 | ret_dict['epoch'] = int(self.epoch.value)
69 | return self.transforms(ret_dict)
70 |
--------------------------------------------------------------------------------
/data/cityscapes/labels.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 |
3 | #--------------------------------------------------------------------------------
4 | # Definitions
5 | #--------------------------------------------------------------------------------
6 |
7 | # a label and all meta information
8 | Label = namedtuple( 'Label' , [
9 |
10 | 'name' , # The identifier of this label, e.g. 'car', 'person', ... .
11 | # We use them to uniquely name a class
12 |
13 | 'id' , # An integer ID that is associated with this label.
14 | # The IDs are used to represent the label in ground truth images
15 | # An ID of -1 means that this label does not have an ID and thus
16 | # is ignored when creating ground truth images (e.g. license plate).
17 | # Do not modify these IDs, since exactly these IDs are expected by the
18 | # evaluation server.
19 |
20 | 'trainId' , # Feel free to modify these IDs as suitable for your method. Then create
21 | # ground truth images with train IDs, using the tools provided in the
22 | # 'preparation' folder. However, make sure to validate or submit results
23 | # to our evaluation server using the regular IDs above!
24 | # For trainIds, multiple labels might have the same ID. Then, these labels
25 | # are mapped to the same class in the ground truth images. For the inverse
26 | # mapping, we use the label that is defined first in the list below.
27 | # For example, mapping all void-type classes to the same ID in training,
28 | # might make sense for some approaches.
29 | # Max value is 255!
30 |
31 | 'category' , # The name of the category that this label belongs to
32 |
33 | 'categoryId' , # The ID of this category. Used to create ground truth images
34 | # on category level.
35 |
36 | 'hasInstances', # Whether this label distinguishes between single instances or not
37 |
38 | 'ignoreInEval', # Whether pixels having this class as ground truth label are ignored
39 | # during evaluations or not
40 |
41 | 'color' , # The color of this label
42 | ] )
43 |
44 |
45 | #--------------------------------------------------------------------------------
46 | # A list of all labels
47 | #--------------------------------------------------------------------------------
48 |
49 | # Please adapt the train IDs as appropriate for your approach.
50 | # Note that you might want to ignore labels with ID 255 during training.
51 | # Further note that the current train IDs are only a suggestion. You can use whatever you like.
52 | # Make sure to provide your results using the original IDs and not the training IDs.
53 | # Note that many IDs are ignored in evaluation and thus you never need to predict these!
54 |
55 | labels = [
56 | # name id trainId category catId hasInstances ignoreInEval color
57 | Label( 'unlabeled' , 0 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ),
58 | Label( 'ego vehicle' , 1 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ),
59 | Label( 'rectification border' , 2 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ),
60 | Label( 'out of roi' , 3 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ),
61 | Label( 'static' , 4 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ),
62 | Label( 'dynamic' , 5 , 255 , 'void' , 0 , False , True , (111, 74, 0) ),
63 | Label( 'ground' , 6 , 255 , 'void' , 0 , False , True , ( 81, 0, 81) ),
64 | Label( 'road' , 7 , 0 , 'flat' , 1 , False , False , (128, 64,128) ),
65 | Label( 'sidewalk' , 8 , 1 , 'flat' , 1 , False , False , (244, 35,232) ),
66 | Label( 'parking' , 9 , 255 , 'flat' , 1 , False , True , (250,170,160) ),
67 | Label( 'rail track' , 10 , 255 , 'flat' , 1 , False , True , (230,150,140) ),
68 | Label( 'building' , 11 , 2 , 'construction' , 2 , False , False , ( 70, 70, 70) ),
69 | Label( 'wall' , 12 , 3 , 'construction' , 2 , False , False , (102,102,156) ),
70 | Label( 'fence' , 13 , 4 , 'construction' , 2 , False , False , (190,153,153) ),
71 | Label( 'guard rail' , 14 , 255 , 'construction' , 2 , False , True , (180,165,180) ),
72 | Label( 'bridge' , 15 , 255 , 'construction' , 2 , False , True , (150,100,100) ),
73 | Label( 'tunnel' , 16 , 255 , 'construction' , 2 , False , True , (150,120, 90) ),
74 | Label( 'pole' , 17 , 5 , 'object' , 3 , False , False , (153,153,153) ),
75 | Label( 'polegroup' , 18 , 255 , 'object' , 3 , False , True , (153,153,153) ),
76 | Label( 'traffic light' , 19 , 6 , 'object' , 3 , False , False , (250,170, 30) ),
77 | Label( 'traffic sign' , 20 , 7 , 'object' , 3 , False , False , (220,220, 0) ),
78 | Label( 'vegetation' , 21 , 8 , 'nature' , 4 , False , False , (107,142, 35) ),
79 | Label( 'terrain' , 22 , 9 , 'nature' , 4 , False , False , (152,251,152) ),
80 | Label( 'sky' , 23 , 10 , 'sky' , 5 , False , False , ( 70,130,180) ),
81 | Label( 'person' , 24 , 11 , 'human' , 6 , True , False , (220, 20, 60) ),
82 | Label( 'rider' , 25 , 12 , 'human' , 6 , True , False , (255, 0, 0) ),
83 | Label( 'car' , 26 , 13 , 'vehicle' , 7 , True , False , ( 0, 0,142) ),
84 | Label( 'truck' , 27 , 14 , 'vehicle' , 7 , True , False , ( 0, 0, 70) ),
85 | Label( 'bus' , 28 , 15 , 'vehicle' , 7 , True , False , ( 0, 60,100) ),
86 | Label( 'caravan' , 29 , 255 , 'vehicle' , 7 , True , True , ( 0, 0, 90) ),
87 | Label( 'trailer' , 30 , 255 , 'vehicle' , 7 , True , True , ( 0, 0,110) ),
88 | Label( 'train' , 31 , 16 , 'vehicle' , 7 , True , False , ( 0, 80,100) ),
89 | Label( 'motorcycle' , 32 , 17 , 'vehicle' , 7 , True , False , ( 0, 0,230) ),
90 | Label( 'bicycle' , 33 , 18 , 'vehicle' , 7 , True , False , (119, 11, 32) ),
91 | Label( 'license plate' , -1 , -1 , 'vehicle' , 7 , False , True , ( 0, 0,142) ),
92 | ]
93 |
94 |
95 | def get_train_ids():
96 | train_ids = []
97 | for i in labels:
98 | if not i.ignoreInEval:
99 | train_ids.append(i.id)
100 | return train_ids
--------------------------------------------------------------------------------
/data/mux/__init__.py:
--------------------------------------------------------------------------------
1 | from .util import pyramid_sizes
--------------------------------------------------------------------------------
/data/mux/util.py:
--------------------------------------------------------------------------------
1 | from math import ceil
2 |
3 |
4 | def pyramid_sizes(size, alphas, scale=1.0):
5 | w, h = size[0], size[1]
6 | th_sc = lambda wh, alpha: int(ceil(wh / (alpha * scale)))
7 | return [(th_sc(w, a), th_sc(h, a)) for a in alphas]
8 |
--------------------------------------------------------------------------------
/data/transform/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import *
2 | from .border import *
3 | from .flow import *
4 | from .jitter import *
5 | from .labels import *
6 | from .photometric import *
7 | from .class_uniform import *
--------------------------------------------------------------------------------
/data/transform/base.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | from torch.utils.data.dataloader import default_collate
3 | import numpy as np
4 | import torch
5 | from PIL import Image as pimg
6 |
7 | from data.transform.flow_utils import readFlow
8 |
9 | RESAMPLE = pimg.BICUBIC
10 | RESAMPLE_D = pimg.BILINEAR
11 |
12 | __all__ = ['Open', 'SetTargetSize', 'Numpy', 'Tensor', 'detection_collate', 'custom_collate', 'RESAMPLE', 'RESAMPLE_D']
13 |
14 |
15 | class Open:
16 | def __init__(self, palette=None, copy_labels=True):
17 | self.palette = palette
18 | self.copy_labels = copy_labels
19 |
20 | def __call__(self, example: dict):
21 | try:
22 | ret_dict = {}
23 | for k in ['image', 'image_next', 'image_prev']:
24 | if k in example:
25 | ret_dict[k] = pimg.open(example[k]).convert('RGB')
26 | if k == 'image':
27 | ret_dict['target_size'] = ret_dict['image'].size
28 | if 'depth' in example:
29 | example['depth'] = pimg.open(example['depth'])
30 | if 'labels' in example:
31 | ret_dict['labels'] = pimg.open(example['labels'])
32 | if self.palette is not None:
33 | ret_dict['labels'].putpalette(self.palette)
34 | if self.copy_labels:
35 | ret_dict['original_labels'] = ret_dict['labels'].copy()
36 | if 'flow' in example:
37 | ret_dict['flow'] = readFlow(example['flow'])
38 | except OSError:
39 | print(example)
40 | raise
41 | return {**example, **ret_dict}
42 |
43 |
44 | class SetTargetSize:
45 | def __init__(self, target_size, target_size_feats, stride=4):
46 | self.target_size = target_size
47 | self.target_size_feats = target_size_feats
48 | self.stride = stride
49 |
50 | def __call__(self, example):
51 | if all([self.target_size, self.target_size_feats]):
52 | example['target_size'] = self.target_size[::-1]
53 | example['target_size_feats'] = self.target_size_feats[::-1]
54 | else:
55 | k = 'original_labels' if 'original_labels' in example else 'image'
56 | example['target_size'] = example[k].shape[-2:]
57 | example['target_size_feats'] = tuple([s // self.stride for s in example[k].shape[-2:]])
58 | example['alphas'] = [-1]
59 | example['target_level'] = 0
60 | return example
61 |
62 |
63 | class Tensor:
64 | def _trans(self, img, dtype):
65 | img = np.array(img, dtype=dtype)
66 | if len(img.shape) == 3:
67 | img = np.ascontiguousarray(np.transpose(img, (2, 0, 1)))
68 | return torch.from_numpy(img)
69 |
70 | def __call__(self, example):
71 | ret_dict = {}
72 | for k in ['image', 'image_next', 'image_prev']:
73 | if k in example:
74 | ret_dict[k] = self._trans(example[k], np.float32)
75 | if 'depth' in example:
76 | ret_dict['depth'] = self._trans(example['depth'], np.uint8)
77 | if 'labels' in example:
78 | ret_dict['labels'] = self._trans(example['labels'], np.int64)
79 | if 'original_labels' in example:
80 | ret_dict['original_labels'] = self._trans(example['original_labels'], np.int64)
81 | if 'depth_hist' in example:
82 | ret_dict['depth_hist'] = [self._trans(d, np.float32) for d in example['depth_hist']] if isinstance(
83 | example['depth_hist'], list) else self._trans(example['depth_hist'], np.float32)
84 | if 'pyramid' in example:
85 | ret_dict['pyramid'] = [self._trans(p, np.float32) for p in example['pyramid']]
86 | if 'pyramid_ms' in example:
87 | ret_dict['pyramid_ms'] = [[self._trans(p, np.float32) for p in pyramids] for pyramids in
88 | example['pyramid_ms']]
89 | if 'mux_indices' in example:
90 | ret_dict['mux_indices'] = torch.stack([torch.from_numpy(midx.flatten()) for midx in example['mux_indices']])
91 | if 'mux_masks' in example:
92 | ret_dict['mux_masks'] = [torch.from_numpy(np.uint8(mi)).unsqueeze(0) for mi in example['mux_masks']]
93 | if 'depth_bins' in example:
94 | ret_dict['depth_bins'] = torch.stack([torch.from_numpy(b) for b in example['depth_bins']])
95 | if 'flow' in example:
96 | # ret_dict['flow'] = torch.from_numpy(example['flow']).permute(2, 0, 1).contiguous()
97 | ret_dict['flow'] = torch.from_numpy(np.ascontiguousarray(example['flow']))
98 | # if 'flow_next' in example:
99 | # ret_dict['flow_next'] = torch.from_numpy(example['flow_next']).permute(2, 0, 1 ).contiguous()
100 | if 'flow_sub' in example:
101 | # ret_dict['flow_sub'] = torch.from_numpy(example['flow_sub']).permute(2, 0, 1).contiguous()
102 | ret_dict['flow_sub'] = torch.from_numpy(np.ascontiguousarray(example['flow_sub']))
103 | if 'flipped' in example:
104 | del example['flipped']
105 | return {**example, **ret_dict}
106 |
107 |
108 | class Numpy:
109 | def __call__(self, example):
110 | image = example['image']
111 | axes = [0, 2, 3, 1] if len(image.shape) == 4 else [1, 2, 0]
112 | ret_dict = {
113 | 'image': image.numpy().transpose(axes)
114 | }
115 | for k in ['labels', 'original_labels']:
116 | if k in example and isinstance(example[k], torch.Tensor):
117 | ret_dict[k] = example[k].numpy()
118 | return {**example, **ret_dict}
119 |
120 |
121 | def detection_collate(batch):
122 | """Custom collate fn for dealing with batches of images that have a different
123 | number of associated object annotations (bounding boxes).
124 |
125 | Arguments:
126 | batch: (tuple) A tuple of tensor images and lists of annotations
127 |
128 | Return:
129 | A tuple containing:
130 | 1) (tensor) batch of images stacked on their 0 dim
131 | 2) (list of tensors) annotations for a given image are stacked on 0 dim
132 | """
133 | custom = defaultdict(list)
134 | custom_keys = ['target_size', ]
135 | for sample in batch:
136 | for k in custom_keys:
137 | custom[k] += [sample[k]]
138 | other = {k: default_collate([b[k] for b in batch]) for k in
139 | filter(lambda x: x not in custom, batch[0].keys())}
140 | return {**other, **custom}
141 |
142 |
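# Collate function that keeps batch-level metadata out of default_collate: the keys listed
# in `keys` ('target_size', 'target_size_feats', 'alphas', 'target_level') are read from the
# first sample, removed from every sample before collation, and re-attached to the batch.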
143 | def custom_collate(batch, del_orig_labels=False):
144 | keys = ['target_size', 'target_size_feats', 'alphas', 'target_level']
145 | values = {}
146 | for k in keys:
147 | if k in batch[0]:
148 | values[k] = batch[0][k]
149 | for b in batch:
150 | if del_orig_labels: del b['original_labels']
151 | for k in values.keys():
152 | del b[k]
153 | if 'mux_indices' in b:
154 | b['mux_indices'] = b['mux_indices'].view(-1)
155 | batch = default_collate(batch)
156 | # if 'image_next' in batch:
157 | # batch['image'] = torch.cat([batch['image'], batch['image_next']], dim=0).contiguous()
158 | # del batch['image_next']
159 | for k, v in values.items():
160 | batch[k] = v
161 | return batch
162 |
--------------------------------------------------------------------------------
/data/transform/border.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 | __all__ = ['LabelDistanceTransform', 'NeighborhoodLabels', 'InstanceBorders']
5 |
6 |
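# Computes per-pixel distances to class boundaries; with reduce=True the distances are binned
# and converted to per-pixel weights ('label_distance_alphas') so that pixels close to a
# semantic border receive larger weights, while ignore pixels get weight zero.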
7 | class LabelDistanceTransform:
8 | def __init__(self, num_classes, bins=(4, 16, 64, 128), alphas=(8., 6., 4., 2., 1.), reduce=False,
9 | ignore_id=19):
10 | self.num_classes = num_classes
11 | self.reduce = reduce
12 | self.bins = bins
13 | self.alphas = alphas
14 | self.ignore_id = ignore_id
15 |
16 | def __call__(self, example):
17 | labels = np.array(example['labels'])
18 | present_classes = np.unique(labels)
19 | distances = np.zeros([self.num_classes] + list(labels.shape), dtype=np.float32) - 1.
20 | for i in range(self.num_classes):
21 | if i not in present_classes:
22 | continue
23 | class_mask = labels == i
24 | distances[i][class_mask] = cv2.distanceTransform(np.uint8(class_mask), cv2.DIST_L2, maskSize=5)[class_mask]
25 | if self.reduce:
26 | ignore_mask = labels == self.ignore_id
27 | distances[distances < 0] = 0
28 | distances = distances.sum(axis=0)
29 | label_distance_bins = np.digitize(distances, self.bins)
30 | label_distance_alphas = np.zeros(label_distance_bins.shape, dtype=np.float32)
31 | for idx, alpha in enumerate(self.alphas):
32 | label_distance_alphas[label_distance_bins == idx] = alpha
33 | label_distance_alphas[ignore_mask] = 0
34 | example['label_distance_alphas'] = label_distance_alphas
35 | else:
36 | example['label_distance_transform'] = distances
37 | return example
38 |
39 |
40 | class InstanceBorders:
41 | def __init__(self, instance_classes=8, thresh=.3):
42 | self.instance_classes = instance_classes
43 | self.thresh = thresh
44 |
45 | def __call__(self, example):
46 | shape = [self.instance_classes] + list(example['labels'].size)[::-1]
47 | instance_borders = np.zeros(shape, dtype=np.float32)
48 | instances = example['instances']
49 | for k in instances:
50 | for instance in instances[k]:
51 | dist_trans = cv2.distanceTransform(instance.astype(np.uint8), cv2.DIST_L2, maskSize=5)
52 | dist_trans[instance] = 1. / dist_trans[instance]
53 | dist_trans[dist_trans < self.thresh] = .0
54 | instance_borders[k] += dist_trans
55 | example['instance_borders'] = instance_borders
56 | return example
57 |
58 |
59 | class NeighborhoodLabels:
60 | def __init__(self, num_classes, k=3, stride=1, discrete=False):
61 | self.num_classes = num_classes
62 | self.k = k
63 | self.pad = k // 2
64 | self.stride = stride
65 | self.discrete = discrete
66 |
67 | def __call__(self, example):
68 | labels = np.array(example['labels'])
69 | p = self.pad
70 | labels_padded = self.num_classes * np.ones([1, 1] + [sh + 2 * p for sh in labels.shape], dtype=labels.dtype)
71 | labels_padded[..., p:-p, p:-p] = labels.copy()
72 | label_col = im2col_cython.im2col_cython(labels_padded, self.k, self.k, padding=0, stride=self.stride)
73 | label_col_hist = im2col_cython.hist_from_cols(label_col, self.num_classes).reshape(
74 | [self.num_classes + 1] + list(labels.shape))
75 | label_neighborhood_hist = label_col_hist / np.float32(self.k ** 2)
76 | if self.discrete:
77 | example['label_neighborhood_hist'] = (label_neighborhood_hist[:self.num_classes] > 0.).astype(np.float32)
78 | else:
79 | example['label_neighborhood_hist'] = label_neighborhood_hist
80 | return example
81 |
--------------------------------------------------------------------------------
/data/transform/class_uniform.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | from tqdm import tqdm
4 | import random
5 | from PIL import Image as pimg
6 | from collections import defaultdict
7 | import warnings
8 |
9 | from data.transform import RESAMPLE, RESAMPLE_D
10 | from data.util import bb_intersection_over_union, crop_and_scale_img
11 | from data.transform.flow_utils import crop_and_scale_flow
12 |
13 | __all__ = ['create_class_uniform_strategy', 'ClassUniformSquareCropAndScale']
14 |
15 |
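# Builds a per-epoch sampling strategy for class-uniform cropping: classes are visited from
# rarest to most frequent, each class is assigned a number of images proportional to its
# inverse incidence, and each chosen image is mapped to the class to crop around in that
# epoch (unchosen images map to the ignore id).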
16 | def create_class_uniform_strategy(instances, incidences, epochs=1):
17 | incidences = incidences[:-1] # remove ignore id
18 | num_images = len(instances)
19 | num_classes = incidences.shape[0]
20 | present_in_image = np.zeros((num_images, num_classes), dtype=np.uint32)
21 | image_names = np.array(list(instances.keys()))
22 |
23 | for i, (k, v) in enumerate(tqdm(instances.items(), total=len(instances))):
24 | for idx in v.keys():
25 | if idx >= num_classes:
26 | continue
27 | present_in_image[i, idx] += len(v[idx])
28 |
29 | class_incidence_histogram = incidences / incidences.sum()
30 | indices_by_occurence = np.argsort(class_incidence_histogram)
31 | p_r = class_incidence_histogram.sum() / class_incidence_histogram
32 | p_r[np.logical_or(np.isnan(p_r), np.isinf(p_r))] = 0.
33 | p_r /= p_r.sum()
34 | images_to_sample = np.round(num_images * p_r).astype(np.uint32)
35 |
36 | # weights = ((present_in_image > 0) * p_r.reshape(1, -1)).sum(-1)
37 | weights = (present_in_image * p_r.reshape(1, -1)).sum(-1)
38 |
39 | strategy = []
40 | for e in range(epochs):
41 | chosen_classes = {}
42 | chosen_class = num_classes * np.ones(num_images, dtype=np.uint32)
43 |         is_image_chosen = np.zeros(num_images, dtype=bool)
44 | for idx in indices_by_occurence:
45 |             possibilities = np.where((present_in_image[:, idx] > 0) & ~is_image_chosen)[0]
46 | to_sample = min(images_to_sample[idx], len(possibilities))
47 | chosen = np.random.choice(possibilities, to_sample)
48 | is_image_chosen[chosen] = 1
49 | chosen_class[chosen] = idx
50 | for n, c in zip(image_names, chosen_class):
51 | chosen_classes[n] = c
52 | strategy += [chosen_classes]
53 | statistics = defaultdict(int)
54 | for v in chosen_classes.values():
55 | statistics[v] += 1
56 | return strategy, weights
57 |
58 |
59 | class ClassUniformSquareCropAndScale:
60 | def __init__(self, wh, mean, ignore_id, strategy, class_instances, min=.5, max=2.,
61 | scale_method=lambda scale, wh, size: int(scale * wh), p_true_random_crop=.5):
62 | self.wh = wh
63 | self.min = min
64 | self.max = max
65 | self.mean = mean
66 | self.ignore_id = ignore_id
67 | self.random_gens = [self._rand_location, self._gen_instance_box]
68 | self.scale_method = scale_method
69 | self.strategy = strategy
70 | self.class_instances = class_instances
71 | self.p_true_random_crop = p_true_random_crop
72 |
73 | def _random_instance(self, name, epoch):
74 | instances = self.class_instances[name]
75 | chosen_class = self.strategy[epoch][name]
76 | if chosen_class == self.ignore_id:
77 | return None
78 | try:
79 | return random.choice(instances[chosen_class])
80 | except IndexError:
81 | return None
82 |
83 | def _gen_instance_box(self, W, H, target_wh, name, flipped, epoch):
84 | # warnings.warn(f'ClassUniformSquareCropAndScale, epoch {epoch}')
85 | bbox = self._random_instance(name, epoch)
86 | if bbox is not None:
87 | if not (random.uniform(0, 1) < self.p_true_random_crop):
88 | wmin, wmax, hmin, hmax = bbox
89 | if flipped:
90 | wmin, wmax = W - 1 - wmax, W - 1 - wmin
91 | inst_box = [wmin, hmin, wmax, hmax]
92 | for _ in range(50):
93 | box = self._rand_location(W, H, target_wh)
94 | if bb_intersection_over_union(box, inst_box) > 0.:
95 | break
96 | return box
97 | return self._rand_location(W, H, target_wh)
98 |
99 | def _rand_location(self, W, H, target_wh, *args, **kwargs):
100 | try:
101 | w = np.random.randint(0, W - target_wh + 1)
102 | h = np.random.randint(0, H - target_wh + 1)
103 | except ValueError:
104 | print(f'Exception in RandomSquareCropAndScale: {target_wh}')
105 | w = h = 0
106 | # left, upper, right, lower)
107 | return w, h, w + target_wh, h + target_wh
108 |
109 | def _trans(self, img: pimg, crop_box, target_size, pad_size, resample, blank_value):
110 | return crop_and_scale_img(img, crop_box, target_size, pad_size, resample, blank_value)
111 |
112 | def __call__(self, example):
113 | image = example['image']
114 | scale = np.random.uniform(self.min, self.max)
115 | W, H = image.size
116 | box_size = self.scale_method(scale, self.wh, image.size)
117 | pad_size = (max(box_size, W), max(box_size, H))
118 | target_size = (self.wh, self.wh)
119 | flipped = example['flipped'] if 'flipped' in example else False
120 | crop_box = self._gen_instance_box(pad_size[0], pad_size[1], box_size, example.get('name'), flipped,
121 | example.get('epoch', 0))
122 | ret_dict = {
123 | 'image': self._trans(image, crop_box, target_size, pad_size, RESAMPLE, self.mean),
124 | }
125 | if 'labels' in example:
126 | ret_dict['labels'] = self._trans(example['labels'], crop_box, target_size, pad_size, pimg.NEAREST,
127 | self.ignore_id)
128 | for k in ['image_prev', 'image_next']:
129 | if k in example:
130 | ret_dict[k] = self._trans(example[k], crop_box, target_size, pad_size, RESAMPLE,
131 | self.mean)
132 | if 'depth' in example:
133 | ret_dict['depth'] = self._trans(example['depth'], crop_box, target_size, pad_size, RESAMPLE_D, 0)
134 | if 'flow' in example:
135 | ret_dict['flow'] = crop_and_scale_flow(example['flow'], crop_box, target_size, pad_size, scale)
136 | return {**example, **ret_dict}
137 |
--------------------------------------------------------------------------------
/data/transform/flow.py:
--------------------------------------------------------------------------------
1 | from .flow_utils import subsample_flow
2 |
3 | __all__ = ['SubsampleFlow']
4 |
5 |
6 | class SubsampleFlow:
7 | def __init__(self, subsampling=4):
8 | self.subsampling = subsampling
9 |
10 | def __call__(self, example):
11 | example['flow_sub'] = subsample_flow(example['flow'], self.subsampling)
12 | return example
13 |
--------------------------------------------------------------------------------
/data/transform/flow_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | import cv2
4 | import numpy as np
5 | from PIL import Image as pimg
6 |
7 | from data.util import crop_and_scale_img
8 |
9 | '''
10 | Adapted from https://github.com/NVIDIA/flownet2-pytorch
11 | '''
12 |
13 |
14 | def readFlow(fn):
15 | """ Read .flo file in Middlebury format"""
16 | # Code adapted from:
17 | # http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy
18 |
19 | # WARNING: this will work on little-endian architectures (eg Intel x86) only!
20 | # print 'fn = %s'%(fn)
21 | with open(fn, 'rb') as f:
22 | magic = np.fromfile(f, np.float32, count=1)
23 | if 202021.25 != magic:
24 | print('Magic number incorrect. Invalid .flo file')
25 | return None
26 | else:
27 | w = np.fromfile(f, np.int32, count=1)
28 | h = np.fromfile(f, np.int32, count=1)
29 | # print 'Reading %d x %d flo file\n' % (w, h)
30 | data = np.fromfile(f, np.float32, count=2 * int(w) * int(h))
31 | # Reshape data into 3D array (columns, rows, bands)
32 | # The reshape here is for visualization, the original code is (w,h,2)
33 | return np.resize(data, (int(h), int(w), 2))
34 |
35 |
36 | def flow2rgb(flow):
37 | hsv = np.zeros(list(flow.shape[:-1]) + [3], dtype=np.uint8)
38 | hsv[..., 1] = 255
39 | mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
40 | hsv[..., 0] = ang * 180 / np.pi / 2
41 | hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
42 | return cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
43 |
44 |
45 | def offset_flow(img, flow):
46 | '''
47 | :param img: torch.FloatTensor of shape NxCxHxW
48 | :param flow: torch.FloatTensor of shape NxHxWx2
49 | :return: torch.FloatTensor of shape NxCxHxW
50 | '''
51 | N, C, H, W = img.shape
52 | # generate identity sampling grid
53 | gx, gy = torch.meshgrid(torch.arange(H), torch.arange(W))
54 | gx = gx.float().div(gx.max() - 1).view(1, H, W, 1)
55 | gy = gy.float().div(gy.max() - 1).view(1, H, W, 1)
56 | grid = torch.cat([gy, gx], dim=-1).mul(2.).sub(1)
57 | # generate normalized flow field
58 | flown = flow.clone()
59 | flown[..., 0] /= W
60 | flown[..., 1] /= H
61 | # calculate offset field
62 | grid += flown
63 | return F.grid_sample(img, grid), grid
64 |
65 |
66 | def backward_warp(x, flo):
67 | """
68 | warp an image/tensor (im2) back to im1, according to the optical flow
69 | x: [B, C, H, W] (im2)
70 | flo: [B, 2, H, W] flow
71 | """
72 | B, C, H, W = x.size()
73 | # mesh grid
74 | xx = torch.arange(0, W).to(x.device).view(1, -1).repeat(H, 1)
75 | yy = torch.arange(0, H).to(x.device).view(-1, 1).repeat(1, W)
76 | xx = xx.view(1, 1, H, W).repeat(B, 1, 1, 1)
77 | yy = yy.view(1, 1, H, W).repeat(B, 1, 1, 1)
78 | grid = torch.cat((xx, yy), 1).float()
79 |
80 | vgrid = grid + flo
81 |
82 | # scale grid to [-1,1]
83 | vgrid[:, 0, :, :] = 2.0 * vgrid[:, 0, :, :].clone() / max(W - 1, 1) - 1.0
84 | vgrid[:, 1, :, :] = 2.0 * vgrid[:, 1, :, :].clone() / max(H - 1, 1) - 1.0
85 |
86 | vgrid = vgrid.permute(0, 2, 3, 1)
87 | output = F.grid_sample(x, vgrid)
88 |
89 | mask = torch.ones_like(x)
90 | mask = F.grid_sample(mask, vgrid)
91 |
92 | mask[mask < 0.9999] = 0
93 | mask[mask > 0] = 1
94 |
95 | return output * mask, mask > 0.
96 |
97 |
98 | def pad_flow(flow, size):
99 | h, w, _ = flow.shape
100 | shape = list(size) + [2]
101 | new_flow = np.zeros(shape, dtype=flow.dtype)
102 |     new_flow[:h, :w] = flow
103 |     return new_flow
104 |
105 | def flip_flow_horizontal(flow):
106 | flow = np.flip(flow, axis=1)
107 | flow[..., 0] *= -1
108 | return flow
109 |
110 |
111 | def crop_and_scale_flow(flow, crop_box, target_size, pad_size, scale):
112 | def _trans(uv):
113 | return crop_and_scale_img(uv, crop_box, target_size, pad_size, resample=pimg.NEAREST, blank_value=0)
114 |
115 | u, v = [pimg.fromarray(uv.squeeze()) for uv in np.split(flow * scale, 2, axis=-1)]
116 | dtype = flow.dtype
117 | return np.stack([np.array(_trans(u), dtype=dtype), np.array(_trans(v), dtype=dtype)], axis=-1)
118 |
119 |
120 | def subsample_flow(flow, subsampling):
121 | dtype = flow.dtype
122 | u, v = [pimg.fromarray(uv.squeeze()) for uv in np.split(flow / subsampling, 2, axis=-1)]
123 | size = tuple([int(round(wh / subsampling)) for wh in u.size])
124 | u, v = u.resize(size), v.resize(size)
125 | return np.stack([np.array(u, dtype=dtype), np.array(v, dtype=dtype)], axis=-1)
126 |
--------------------------------------------------------------------------------
/data/transform/jitter.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import random
3 | from math import ceil
4 |
5 | import numpy as np
6 | import torch
7 | from PIL import Image as pimg
8 |
9 | from data.transform import RESAMPLE, RESAMPLE_D
10 | from data.transform.flow_utils import pad_flow, crop_and_scale_flow, flip_flow_horizontal
11 | from data.util import bb_intersection_over_union, crop_and_scale_img
12 |
13 | __all__ = ['Pad', 'PadToFactor', 'Normalize', 'Denormalize', 'DenormalizeTh', 'Resize', 'RandomFlip',
14 | 'RandomSquareCropAndScale', 'ResizeLongerSide', 'Downsample']
15 |
16 |
17 | class Pad:
18 | def __init__(self, size, ignore_id, mean):
19 | self.size = size
20 | self.ignore_id = ignore_id
21 | self.mean = mean
22 |
23 | def _do(self, data, color):
24 | blank = pimg.new(mode=data.mode, size=self.size, color=color)
25 | blank.paste(data)
26 | return blank
27 |
28 | def __call__(self, example):
29 | ret_dict = {}
30 | for k, c in zip(['image', 'labels', 'original_labels', 'image_next', 'image_prev'],
31 | [self.mean, self.ignore_id, self.ignore_id, self.mean, self.mean]):
32 | if k in example:
33 | ret_dict[k] = self._do(example[k], c)
34 | if 'flow' in example:
35 | ret_dict['flow'] = pad_flow(example['flow'], self.size)
36 | return {**example, **ret_dict}
37 |
38 |
39 | class PadToFactor:
40 | def __init__(self, factor, ignore_id, mean):
41 | self.factor = factor
42 | self.ignore_id = ignore_id
43 | self.mean = mean
44 |
45 | def _do(self, data, color, size):
46 | blank = pimg.new(mode=data.mode, size=size, color=color)
47 | blank.paste(data)
48 | return blank
49 |
50 | def __call__(self, example):
51 | ret_dict = {}
52 | size = tuple(map(lambda x: ceil(x / self.factor) * self.factor, example['image'].size))
53 | for k, c in zip(['image', 'labels', 'original_labels', 'image_next', 'image_prev'],
54 |                         [self.mean, self.ignore_id, self.ignore_id, self.mean, self.mean]):
55 | if k in example:
56 | ret_dict[k] = self._do(example[k], c, size)
57 | if 'flow' in example:
58 | ret_dict['flow'] = pad_flow(example['flow'], size)
59 | return {**example, **ret_dict}
60 |
61 |
62 | class Norm:
63 | def __init__(self, scale, mean, std):
64 | self.scale = scale
65 | self.mean = mean
66 | self.std = std
67 |
68 | def _trans(self, img):
69 | raise NotImplementedError
70 |
71 | def __call__(self, example):
72 | ret_dict = {
73 | 'image': self._trans(example['image'])
74 | }
75 | for k in ['image_prev', 'image_next']:
76 | if k in example:
77 | ret_dict[k] = self._trans(example[k])
78 | if 'pyramid' in example:
79 | ret_dict['pyramid'] = [self._trans(p) for p in example['pyramid']]
80 | if 'pyramid_ms' in example:
81 | ret_dict['pyramid_ms'] = [[self._trans(p) for p in pyramid] for pyramid in example['pyramid_ms']]
82 | return {**example, **ret_dict}
83 |
84 |
85 | class Normalize(Norm):
86 | def _trans(self, img):
87 | img = np.array(img).astype(np.float32)
88 | if self.scale != 1:
89 | img /= self.scale
90 | img -= self.mean
91 | img /= self.std
92 | return img
93 |
94 |
95 | class Denormalize(Norm):
96 | def _trans(self, img):
97 | img = np.array(img)
98 | img *= self.std
99 | img += self.mean
100 | if self.scale != 1:
101 | img *= self.scale
102 | return img
103 |
104 |
105 | class DenormalizeTh(Norm):
106 | def __init__(self, scale, mean, std):
107 | super(DenormalizeTh, self).__init__(scale, mean, std)
108 | self.mean = torch.FloatTensor(mean).view(1, 3, 1, 1)
109 | self.std = torch.FloatTensor(std).view(1, 3, 1, 1)
110 |
111 | def _trans(self, img):
112 | img *= self.std
113 | img += self.mean
114 | if self.scale != 1:
115 | img *= self.scale
116 | return img
117 |
118 |
119 | class Downsample:
120 | def __init__(self, factor=2):
121 | self.factor = factor
122 |
123 | def __call__(self, example):
124 | if self.factor <= 1:
125 | return example
126 | W, H = example['image'].size
127 | w, h = W // self.factor, H // self.factor
128 | size = (w, h)
129 | ret_dict = {
130 | 'image': example['image'].resize(size, resample=RESAMPLE),
131 | 'labels': example['labels'].resize(size, resample=pimg.NEAREST),
132 | }
133 | if 'depth' in example:
134 | ret_dict['depth'] = example['depth'].resize(size, resample=RESAMPLE)
135 | return {**example, **ret_dict}
136 |
137 |
138 | class RandomSquareCropAndScale:
139 | def __init__(self, wh, mean, ignore_id, min=.5, max=2., class_incidence=None, class_instances=None,
140 | inst_classes=(3, 12, 14, 15, 16, 17, 18), scale_method=lambda scale, wh, size: int(scale * wh)):
141 | self.wh = wh
142 | self.min = min
143 | self.max = max
144 | self.mean = mean
145 | self.ignore_id = ignore_id
146 | self.random_gens = [self._rand_location]
147 | self.scale_method = scale_method
148 |
149 | if class_incidence is not None and class_instances is not None:
150 | self.true_random = False
151 | class_incidence_obj = np.load(class_incidence)
152 | with open(class_instances, 'rb') as f:
153 | self.class_instances = pickle.load(f)
154 | inst_classes = np.array(inst_classes)
155 | class_freq = class_incidence_obj[inst_classes].astype(np.float32)
156 | class_prob = 1. / (class_freq / class_freq.sum())
157 | class_prob /= class_prob.sum()
158 | self.p_class = {k.item(): v.item() for k, v in zip(inst_classes, class_prob)}
159 | self.random_gens += [self._gen_instance_box]
160 | print(f'Instance based random cropping:\n\t{self.p_class}')
161 |
162 | def _random_instance(self, name, W, H):
163 | def weighted_random_choice(choices):
164 | max = sum(choices)
165 | pick = random.uniform(0, max)
166 | key, current = 0, 0.
167 | for key, value in enumerate(choices):
168 | current += value
169 | if current > pick:
170 | return key
171 | key += 1
172 | return key
173 |
174 | instances = self.class_instances[name]
175 | possible_classes = list(set(self.p_class.keys()).intersection(instances.keys()))
176 | roulette = []
177 | flat_instances = []
178 | for c in possible_classes:
179 | flat_instances += instances[c]
180 | roulette += [self.p_class[c]] * len(instances[c])
181 | if len(flat_instances) == 0:
182 | return [0, W - 1, 0, H - 1]
183 | index = weighted_random_choice(roulette)
184 | return flat_instances[index]
185 |
186 | def _gen_instance_box(self, W, H, target_wh, name, flipped):
187 | wmin, wmax, hmin, hmax = self._random_instance(name, W, H)
188 | if flipped:
189 | wmin, wmax = W - 1 - wmax, W - 1 - wmin
190 | inst_box = [wmin, hmin, wmax, hmax]
191 | for _ in range(50):
192 | box = self._rand_location(W, H, target_wh)
193 | if bb_intersection_over_union(box, inst_box) > 0.:
194 | break
195 | return box
196 |
197 | def _rand_location(self, W, H, target_wh, *args, **kwargs):
198 | try:
199 | w = np.random.randint(0, W - target_wh + 1)
200 | h = np.random.randint(0, H - target_wh + 1)
201 | except ValueError:
202 | print(f'Exception in RandomSquareCropAndScale: {target_wh}')
203 | w = h = 0
204 | # left, upper, right, lower)
205 | return w, h, w + target_wh, h + target_wh
206 |
207 | def _trans(self, img: pimg, crop_box, target_size, pad_size, resample, blank_value):
208 | return crop_and_scale_img(img, crop_box, target_size, pad_size, resample, blank_value)
209 |
210 | def __call__(self, example):
211 | image = example['image']
212 | scale = np.random.uniform(self.min, self.max)
213 | W, H = image.size
214 | box_size = self.scale_method(scale, self.wh, image.size)
215 | pad_size = (max(box_size, W), max(box_size, H))
216 | target_size = (self.wh, self.wh)
217 | crop_fn = random.choice(self.random_gens)
218 | flipped = example['flipped'] if 'flipped' in example else False
219 | crop_box = crop_fn(pad_size[0], pad_size[1], box_size, example.get('name'), flipped)
220 | ret_dict = {
221 | 'image': self._trans(image, crop_box, target_size, pad_size, RESAMPLE, self.mean),
222 | }
223 | if 'labels' in example:
224 | ret_dict['labels'] = self._trans(example['labels'], crop_box, target_size, pad_size, pimg.NEAREST, self.ignore_id)
225 | for k in ['image_prev', 'image_next']:
226 | if k in example:
227 | ret_dict[k] = self._trans(example[k], crop_box, target_size, pad_size, RESAMPLE,
228 | self.mean)
229 | if 'depth' in example:
230 | ret_dict['depth'] = self._trans(example['depth'], crop_box, target_size, pad_size, RESAMPLE_D, 0)
231 | if 'flow' in example:
232 | ret_dict['flow'] = crop_and_scale_flow(example['flow'], crop_box, target_size, pad_size, scale)
233 | return {**example, **ret_dict}
234 |
235 |
236 | class RandomFlip:
237 | def _trans(self, img: pimg, flip: bool):
238 | return img.transpose(pimg.FLIP_LEFT_RIGHT) if flip else img
239 |
240 | def __call__(self, example):
241 | flip = np.random.choice([False, True])
242 | ret_dict = {}
243 | for k in ['image', 'image_next', 'image_prev', 'labels', 'depth']:
244 | if k in example:
245 | ret_dict[k] = self._trans(example[k], flip)
246 | if ('flow' in example) and flip:
247 | ret_dict['flow'] = flip_flow_horizontal(example['flow'])
248 | return {**example, **ret_dict}
249 |
250 |
251 | class Resize:
252 | def __init__(self, size):
253 | self.size = size
254 |
255 | def __call__(self, example):
256 | # raise NotImplementedError()
257 | ret_dict = {'image': example['image'].resize(self.size, resample=RESAMPLE)}
258 | if 'labels' in example:
259 | ret_dict['labels'] = example['labels'].resize(self.size, resample=pimg.NEAREST)
260 | if 'depth' in example:
261 | ret_dict['depth'] = example['depth'].resize(self.size, resample=RESAMPLE_D)
262 | return {**example, **ret_dict}
263 |
264 |
265 | class ResizeLongerSide:
266 | def __init__(self, size):
267 | self.size = size
268 |
269 | def __call__(self, example):
270 | ret_dict = {}
271 | k = 'image' if 'image' in example else 'labels'
272 | scale = self.size / max(example[k].size)
273 | size = tuple([int(wh * scale) for wh in example[k].size])
274 | if 'image' in example:
275 | ret_dict['image'] = example['image'].resize(size, resample=RESAMPLE)
276 | if 'labels' in example:
277 | ret_dict['labels'] = example['labels'].resize(size, resample=pimg.NEAREST)
278 | # if 'original_labels' in example:
279 | # ret_dict['original_labels'] = example['original_labels'].resize(size, resample=pimg.NEAREST)
280 | if 'depth' in example:
281 | ret_dict['depth'] = example['depth'].resize(size, resample=RESAMPLE_D)
282 | return {**example, **ret_dict}
283 |
--------------------------------------------------------------------------------
/data/transform/labels.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 |
3 | import numpy as np
4 | from PIL import Image as pimg
5 |
6 | __all__ = ['ExtractInstances', 'RemapLabels', 'ColorizeLabels']
7 |
8 |
9 | class ExtractInstances:
10 | def __init__(self, inst_map_to_id=None):
11 | self.inst_map_to_id = inst_map_to_id
12 |
13 | def __call__(self, example: dict):
14 | labels = np.int32(example['labels'])
15 | unique_ids = np.unique(labels)
16 | instances = defaultdict(list)
17 | for id in filter(lambda x: x > 1000, unique_ids):
18 | cls = self.inst_map_to_id.get(id // 1000, None)
19 | if cls is not None:
20 | instances[cls] += [labels == id]
21 | example['instances'] = instances
22 | return example
23 |
24 |
25 | class RemapLabels:
26 | def __init__(self, mapping: dict, ignore_id, total=35):
27 | self.mapping = np.ones((max(total, max(mapping.keys())) + 1,), dtype=np.uint8) * ignore_id
28 | self.ignore_id = ignore_id
29 | for i in range(len(self.mapping)):
30 | self.mapping[i] = mapping[i] if i in mapping else ignore_id
31 |
32 | def _trans(self, labels):
33 | max_k = self.mapping.shape[0] - 1
34 | labels[labels > max_k] //= 1000
35 | labels = self.mapping[labels].astype(labels.dtype)
36 | return labels
37 |
38 | def __call__(self, example):
39 | if not isinstance(example, dict):
40 | return self._trans(example)
41 | if 'labels' not in example:
42 | return example
43 | ret_dict = {'labels': pimg.fromarray(self._trans(np.array(example['labels'])))}
44 | if 'original_labels' in example:
45 | ret_dict['original_labels'] = pimg.fromarray(self._trans(np.array(example['original_labels'])))
46 | return {**example, **ret_dict}
47 |
48 |
49 | class ColorizeLabels:
50 | def __init__(self, color_info):
51 | self.color_info = np.array(color_info)
52 |
53 | def _trans(self, lab):
54 | R, G, B = [np.zeros_like(lab) for _ in range(3)]
55 | for l in np.unique(lab):
56 | mask = lab == l
57 | R[mask] = self.color_info[l][0]
58 | G[mask] = self.color_info[l][1]
59 | B[mask] = self.color_info[l][2]
60 | return np.stack((R, G, B), axis=-1).astype(np.uint8)
61 |
62 | def __call__(self, example):
63 | if not isinstance(example, dict):
64 | return self._trans(example)
65 | assert 'labels' in example
66 | return {**example, **{'labels': self._trans(example['labels']),
67 | 'original_labels': self._trans(example['original_labels'])}}
68 |
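
A minimal usage sketch for `RemapLabels`; the mapping below is a toy three-entry table for illustration, not the real Cityscapes label mapping:

```python
import numpy as np

from data.transform.labels import RemapLabels  # as defined above

# Toy mapping: raw ids 7 and 26 become train ids 0 and 1, everything else ignore_id.
remap = RemapLabels(mapping={7: 0, 26: 1}, ignore_id=2, total=35)

raw = np.array([[7, 26, 33],
                [0,  7, 26]], dtype=np.uint8)
print(remap(raw))
# [[0 1 2]
#  [2 0 1]]
```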
--------------------------------------------------------------------------------
/data/transform/photometric.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | import cv2
4 | from PIL import Image as pimg
5 |
6 | __all__ = ['PhotometricDistort']
7 |
8 |
9 | class Compose(object):
10 |
11 | def __init__(self, transforms):
12 | self.transforms = transforms
13 |
14 | def __call__(self, img):
15 | for t in self.transforms:
16 | img = t(img)
17 | return img
18 |
19 |
20 | class RandomSaturation(object):
21 | def __init__(self, lower=0.5, upper=1.5):
22 | self.lower = lower
23 | self.upper = upper
24 |         assert self.upper >= self.lower, "saturation upper must be >= lower."
25 |         assert self.lower >= 0, "saturation lower must be non-negative."
26 |
27 | def __call__(self, image):
28 |         if random.randint(0, 2):  # nonzero 2 times out of 3, so the jitter is applied with probability 2/3
29 | image[:, :, 1] *= random.uniform(self.lower, self.upper)
30 |
31 | return image
32 |
33 |
34 | class RandomHue(object):
35 | def __init__(self, delta=18.0):
36 | assert 0.0 <= delta <= 360.0
37 | self.delta = delta
38 |
39 | def __call__(self, image):
40 | if random.randint(0, 2):
41 | image[:, :, 0] += random.uniform(-self.delta, self.delta)
42 | image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0
43 | image[:, :, 0][image[:, :, 0] < 0.0] += 360.0
44 | return image
45 |
46 |
47 | class SwapChannels(object):
48 | """Transforms a tensorized image by swapping the channels in the order
49 | specified in the swap tuple.
50 | Args:
51 | swaps (int triple): final order of channels
52 | eg: (2, 1, 0)
53 | """
54 |
55 | def __init__(self, swaps):
56 | self.swaps = swaps
57 |
58 | def __call__(self, image):
59 | """
60 | Args:
61 | image (Tensor): image tensor to be transformed
62 | Return:
63 | a tensor with channels swapped according to swap
64 | """
65 | image = image[:, :, self.swaps]
66 | return image
67 |
68 |
69 | class RandomLightingNoise(object):
70 | def __init__(self):
71 | self.perms = ((0, 1, 2), (0, 2, 1),
72 | (1, 0, 2), (1, 2, 0),
73 | (2, 0, 1), (2, 1, 0))
74 |
75 | def __call__(self, image):
76 | if random.randint(0, 2):
77 | swap = self.perms[random.randint(0, len(self.perms) - 1)]
78 | shuffle = SwapChannels(swap) # shuffle channels
79 | image = shuffle(image)
80 | return image
81 |
82 |
83 | class ConvertColor(object):
84 | def __init__(self, current='BGR', transform='HSV'):
85 | self.transform = transform
86 | self.current = current
87 |
88 | def __call__(self, image):
89 | if self.current == 'BGR' and self.transform == 'HSV':
90 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
91 | elif self.current == 'HSV' and self.transform == 'BGR':
92 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
93 | else:
94 | raise NotImplementedError
95 | return image
96 |
97 |
98 | class RandomContrast(object):
99 | def __init__(self, lower=0.5, upper=1.5):
100 | self.lower = lower
101 | self.upper = upper
102 | assert self.upper >= self.lower, "contrast upper must be >= lower."
103 | assert self.lower >= 0, "contrast lower must be non-negative."
104 |
105 | # expects float image
106 | def __call__(self, image):
107 | if random.randint(0, 2):
108 | alpha = random.uniform(self.lower, self.upper)
109 | image *= alpha
110 | return image
111 |
112 |
113 | class RandomBrightness(object):
114 | def __init__(self, delta=32):
115 | assert delta >= 0.0
116 | assert delta <= 255.0
117 | self.delta = delta
118 |
119 | def __call__(self, image):
120 | if random.randint(0, 2):
121 | delta = random.uniform(-self.delta, self.delta)
122 | image += delta
123 | return image
124 |
125 |
126 | class PhotometricDistort(object):
127 | def __init__(self):
128 | self.pd = [
129 | RandomContrast(),
130 | ConvertColor(transform='HSV'),
131 | RandomSaturation(),
132 | RandomHue(),
133 | ConvertColor(current='HSV', transform='BGR'),
134 | RandomContrast()
135 | ]
136 | self.rand_brightness = RandomBrightness()
137 | self.rand_light_noise = RandomLightingNoise()
138 |
139 | def __call__(self, example):
140 | image = np.float32(example['image'])
141 | im = image.copy()
142 | im = self.rand_brightness(im)
143 | if random.randint(0, 2):
144 | distort = Compose(self.pd[:-1])
145 | else:
146 | distort = Compose(self.pd[1:])
147 | im = distort(im)
148 | im = self.rand_light_noise(im)
149 | ret = {
150 | 'image': pimg.fromarray(np.uint8(im)),
151 | }
152 | return {**example, **ret}
153 |
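
A minimal sketch of applying `PhotometricDistort` to an example dict; the random image below is only a stand-in for a real crop. Only the `'image'` entry is replaced; labels and any other keys pass through untouched:

```python
import numpy as np
from PIL import Image as pimg

from data.transform.photometric import PhotometricDistort  # as defined above

distort = PhotometricDistort()

# Synthetic RGB crop standing in for a Cityscapes image.
example = {'image': pimg.fromarray(np.random.randint(0, 256, (64, 128, 3), dtype=np.uint8))}
out = distort(example)
print(out['image'].size, out['image'].mode)  # (128, 64) 'RGB', with jittered brightness/contrast/saturation/hue
```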
--------------------------------------------------------------------------------
/data/util.py:
--------------------------------------------------------------------------------
1 | import random
2 | from torch.utils.data import Dataset
3 | import torch
4 | import numpy as np
5 | import pickle
6 | from collections import defaultdict
7 | from PIL import Image as pimg
8 |
9 |
10 | def disparity_distribution_uniform(max_disp, num_bins):
11 | return np.linspace(0, max_disp, num_bins - 1)
12 |
13 |
14 | def disparity_distribution_log(num_bins):
15 | return np.power(np.sqrt(2), np.arange(num_bins - 1))
16 |
17 |
18 | def downsample_distribution(labels, factor, num_classes):
19 | h, w = labels.shape
20 | assert h % factor == 0 and w % factor == 0
21 | new_h = h // factor
22 | new_w = w // factor
23 | labels_4d = np.ascontiguousarray(labels.reshape(new_h, factor, new_w, factor), labels.dtype)
24 | labels_oh = np.eye(num_classes, dtype=np.float32)[labels_4d]
25 | target_dist = labels_oh.sum((1, 3)) / factor ** 2
26 | return target_dist
27 |
28 |
29 | def downsample_distribution_th(labels, factor, num_classes, ignore_id=None):
30 | n, h, w = labels.shape
31 | assert h % factor == 0 and w % factor == 0
32 | new_h = h // factor
33 | new_w = w // factor
34 | labels_4d = labels.view(n, new_h, factor, new_w, factor)
35 | labels_oh = torch.eye(num_classes).to(labels_4d.device)[labels_4d]
36 | target_dist = labels_oh.sum(2).sum(3) / factor ** 2
37 | return target_dist
38 |
39 |
40 | def downsample_labels_th(labels, factor, num_classes):
41 | '''
42 | :param labels: Tensor(N, H, W)
43 | :param factor: int
44 | :param num_classes: int
45 | :return: FloatTensor(-1, num_classes), ByteTensor(-1, 1)
46 | '''
47 | n, h, w = labels.shape
48 | assert h % factor == 0 and w % factor == 0
49 | new_h = h // factor
50 | new_w = w // factor
51 | labels_4d = labels.view(n, new_h, factor, new_w, factor)
52 | # +1 class here because ignore id = num_classes
53 | labels_oh = torch.eye(num_classes + 1).to(labels_4d.device)[labels_4d]
54 | target_dist = labels_oh.sum(2).sum(3) / factor ** 2
55 | C = target_dist.shape[-1]
56 | target_dist = target_dist.view(-1, C)
57 | # keep only boxes which have p(ignore) < 0.5
58 | valid_mask = target_dist[:, -1] < 0.5
59 | target_dist = target_dist[:, :-1].contiguous()
60 | dist_sum = target_dist.sum(1, keepdim=True)
61 | # avoid division by zero
62 | dist_sum[dist_sum == 0] = 1
63 | # renormalize distribution after removing p(ignore)
64 | target_dist /= dist_sum
65 | return target_dist, valid_mask
66 |
67 |
68 | def equalize_hist_disparity_distribution(d, L):
69 | cd = np.cumsum(d / d.sum())
70 | Y = np.round((L - 1) * cd).astype(np.uint8)
71 | return np.array([np.argmax(Y == i) for i in range(L - 1)])
72 |
73 |
74 | def bb_intersection_over_union(boxA, boxB):
75 | # determine the (x, y)-coordinates of the intersection rectangle
76 | xA = max(boxA[0], boxB[0])
77 | yA = max(boxA[1], boxB[1])
78 | xB = min(boxA[2], boxB[2])
79 | yB = min(boxA[3], boxB[3])
80 |
81 | # compute the area of intersection rectangle
82 | interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
83 |
84 | # compute the area of both the prediction and ground-truth
85 | # rectangles
86 | boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
87 | boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
88 |
89 | # compute the intersection over union by taking the intersection
90 | # area and dividing it by the sum of prediction + ground-truth
91 | # areas - the interesection area
92 | iou = interArea / float(boxAArea + boxBArea - interArea)
93 |
94 | # return the intersection over union value
95 | return iou
96 |
97 |
98 | def one_hot_encoding(labels, C):
99 | '''
100 | Converts an integer label torch.autograd.Variable to a one-hot Variable.
101 |
102 | Parameters
103 | ----------
104 | labels : torch.autograd.Variable of torch.cuda.LongTensor
105 | N x 1 x H x W, where N is batch size.
106 | Each value is an integer representing correct classification.
107 | C : integer.
108 | number of classes in labels.
109 |
110 | Returns
111 | -------
112 | target : torch.autograd.Variable of torch.cuda.FloatTensor
113 | N x C x H x W, where C is class number. One-hot encoded.
114 | '''
115 | one_hot = torch.FloatTensor(labels.size(0), C, labels.size(2), labels.size(3)).to(labels.device).zero_()
116 | target = one_hot.scatter_(1, labels.data, 1)
117 |
118 | return target
119 |
120 |
121 | def crop_and_scale_img(img: pimg, crop_box, target_size, pad_size, resample, blank_value):
122 | target = pimg.new(img.mode, pad_size, color=blank_value)
123 | target.paste(img)
124 | res = target.crop(crop_box).resize(target_size, resample=resample)
125 | return res
126 |
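
A small worked example for `downsample_labels_th`, assuming the ignore id equals `num_classes` as the helper expects:

```python
import torch

from data.util import downsample_labels_th  # as defined above

num_classes = 3  # ignore id is num_classes == 3
labels = torch.tensor([[[0, 0, 1, 1],
                        [0, 3, 1, 1],
                        [2, 2, 3, 3],
                        [2, 2, 3, 3]]])  # shape (N=1, H=4, W=4)

dist, valid = downsample_labels_th(labels, factor=2, num_classes=num_classes)
print(dist)   # per 2x2 block: [[1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 0, 0]]
print(valid)  # tensor([True, True, True, False]); the last block is mostly ignore pixels
```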
--------------------------------------------------------------------------------
/data/vistas/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orsic/swiftnet/2b88990e1ab674e8ef7cb533a1d8d49ef34ac93d/data/vistas/__init__.py
--------------------------------------------------------------------------------
/data/vistas/vistas.py:
--------------------------------------------------------------------------------
1 | from torch.utils.data import Dataset
2 | from pathlib import Path
3 |
4 | class_info = ['animal--bird', 'animal--ground-animal', 'construction--barrier--curb', 'construction--barrier--fence',
5 | 'construction--barrier--guard-rail', 'construction--barrier--other-barrier',
6 | 'construction--barrier--wall', 'construction--flat--bike-lane', 'construction--flat--crosswalk-plain',
7 | 'construction--flat--curb-cut', 'construction--flat--parking', 'construction--flat--pedestrian-area',
8 | 'construction--flat--rail-track', 'construction--flat--road', 'construction--flat--service-lane',
9 | 'construction--flat--sidewalk', 'construction--structure--bridge', 'construction--structure--building',
10 | 'construction--structure--tunnel', 'human--person', 'human--rider--bicyclist',
11 | 'human--rider--motorcyclist', 'human--rider--other-rider', 'marking--crosswalk-zebra', 'marking--general',
12 | 'nature--mountain', 'nature--sand', 'nature--sky', 'nature--snow', 'nature--terrain',
13 | 'nature--vegetation', 'nature--water', 'object--banner', 'object--bench', 'object--bike-rack',
14 | 'object--billboard', 'object--catch-basin', 'object--cctv-camera', 'object--fire-hydrant',
15 | 'object--junction-box', 'object--mailbox', 'object--manhole', 'object--phone-booth', 'object--pothole',
16 | 'object--street-light', 'object--support--pole', 'object--support--traffic-sign-frame',
17 | 'object--support--utility-pole', 'object--traffic-light', 'object--traffic-sign--back',
18 | 'object--traffic-sign--front', 'object--trash-can', 'object--vehicle--bicycle', 'object--vehicle--boat',
19 | 'object--vehicle--bus', 'object--vehicle--car', 'object--vehicle--caravan', 'object--vehicle--motorcycle',
20 | 'object--vehicle--on-rails', 'object--vehicle--other-vehicle', 'object--vehicle--trailer',
21 | 'object--vehicle--truck', 'object--vehicle--wheeled-slow', 'void--car-mount', 'void--ego-vehicle',
22 | 'void--unlabeled']
23 | color_info = [[165, 42, 42], [0, 192, 0], [196, 196, 196], [190, 153, 153], [180, 165, 180], [102, 102, 156],
24 | [102, 102, 156], [128, 64, 255], [140, 140, 200], [170, 170, 170], [250, 170, 160], [96, 96, 96],
25 | [230, 150, 140], [128, 64, 128], [110, 110, 110], [244, 35, 232], [150, 100, 100], [70, 70, 70],
26 | [150, 120, 90], [220, 20, 60], [255, 0, 0], [255, 0, 0], [255, 0, 0], [200, 128, 128], [255, 255, 255],
27 | [64, 170, 64], [128, 64, 64], [70, 130, 180], [255, 255, 255], [152, 251, 152], [107, 142, 35],
28 | [0, 170, 30], [255, 255, 128], [250, 0, 30], [0, 0, 0], [220, 220, 220], [170, 170, 170], [222, 40, 40],
29 | [100, 170, 30], [40, 40, 40], [33, 33, 33], [170, 170, 170], [0, 0, 142], [170, 170, 170],
30 | [210, 170, 100], [153, 153, 153], [128, 128, 128], [0, 0, 142], [250, 170, 30], [192, 192, 192],
31 | [220, 220, 0], [180, 165, 180], [119, 11, 32], [0, 0, 142], [0, 60, 100], [0, 0, 142], [0, 0, 90],
32 | [0, 0, 230], [0, 80, 100], [128, 64, 64], [0, 0, 110], [0, 0, 70], [0, 0, 192], [32, 32, 32], [0, 0, 0],
33 | [0, 0, 0]]
34 |
35 |
36 | class Vistas(Dataset):
37 | class_info = class_info
38 | color_info = color_info
39 | num_classes = 63
40 |
41 | def __init__(self, root: Path, transforms: lambda x: x, subset='training', open_images=True, epoch=None):
42 | self.root = root
43 | self.open_images = open_images
44 | self.images_dir = root / subset / 'images'
45 | self.labels_dir = root / subset / 'labels'
46 |
47 | self.images = list(sorted(self.images_dir.glob('*.jpg')))
48 | self.labels = list(sorted(self.labels_dir.glob('*.png')))
49 |
50 | self.transforms = transforms
51 | self.subset = subset
52 | self.epoch = epoch
53 |
54 | print(f'Num images: {len(self)}')
55 |
56 | def __len__(self):
57 | return len(self.images)
58 |
59 | def __getitem__(self, item):
60 | ret_dict = {
61 | 'name': self.images[item].stem,
62 | 'subset': self.subset,
63 | 'labels': self.labels[item]
64 | }
65 | if self.open_images:
66 | ret_dict['image'] = self.images[item]
67 | if self.epoch is not None:
68 | ret_dict['epoch'] = int(self.epoch.value)
69 | return self.transforms(ret_dict)
70 |
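
A usage sketch for the `Vistas` dataset, assuming Mapillary Vistas has been downloaded to a local directory containing `training/images` and `training/labels`, and assuming the `Open`/`Tensor` transforms from `data.transform` accept the path fields this dataset emits:

```python
from pathlib import Path
from torchvision.transforms import Compose

from data.vistas.vistas import Vistas  # as defined above
from data.transform import Open, Tensor

root = Path.home() / 'datasets/Vistas'  # hypothetical local path
dataset = Vistas(root, transforms=Compose([Open(), Tensor()]), subset='training')

sample = dataset[0]
print(sample['name'], sample['subset'])
```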
--------------------------------------------------------------------------------
/datasets/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orsic/swiftnet/2b88990e1ab674e8ef7cb533a1d8d49ef34ac93d/datasets/.gitkeep
--------------------------------------------------------------------------------
/eval.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from pathlib import Path
3 | import importlib.util
4 | from evaluation import evaluate_semseg
5 |
6 |
7 | def import_module(path):
8 | spec = importlib.util.spec_from_file_location("module", path)
9 | module = importlib.util.module_from_spec(spec)
10 | spec.loader.exec_module(module)
11 | return module
12 |
13 |
14 | parser = argparse.ArgumentParser(description='Semantic segmentation evaluation')
15 | parser.add_argument('config', type=str, help='Path to configuration .py file')
16 | parser.add_argument('--profile', dest='profile', action='store_true', help='Profile one forward pass')
17 |
18 | if __name__ == '__main__':
19 | args = parser.parse_args()
20 | conf_path = Path(args.config)
21 | conf = import_module(args.config)
22 |
23 | class_info = conf.dataset_val.class_info
24 |
25 | model = conf.model.cuda()
26 |
27 | for loader, name in conf.eval_loaders:
28 | iou, per_class_iou = evaluate_semseg(model, loader, class_info, observers=conf.eval_observers)
29 | print(f'{name}: {iou:.2f}')
30 |
--------------------------------------------------------------------------------
/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | from .evaluate import *
2 | from .prediction import *
3 |
--------------------------------------------------------------------------------
/evaluation/evaluate.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 |
3 | import numpy as np
4 | import torch
5 | from tqdm import tqdm
6 | from time import perf_counter
7 |
8 | import lib.cylib as cylib
9 |
10 | __all__ = ['compute_errors', 'get_pred', 'evaluate_semseg']
11 |
12 |
13 | def compute_errors(conf_mat, class_info, verbose=True):
14 | num_correct = conf_mat.trace()
15 | num_classes = conf_mat.shape[0]
16 | total_size = conf_mat.sum()
17 | avg_pixel_acc = num_correct / total_size * 100.0
18 | TPFP = conf_mat.sum(1)
19 | TPFN = conf_mat.sum(0)
20 | FN = TPFN - conf_mat.diagonal()
21 | FP = TPFP - conf_mat.diagonal()
22 | class_iou = np.zeros(num_classes)
23 | class_recall = np.zeros(num_classes)
24 | class_precision = np.zeros(num_classes)
25 | per_class_iou = []
26 | if verbose:
27 | print('Errors:')
28 | for i in range(num_classes):
29 | TP = conf_mat[i, i]
30 | class_iou[i] = (TP / (TP + FP[i] + FN[i])) * 100.0
31 | if TPFN[i] > 0:
32 | class_recall[i] = (TP / TPFN[i]) * 100.0
33 | else:
34 | class_recall[i] = 0
35 | if TPFP[i] > 0:
36 | class_precision[i] = (TP / TPFP[i]) * 100.0
37 | else:
38 | class_precision[i] = 0
39 |
40 | class_name = class_info[i]
41 | per_class_iou += [(class_name, class_iou[i])]
42 | if verbose:
43 | print('\t%s IoU accuracy = %.2f %%' % (class_name, class_iou[i]))
44 | avg_class_iou = class_iou.mean()
45 | avg_class_recall = class_recall.mean()
46 | avg_class_precision = class_precision.mean()
47 | if verbose:
48 | print('IoU mean class accuracy -> TP / (TP+FN+FP) = %.2f %%' % avg_class_iou)
49 | print('mean class recall -> TP / (TP+FN) = %.2f %%' % avg_class_recall)
50 | print('mean class precision -> TP / (TP+FP) = %.2f %%' % avg_class_precision)
51 | print('pixel accuracy = %.2f %%' % avg_pixel_acc)
52 | return avg_pixel_acc, avg_class_iou, avg_class_recall, avg_class_precision, total_size, per_class_iou
53 |
54 |
55 | def get_pred(logits, labels, conf_mat):
56 | _, pred = torch.max(logits.data, dim=1)
57 | pred = pred.byte().cpu()
58 | pred = pred.numpy().astype(np.int32)
59 | true = labels.numpy().astype(np.int32)
60 | cylib.collect_confusion_matrix(pred.reshape(-1), true.reshape(-1), conf_mat)
61 |
62 |
63 | def mt(sync=False):
64 | if sync:
65 | torch.cuda.synchronize()
66 | return 1000 * perf_counter()
67 |
68 |
69 | def evaluate_semseg(model, data_loader, class_info, observers=()):
70 | model.eval()
71 | managers = [torch.no_grad()] + list(observers)
72 | with contextlib.ExitStack() as stack:
73 | for ctx_mgr in managers:
74 | stack.enter_context(ctx_mgr)
75 | conf_mat = np.zeros((model.num_classes, model.num_classes), dtype=np.uint64)
76 | for step, batch in tqdm(enumerate(data_loader), total=len(data_loader)):
77 | batch['original_labels'] = batch['original_labels'].numpy().astype(np.uint32)
78 | logits, additional = model.do_forward(batch, batch['original_labels'].shape[1:3])
79 | pred = torch.argmax(logits.data, dim=1).byte().cpu().numpy().astype(np.uint32)
80 | for o in observers:
81 | o(pred, batch, additional)
82 | cylib.collect_confusion_matrix(pred.flatten(), batch['original_labels'].flatten(), conf_mat)
83 | print('')
84 | pixel_acc, iou_acc, recall, precision, _, per_class_iou = compute_errors(conf_mat, class_info, verbose=True)
85 | model.train()
86 | return iou_acc, per_class_iou
87 |
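
A toy `compute_errors` call on a hand-made 2×2 confusion matrix (class names are placeholders); rows hold predictions and columns ground truth, consistent with the `TPFP`/`TPFN` definitions above. Importing the module assumes `lib/cylib` has been built (see `lib/build.sh`):

```python
import numpy as np

from evaluation.evaluate import compute_errors  # as defined above

conf_mat = np.array([[8, 2],
                     [1, 9]], dtype=np.uint64)

pixel_acc, miou, recall, precision, total, per_class = compute_errors(
    conf_mat, class_info=['road', 'car'], verbose=False)
print(f'mIoU = {miou:.2f}%')  # IoU_road = 8/11, IoU_car = 9/12 -> mIoU ~ 73.86%
```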
--------------------------------------------------------------------------------
/evaluation/prediction.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from PIL import Image as pimg
3 |
4 | __all__ = ['StorePreds', 'StoreSubmissionPreds']
5 |
6 |
7 | class StorePreds:
8 | def __init__(self, store_dir, to_img, to_color):
9 | self.store_dir = store_dir
10 | self.to_img = to_img
11 | self.to_color = to_color
12 |
13 | def __enter__(self):
14 | return self
15 |
16 | def __exit__(self, exc_type, exc_val, exc_tb):
17 | pass
18 |
19 | def __str__(self):
20 | return ''
21 |
22 | def __call__(self, pred, batch, additional):
23 | b = self.to_img(batch)
24 | for p, im, gt, name, subset in zip(pred, b['image'], b['original_labels'], b['name'], b['subset']):
25 | store_img = np.concatenate([i.astype(np.uint8) for i in [im, self.to_color(p), gt]], axis=0)
26 | store_img = pimg.fromarray(store_img)
27 | store_img.thumbnail((960, 1344))
28 | store_img.save(f'{self.store_dir}/{subset}/{name}.jpg')
29 |
30 | class StoreSubmissionPreds:
31 | def __init__(self, store_dir, remap, to_color=None, store_dir_color=None):
32 | self.store_dir = store_dir
33 | self.store_dir_color = store_dir_color
34 | self.to_color = to_color
35 | self.remap = remap
36 |
37 | def __enter__(self):
38 | return self
39 |
40 | def __exit__(self, exc_type, exc_val, exc_tb):
41 | pass
42 |
43 | def __str__(self):
44 | return ''
45 |
46 | def __call__(self, pred, batch, additional):
47 | for p, name in zip(pred.astype(np.uint8), batch['name']):
48 | pimg.fromarray(self.remap(p)).save(f'{self.store_dir}/{name}.png')
49 |             if self.to_color is not None:  # to_color/store_dir_color are optional (default None)
50 |                 pimg.fromarray(self.to_color(p)).save(f'{self.store_dir_color}/{name}.png')
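
A minimal sketch of using `StoreSubmissionPreds` on a fake prediction; the output directories, identity remap, and two-color palette are placeholders for the real Cityscapes remapping and colorization passed in from the configs (importing the `evaluation` package assumes `lib/cylib` has been built):

```python
import os
import numpy as np

from evaluation.prediction import StoreSubmissionPreds  # as defined above

os.makedirs('out/ids', exist_ok=True)
os.makedirs('out/color', exist_ok=True)

palette = np.array([[128, 64, 128], [0, 0, 142]], dtype=np.uint8)
store = StoreSubmissionPreds(store_dir='out/ids',
                             remap=lambda p: p,              # identity remap, for the sketch only
                             to_color=lambda p: palette[p],  # toy 2-class colorization
                             store_dir_color='out/color')

pred = np.random.randint(0, 2, (1, 64, 128), dtype=np.uint8)  # fake (N, H, W) predictions
store(pred, batch={'name': ['frankfurt_000000_000294']}, additional=None)
```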
--------------------------------------------------------------------------------
/lib/build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | rm cylib.so
3 |
4 | cython -a cylib.pyx -o cylib.cc
5 |
6 | #g++ -shared -pthread -fPIC -fwrapv -O3 -Wall -fno-strict-aliasing \
7 | #-I/usr/lib/python3.7/site-packages/numpy/core/include -I/usr/include/python3.7m -o cylib.so cylib.cc
8 | g++ -shared -pthread -fPIC -fwrapv -O3 -Wall -fno-strict-aliasing \
9 | -I/usr/lib/python3.8/site-packages/numpy/core/include -I/usr/include/python3.8 -o cylib.so cylib.cc
10 |
--------------------------------------------------------------------------------
/lib/cylib.h:
--------------------------------------------------------------------------------
1 | #include