├── .github └── GracoNet.png ├── LICENSE ├── README.md ├── eval ├── __init__.py ├── fid_resize299.py ├── fid_score.py ├── inception.py ├── lpips_1dir.py ├── resnet_4ch.py ├── simopa_acc.py ├── simopa_cfg.py ├── simopa_dst.py └── simopa_net.py ├── faster-rcnn ├── README.md ├── _init_paths.py ├── cfgs │ └── res101.yml ├── convert_data.py ├── data │ └── genome │ │ └── 1600-400-20 │ │ └── objects_vocab.txt ├── generate_tsv.py ├── lib │ ├── datasets │ │ ├── VOCdevkit-matlab-wrapper │ │ │ ├── get_voc_opts.m │ │ │ ├── voc_eval.m │ │ │ └── xVOCap.m │ │ ├── __init__.py │ │ ├── coco.py │ │ ├── ds_utils.py │ │ ├── factory.py │ │ ├── imagenet.py │ │ ├── imdb.py │ │ ├── pascal_voc.py │ │ ├── pascal_voc_rbg.py │ │ ├── tools │ │ │ └── mcg_munge.py │ │ ├── vg.py │ │ ├── vg_eval.py │ │ └── voc_eval.py │ ├── model │ │ ├── __init__.py │ │ ├── csrc │ │ │ ├── ROIAlign.h │ │ │ ├── ROIPool.h │ │ │ ├── cpu │ │ │ │ ├── ROIAlign_cpu.cpp │ │ │ │ ├── nms_cpu.cpp │ │ │ │ └── vision.h │ │ │ ├── cuda │ │ │ │ ├── ROIAlign_cuda.cu │ │ │ │ ├── ROIPool_cuda.cu │ │ │ │ ├── nms.cu │ │ │ │ └── vision.h │ │ │ ├── nms.h │ │ │ └── vision.cpp │ │ ├── faster_rcnn │ │ │ ├── __init__.py │ │ │ ├── faster_rcnn.py │ │ │ ├── resnet.py │ │ │ └── vgg16.py │ │ ├── nms │ │ │ ├── .gitignore │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ └── nms │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── make.sh │ │ │ ├── nms_cpu.py │ │ │ ├── nms_gpu.py │ │ │ ├── nms_kernel.cu │ │ │ ├── nms_wrapper.py │ │ │ └── src │ │ │ │ ├── nms_cuda.h │ │ │ │ ├── nms_cuda_kernel.cu │ │ │ │ └── nms_cuda_kernel.h │ │ ├── roi_align │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ └── roi_align │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ └── roi_align.py │ │ │ ├── make.sh │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ └── roi_align.py │ │ │ └── src │ │ │ │ ├── roi_align.c │ │ │ │ ├── roi_align.h │ │ │ │ ├── roi_align_cuda.c │ │ │ │ ├── roi_align_cuda.h │ │ │ │ ├── roi_align_kernel.cu │ │ │ │ └── roi_align_kernel.h │ │ ├── roi_crop │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ ├── crop_resize │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── _crop_resize.so │ │ │ │ └── roi_crop │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ ├── crop_resize.py │ │ │ │ ├── gridgen.py │ │ │ │ └── roi_crop.py │ │ │ ├── make.sh │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ ├── gridgen.py │ │ │ │ └── roi_crop.py │ │ │ └── src │ │ │ │ ├── roi_crop.c │ │ │ │ ├── roi_crop.h │ │ │ │ ├── roi_crop_cuda.c │ │ │ │ ├── roi_crop_cuda.h │ │ │ │ ├── roi_crop_cuda_kernel.cu │ │ │ │ └── roi_crop_cuda_kernel.h │ │ ├── roi_layers │ │ │ ├── __init__.py │ │ │ ├── nms.py │ │ │ ├── roi_align.py │ │ │ └── roi_pool.py │ │ ├── roi_pooling │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ └── roi_pooling │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ └── roi_pool.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ └── roi_pool.py │ │ │ └── src │ │ │ │ ├── roi_pooling.c │ │ │ │ ├── roi_pooling.h │ │ │ │ ├── roi_pooling_cuda.c │ │ │ │ ├── roi_pooling_cuda.h │ │ │ │ ├── roi_pooling_kernel.cu │ │ │ │ └── roi_pooling_kernel.h │ │ ├── rpn │ │ │ ├── __init__.py │ │ │ ├── anchor_target_layer.py │ │ │ ├── bbox_transform.py │ │ │ ├── generate_anchors.py │ │ │ ├── proposal_layer.py │ │ │ ├── proposal_target_layer_cascade.py │ │ │ └── rpn.py │ │ └── utils │ │ │ ├── .gitignore │ │ │ ├── 
__init__.py │ │ │ ├── bbox.pyx │ │ │ ├── blob.py │ │ │ ├── config.py │ │ │ ├── logger.py │ │ │ └── net_utils.py │ ├── roi_data_layer │ │ ├── __init__.py │ │ ├── minibatch.py │ │ ├── roibatchLoader.py │ │ └── roidb.py │ ├── setup.py │ └── utils │ │ └── timer.py ├── models │ └── README.md └── object_150_list.txt ├── infer.py ├── infer_placenet.py ├── infer_terse.py ├── loader ├── __init__.py ├── base.py ├── datasets.py └── utils.py ├── main.py ├── main_placenet.py ├── main_terse.py ├── model.py ├── model_placenet.py ├── model_terse.py ├── network.py ├── network_placenet.py ├── network_terse.py ├── requirements.txt ├── result └── README.md ├── script ├── eval_acc.sh ├── eval_fid.sh └── eval_lpips.sh └── tool ├── __init__.py ├── preprocess.py ├── summarize.py └── utils.py /.github/GracoNet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/.github/GracoNet.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 BCMI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /eval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/eval/__init__.py -------------------------------------------------------------------------------- /eval/fid_resize299.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import csv 3 | from tqdm import tqdm 4 | from PIL import Image 5 | from torchvision import transforms 6 | from torchvision.transforms import InterpolationMode 7 | import os 8 | 9 | 10 | if __name__ == '__main__': 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument("--expid", type=str, required=True, help="experiment name") 13 | parser.add_argument("--epoch", type=int, required=True, help="epoch for evaluation") 14 | parser.add_argument("--eval_type", type=str, default="eval", help="evaluation type") 15 | opt = parser.parse_args() 16 | 17 | data_dir = os.path.join('result', opt.expid, opt.eval_type, str(opt.epoch)) 18 | assert (os.path.exists(data_dir)) 19 | if not os.path.exists(os.path.join(data_dir, 'images299')): 20 | os.mkdir(os.path.join(data_dir, 'images299')) 21 | csv_file = os.path.join(data_dir, '{}.csv'.format(opt.eval_type)) 22 | csv_data = csv.DictReader(open(csv_file, 'r')) 23 | for i, row in tqdm(enumerate(csv_data)): 24 | img_src = os.path.join(data_dir, row['img_path']) 25 | img_tar = os.path.join(data_dir, 'images299', row['img_path'].split('/')[-1]) 26 | comp_img = Image.open(img_src).convert('RGB') 27 | comp_img_299 = transforms.Resize((299, 299), interpolation=InterpolationMode.BILINEAR)(comp_img) 28 | comp_img_299.save(img_tar) 29 | -------------------------------------------------------------------------------- /eval/lpips_1dir.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import datetime 3 | from tqdm import tqdm 4 | import os 5 | import numpy as np 6 | import torch 7 | import lpips 8 | 9 | def main(): 10 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 11 | parser.add_argument('-d','--dir', type=str, default='./imgs/ex_dir') 12 | parser.add_argument('-v','--version', type=str, default='0.1') 13 | parser.add_argument('--use_gpu', action='store_true', help='turn on flag to use GPU') 14 | parser.add_argument("--expid", type=str, required=True, help="experiment name") 15 | parser.add_argument("--epoch", type=int, required=True, help="epoch for evaluation") 16 | parser.add_argument("--eval_type", type=str, default="evaluni", help="evaluation type") 17 | parser.add_argument("--repeat", type=int, default=10, help="repeat count for sampling z") 18 | opt = parser.parse_args() 19 | 20 | assert (opt.repeat > 1) 21 | data_dir = os.path.join('result', opt.expid, opt.eval_type, str(opt.epoch)) 22 | assert (os.path.exists(data_dir)) 23 | 24 | # initialize the model 25 | loss_fn = lpips.LPIPS(net='alex', version=opt.version) 26 | if (opt.use_gpu): 27 | loss_fn.cuda() 28 | 29 | # crawl directory 30 | files_list = list(sorted(os.listdir(opt.dir))) 31 | files_dict = {} 32 | for filename in files_list: 33 | index = filename.split('_')[0] 34 | if index in files_dict: 35 | files_dict[index].append(filename) 36 | else: 37 | files_dict[index] = [filename] 38 | total = len(files_dict) 39 | 40 | # stores distances 41 | dist_all = {} 42 | for i, index in 
enumerate(tqdm(files_dict, total=total)): 43 | dist_all[index] = [] 44 | files = files_dict[index] 45 | for ff, file0 in enumerate(files[:-1]): 46 | img0 = lpips.im2tensor(lpips.load_image(os.path.join(opt.dir, file0))) # RGB image from [-1,1] 47 | if (opt.use_gpu): 48 | img0 = img0.cuda() 49 | for file1 in files[ff+1:]: 50 | img1 = lpips.im2tensor(lpips.load_image(os.path.join(opt.dir, file1))) 51 | if (opt.use_gpu): 52 | img1 = img1.cuda() 53 | # compute distance 54 | with torch.no_grad(): 55 | dist01 = loss_fn.forward(img0, img1).squeeze().cpu().item() 56 | dist_all[index].append(dist01) 57 | 58 | # calculate results 59 | dist_res = np.zeros((total, 2), dtype=np.float32) 60 | for i, index in enumerate(dist_all): 61 | dists = dist_all[index] 62 | dist_res[i,0] = np.mean(np.array(dists)) # avg of dists for index 63 | dist_res[i,1] = np.std(np.array(dists))/np.sqrt(len(dists)) # stderr of dists for index 64 | 65 | dist_avg = np.mean(dist_res[:,0]) 66 | dist_stderr = np.mean(dist_res[:,1]) 67 | print(" - LPIPS (Variety): dist = {:.3f}, stderr = {:.6f}".format(dist_avg, dist_stderr)) 68 | mark = 'a' if os.path.exists(os.path.join(data_dir, "{}_lpips_variety.txt".format(opt.eval_type))) else 'w' 69 | with open(os.path.join(data_dir, "{}_lpips_variety.txt".format(opt.eval_type)), mark) as f: 70 | f.write("{}\n".format(datetime.datetime.now())) 71 | f.write(" - LPIPS (Variety): dist = {:.3f}, stderr = {:.6f}\n".format(dist_avg, dist_stderr)) 72 | 73 | 74 | if __name__ == '__main__': 75 | main() 76 | -------------------------------------------------------------------------------- /eval/simopa_acc.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import datetime 3 | import os 4 | import torch 5 | from tqdm import tqdm 6 | import numpy as np 7 | import torch 8 | 9 | from simopa_cfg import opt 10 | from simopa_dst import ImageDataset 11 | from simopa_net import ObjectPlaceNet 12 | 13 | 14 | def evaluate(args): 15 | # modify configs 16 | opt.dataset_path = os.path.join('result', args.expid, args.eval_type, str(args.epoch)) 17 | assert (os.path.exists(opt.dataset_path)) 18 | opt.img_path = opt.dataset_path 19 | opt.mask_path = opt.dataset_path 20 | opt.test_data_path = os.path.join(opt.dataset_path, '{}.csv'.format(args.eval_type)) 21 | opt.test_box_dic_path = os.path.join(opt.dataset_path, '{}_bboxes.npy'.format(args.eval_type)) 22 | opt.test_reference_feature_path = os.path.join(opt.dataset_path, '{}_feats.npy'.format(args.eval_type)) 23 | opt.test_target_feature_path = os.path.join(opt.dataset_path, '{}_fgfeats.npy'.format(args.eval_type)) 24 | 25 | opt.relation_method = 5 26 | opt.attention_method = 2 27 | opt.refer_num = 5 28 | opt.attention_head = 16 29 | opt.without_mask = False 30 | opt.without_global_feature = False 31 | 32 | net = ObjectPlaceNet(backbone_pretrained=False) 33 | 34 | checkpoint_path = args.checkpoint 35 | print('load pretrained weights from ', checkpoint_path) 36 | net.load_state_dict(torch.load(checkpoint_path)) 37 | net = net.cuda().eval() 38 | 39 | total = 0 40 | pred_labels = [] 41 | sample_ids = [] 42 | 43 | testset = ImageDataset(istrain=False) 44 | test_loader = torch.utils.data.DataLoader(testset, batch_size=128, 45 | shuffle=False, num_workers=2, 46 | drop_last=False, pin_memory=True) 47 | 48 | with torch.no_grad(): 49 | for batch_index, (sample_id, img_cat, label, target_box, refer_box, target_feats, refer_feats, target_mask, refer_mask, tar_class, w, h) in enumerate( 50 | tqdm(test_loader)): 51 | img_cat, 
label, target_box, refer_box, target_mask, refer_mask, w, h = img_cat.cuda(), label.cuda(), target_box.cuda( 52 | ), refer_box.cuda(), target_mask.cuda(), refer_mask.cuda(), w.cuda(), h.cuda() 53 | target_feats, refer_feats = target_feats.cuda(), refer_feats.cuda() 54 | logits, weights = net(img_cat, target_box, refer_box, target_feats, refer_feats, target_mask, refer_mask, w, h) 55 | pred_labels.extend(logits.max(1)[1].cpu().numpy()) 56 | total += label.size(0) 57 | sample_ids.extend(list(sample_id)) 58 | 59 | pred_acc = (np.array(pred_labels, dtype=np.int32) == 1).sum() / len(pred_labels) 60 | print(" - Accuracy = {:.3f}".format(pred_acc)) 61 | mark = 'a' if os.path.exists(os.path.join(opt.dataset_path, "{}_acc.txt".format(args.eval_type))) else 'w' 62 | with open(os.path.join(opt.dataset_path, "{}_acc.txt".format(args.eval_type)), mark) as f: 63 | f.write("{}\n".format(datetime.datetime.now())) 64 | f.write(" - Accuracy = {:.3f}\n".format(pred_acc)) 65 | 66 | 67 | if __name__ == '__main__': 68 | parser = argparse.ArgumentParser() 69 | parser.add_argument("--checkpoint", type=str, required=True, help="path to loaded checkpoint") 70 | parser.add_argument("--expid", type=str, required=True, help="experiment name") 71 | parser.add_argument("--epoch", type=int, required=True, help="epoch for evaluation") 72 | parser.add_argument("--eval_type", type=str, default="eval", help="evaluation type") 73 | args = parser.parse_args() 74 | assert os.path.exists(args.checkpoint) 75 | evaluate(args) 76 | -------------------------------------------------------------------------------- /eval/simopa_cfg.py: -------------------------------------------------------------------------------- 1 | class Config(object): 2 | ## Path 3 | pretrained_model_path = None 4 | dataset_path = None 5 | img_path = None 6 | mask_path = None 7 | 8 | # * train 9 | train_data_path = None 10 | box_dic_path = None 11 | depth_feats_path = None 12 | train_reference_feature_path = None 13 | train_target_feature_path = None 14 | 15 | # * test 16 | test_data_path = None 17 | test_box_dic_path = None 18 | test_reference_feature_path = None 19 | test_target_feature_path = None 20 | 21 | ## Loader 22 | img_size = 256 23 | binary_mask_size = 64 24 | 25 | # * train 26 | num_workers = 4 27 | batch_size = 64 28 | base_lr = 1e-4 29 | lr_milestones = [10, 16] 30 | lr_gamma = 0.1 31 | epochs = 20 32 | eval_freq = 1 33 | save_freq = 5 34 | display_freq = 10 35 | 36 | ## Network 37 | class_num = 2 38 | geometric_feature_dim = 256 39 | roi_align_size = 3 40 | global_feature_size = 8 41 | attention_dim_head = 64 42 | 43 | # * reference head 44 | backbone = 'resnet18' 45 | relation_method = None 46 | attention_method = None 47 | refer_num = None 48 | attention_head = None 49 | without_mask = None 50 | without_global_feature = None 51 | 52 | opt = Config() 53 | -------------------------------------------------------------------------------- /eval/simopa_dst.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | from PIL import Image 5 | import csv 6 | import torch 7 | import torchvision.transforms as transforms 8 | from torch.utils.data import Dataset 9 | 10 | from simopa_cfg import opt 11 | 12 | 13 | class ImageDataset(Dataset): 14 | def __init__(self, istrain=True): 15 | self.istrain = istrain 16 | 17 | with open(opt.train_data_path if istrain else opt.test_data_path, "r") as f: 18 | reader = csv.reader(f) 19 | reader = list(reader) 20 | title = reader[0] 21 | 
annid_index = title.index('annID') 22 | scid_index = title.index('scID') 23 | category_index = title.index('catnm') 24 | label_index = title.index('label') 25 | image_path_index = title.index('img_path') 26 | mask_path_index = title.index('msk_path') 27 | target_box_index = title.index('bbox') 28 | 29 | self.sample_ids = [] 30 | self.labels = [] 31 | self.images_path = [] 32 | self.mask_path = [] 33 | self.target_box = [] 34 | self.dic_name = [] 35 | self.target_class = [] 36 | 37 | for row in reader[1:]: 38 | category = row[category_index] 39 | label = int(row[label_index]) 40 | image_path = row[image_path_index] 41 | mask_path = row[mask_path_index] 42 | target_box = eval(row[target_box_index]) 43 | sample_id = "{}_{}_{}_{}_{}_{}".format(row[annid_index], row[scid_index], target_box[0], target_box[1], target_box[2], target_box[3]) 44 | self.sample_ids.append(sample_id) 45 | self.labels.append(label) 46 | self.images_path.append(os.path.join(opt.img_path, image_path)) 47 | self.mask_path.append(os.path.join(opt.mask_path, mask_path)) 48 | self.target_box.append(target_box) 49 | self.dic_name.append(image_path) 50 | self.target_class.append(category) 51 | 52 | self.img_transform = transforms.Compose([ 53 | transforms.Resize((opt.img_size, opt.img_size)), 54 | transforms.ToTensor() 55 | ]) 56 | self.mask_transform = transforms.Compose([ 57 | transforms.Resize((opt.img_size, opt.img_size)), 58 | transforms.ToTensor() 59 | ]) 60 | self.transforms_flip = transforms.Compose([ 61 | transforms.RandomHorizontalFlip(p=1) 62 | ]) 63 | 64 | # reference box and depth feature 65 | if istrain: 66 | self.refer_box_dic = np.load(opt.box_dic_path, allow_pickle=True) 67 | self.depth_feats_path = opt.depth_feats_path 68 | self.target_features = np.load(opt.train_target_feature_path, allow_pickle=True) 69 | self.refer_features = np.load(opt.train_reference_feature_path) 70 | else: 71 | self.refer_box_dic = np.load(opt.test_box_dic_path, allow_pickle=True) 72 | self.target_features = np.load(opt.test_target_feature_path, allow_pickle=True) 73 | self.refer_features = np.load(opt.test_reference_feature_path) 74 | 75 | def __getitem__(self, index): 76 | img = Image.open(self.images_path[index]).convert('RGB') 77 | w = img.width 78 | h = img.height 79 | img = self.img_transform(img) 80 | 81 | mask = Image.open(self.mask_path[index]).convert('L') 82 | mask = self.img_transform(mask) 83 | 84 | is_flip = False 85 | if self.istrain and np.random.uniform() < 0.5: 86 | img = self.transforms_flip(img) 87 | mask = self.transforms_flip(mask) 88 | is_flip = True 89 | img_mask = torch.cat([img, mask], dim=0) 90 | 91 | label = self.labels[index] 92 | target_box = self.target_box[index] 93 | x1, y1, bw, bh = target_box 94 | x2, y2 = x1 + bw, y1 + bh 95 | if is_flip: 96 | x1 = w - x1 97 | x2 = w - x2 98 | x1, x2 = x2, x1 99 | target_box = torch.tensor([x1, y1, x2, y2]) 100 | 101 | refer_box = self.refer_box_dic[index] 102 | refer_score = refer_box[:, -1] 103 | refer_keep = np.argsort(refer_score)[::-1][:opt.refer_num] 104 | refer_box = refer_box[refer_keep] 105 | refer_box = torch.from_numpy(refer_box) 106 | 107 | refer_feats = self.refer_features[index][refer_keep] 108 | target_feats = self.target_features[index] 109 | 110 | if is_flip: 111 | x1, y1, x2, y2 = refer_box[:, 0], refer_box[:, 1], refer_box[:, 2], refer_box[:, 3] 112 | x1 = w - x1 113 | x2 = w - x2 114 | x1, x2 = x2, x1 115 | refer_box = torch.cat([x1[:, None], y1[:, None], x2[:, None], y2[:, None], 116 | refer_box[:, 4:5], refer_box[:, 5:]], dim=1) 117 | 118 | # 
produce binary mask for target/reference boxes 119 | bm_size = opt.binary_mask_size 120 | scale_x1 = (target_box[0] / w * bm_size).int() 121 | scale_y1 = (target_box[1] / h * bm_size).int() 122 | scale_x2 = (target_box[2] / w * bm_size).int() 123 | scale_y2 = (target_box[3] / h * bm_size).int() 124 | 125 | target_mask = torch.zeros(1, bm_size, bm_size, dtype=img.dtype) 126 | target_mask[0, scale_y1: scale_y2, scale_x1: scale_x2] = 1 127 | 128 | refer_mask = torch.zeros(opt.refer_num, bm_size, bm_size, dtype=target_mask.dtype) 129 | scale_x1 = (refer_box[:, 0] / w * bm_size).int() 130 | scale_y1 = (refer_box[:, 1] / h * bm_size).int() 131 | scale_x2 = (refer_box[:, 2] / w * bm_size).int() 132 | scale_y2 = (refer_box[:, 3] / h * bm_size).int() 133 | 134 | for i in range(opt.refer_num): 135 | refer_mask[i, scale_y1[i]: scale_y2[i], scale_x1[i]: scale_x2[i]] = 1 136 | tar_class = self.target_class[index] 137 | 138 | sample_id = self.sample_ids[index] 139 | return sample_id, img_mask, label, target_box, refer_box, target_feats, refer_feats, target_mask, refer_mask, tar_class, w, h 140 | 141 | def __len__(self): 142 | return len(self.labels) 143 | -------------------------------------------------------------------------------- /faster-rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Faster R-CNN with model pretrained on Visual Genome 2 | 3 | This directory is adapted from [Faster-RCNN-VG](https://github.com/shilrley6/Faster-R-CNN-with-model-pretrained-on-Visual-Genome); see that repository for more details. 4 | 5 | Please download the Faster R-CNN model pretrained on Visual Genome from [Google Drive](https://drive.google.com/file/d/18n_3V1rywgeADZ3oONO0DsuuS9eMW6sN/view) (provided by [Faster-RCNN-VG](https://github.com/shilrley6/Faster-R-CNN-with-model-pretrained-on-Visual-Genome)) to ```./models/faster_rcnn_res101_vg.pth```.
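After placing the file, a quick way to confirm the checkpoint deserializes is the minimal sketch below (added here for convenience, not part of the original repo; it assumes only that the file was saved to the path above):

```python
# Sanity-check the downloaded checkpoint (illustrative only).
import os
import torch

ckpt_path = './models/faster_rcnn_res101_vg.pth'
assert os.path.exists(ckpt_path), 'download faster_rcnn_res101_vg.pth first'
ckpt = torch.load(ckpt_path, map_location='cpu')  # CPU load; no GPU required
print(type(ckpt))  # inspect the top-level structure of the checkpoint
```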
6 | -------------------------------------------------------------------------------- /faster-rcnn/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, 'lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /faster-rcnn/cfgs/res101.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_BATCHSIZE: 256 7 | PROPOSAL_METHOD: gt 8 | BG_THRESH_LO: 0.0 9 | DISPLAY: 20 10 | BATCH_SIZE: 128 11 | WEIGHT_DECAY: 0.0001 12 | DOUBLE_BIAS: False 13 | LEARNING_RATE: 0.001 14 | TEST: 15 | HAS_RPN: True 16 | POOLING_SIZE: 7 17 | POOLING_MODE: align 18 | CROP_RESIZE_WITH_MAX_POOL: False 19 | -------------------------------------------------------------------------------- /faster-rcnn/convert_data.py: -------------------------------------------------------------------------------- 1 | """Convert image features from bottom-up attention to numpy arrays""" 2 | 3 | # Example 4 | # python convert_data.py --expid ${expid} --epoch ${epoch} 5 | 6 | import os 7 | import base64 8 | import csv 9 | import sys 10 | import argparse 11 | import numpy as np 12 | 13 | csv.field_size_limit(sys.maxsize) 14 | 15 | 16 | FIELDNAMES = ['image_id', 'image_name', 'image_w', 'image_h', 'num_boxes', 'boxes', 'pred_scores', 'features', 'fg_feature'] 17 | 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument("--expid", type=str, required=True, help="experiment name") 20 | parser.add_argument("--epoch", type=int, required=True, help="epoch for evaluation") 21 | parser.add_argument("--eval_type", type=str, default="eval", help="evaluation type") 22 | args = parser.parse_args() 23 | 24 | dataset_dir = os.path.join('../result', args.expid, args.eval_type, str(args.epoch)) 25 | csv_file = os.path.join(dataset_dir, '{}.csv'.format(args.eval_type)) 26 | assert (os.path.exists(csv_file)) 27 | csv_data = csv.DictReader(open(csv_file, 'r')) 28 | meta, bboxes, scores, features, fg_feature = [], {}, {}, {}, {} 29 | for i, row in enumerate(csv_data): 30 | meta.append(i) 31 | bboxes[i] = None 32 | scores[i] = None 33 | features[i] = None 34 | fg_feature[i] = None 35 | 36 | input_file = os.path.join(dataset_dir, "{}_roiinfos.csv".format(args.eval_type)) 37 | assert (os.path.exists(input_file)) 38 | with open(input_file, "r+") as tsv_in_file: 39 | reader = csv.DictReader(tsv_in_file, delimiter='\t', fieldnames=FIELDNAMES) 40 | for item in reader: 41 | item['image_id'] = int(item['image_id']) 42 | item['image_h'] = int(item['image_h']) 43 | item['image_w'] = int(item['image_w']) 44 | item['num_boxes'] = int(item['num_boxes']) 45 | for field in ['boxes', 'pred_scores', 'features']: 46 | data = item[field] 47 | buf = base64.b64decode(data[1:]) 48 | temp = np.frombuffer(buf, dtype=np.float32) 49 | item[field] = temp.reshape((item['num_boxes'], -1)) 50 | for field in ['fg_feature']: 51 | data = item[field] 52 | buf = base64.b64decode(data[1:]) 53 | temp = np.frombuffer(buf, dtype=np.float32) 54 | item[field] = temp.reshape((1, -1)) 55 | idx = np.argsort(-item['boxes'][:, 5]) 56 | item['boxes'] = item['boxes'][idx, :] 57 | item['pred_scores'] =
item['pred_scores'][idx, :] 58 | item['features'] = item['features'][idx, :] 59 | 60 | if item['image_id'] in bboxes: 61 | bboxes[item['image_id']] = item['boxes'] 62 | scores[item['image_id']] = item['pred_scores'] 63 | features[item['image_id']] = item['features'] 64 | fg_feature[item['image_id']] = item['fg_feature'] 65 | 66 | output_dict = { 67 | "bboxes": bboxes, 68 | "scores": scores, 69 | "feats": features, 70 | "fgfeats": fg_feature 71 | } 72 | for k, v in output_dict.items(): 73 | output_file = os.path.join(dataset_dir, "{}_{}.npy".format(args.eval_type, k)) 74 | data_out = np.stack([v[sid] for sid in meta], axis=0) 75 | np.save(output_file, data_out) 76 | -------------------------------------------------------------------------------- /faster-rcnn/lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /faster-rcnn/lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | 6 | for i = 1:length(VOCopts.classes) 7 | cls = VOCopts.classes{i}; 8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); 9 | end 10 | 11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 12 | fprintf('Results:\n'); 13 | aps = [res(:).ap]'; 14 | fprintf('%.1f\n', aps * 100); 15 | fprintf('%.1f\n', mean(aps) * 100); 16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 17 | 18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) 19 | 20 | test_set = VOCopts.testset; 21 | year = VOCopts.dataset(4:end); 22 | 23 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 24 | 25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 26 | 27 | recall = []; 28 | prec = []; 29 | ap = 0; 30 | ap_auc = 0; 31 | 32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 33 | if do_eval 34 | % Bug in VOCevaldet requires that tic has been called first 35 | tic; 36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 37 | ap_auc = xVOCap(recall, prec); 38 | 39 | % force plot limits 40 | ylim([0 1]); 41 | xlim([0 1]); 42 | 43 | print(gcf, '-djpeg', '-r0', ... 44 | [output_dir '/' cls '_pr.jpg']); 45 | end 46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc); 47 | 48 | res.recall = recall; 49 | res.prec = prec; 50 | res.ap = ap; 51 | res.ap_auc = ap_auc; 52 | 53 | save([output_dir '/' cls '_pr.mat'], ... 
54 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 55 | 56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 57 | -------------------------------------------------------------------------------- /faster-rcnn/lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /faster-rcnn/lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /faster-rcnn/lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | 12 | 13 | def unique_boxes(boxes, scale=1.0): 14 | """Return indices of unique boxes.""" 15 | v = np.array([1, 1e3, 1e6, 1e9]) 16 | hashes = np.round(boxes * scale).dot(v) 17 | _, index = np.unique(hashes, return_index=True) 18 | return np.sort(index) 19 | 20 | 21 | def xywh_to_xyxy(boxes): 22 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 23 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 24 | 25 | 26 | def xyxy_to_xywh(boxes): 27 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 28 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 29 | 30 | 31 | def validate_boxes(boxes, width=0, height=0): 32 | """Check that a set of boxes are valid.""" 33 | x1 = boxes[:, 0] 34 | y1 = boxes[:, 1] 35 | x2 = boxes[:, 2] 36 | y2 = boxes[:, 3] 37 | assert (x1 >= 0).all() 38 | assert (y1 >= 0).all() 39 | assert (x2 >= x1).all() 40 | assert (y2 >= y1).all() 41 | assert (x2 < width).all() 42 | assert (y2 < height).all() 43 | 44 | 45 | def filter_small_boxes(boxes, min_size): 46 | w = boxes[:, 2] - boxes[:, 0] 47 | h = boxes[:, 3] - boxes[:, 1] 48 | keep = np.where((w >= min_size) & (h > min_size))[0] 49 | return keep 50 | -------------------------------------------------------------------------------- /faster-rcnn/lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | 
__sets = {} 14 | from datasets.pascal_voc import pascal_voc 15 | from datasets.coco import coco 16 | from datasets.imagenet import imagenet 17 | from datasets.vg import vg 18 | 19 | import numpy as np 20 | 21 | # Set up voc__ 22 | for year in ['2007', '2012']: 23 | for split in ['train', 'val', 'trainval', 'test']: 24 | name = 'voc_{}_{}'.format(year, split) 25 | __sets[name] = (lambda split=split, year=year: pascal_voc(split, year)) 26 | 27 | # Set up coco_2014_ 28 | for year in ['2014']: 29 | for split in ['train', 'val', 'minival', 'valminusminival', 'trainval']: 30 | name = 'coco_{}_{}'.format(year, split) 31 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 32 | 33 | # Set up coco_2014_cap_ 34 | for year in ['2014']: 35 | for split in ['train', 'val', 'capval', 'valminuscapval', 'trainval']: 36 | name = 'coco_{}_{}'.format(year, split) 37 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 38 | 39 | # Set up coco_2015_ 40 | for year in ['2015']: 41 | for split in ['test', 'test-dev']: 42 | name = 'coco_{}_{}'.format(year, split) 43 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 44 | 45 | # Set up vg_ 46 | # for version in ['1600-400-20']: 47 | # for split in ['minitrain', 'train', 'minival', 'val', 'test']: 48 | # name = 'vg_{}_{}'.format(version,split) 49 | # __sets[name] = (lambda split=split, version=version: vg(version, split)) 50 | for version in ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']: 51 | for split in ['minitrain', 'smalltrain', 'train', 'minival', 'smallval', 'val', 'test']: 52 | name = 'vg_{}_{}'.format(version,split) 53 | __sets[name] = (lambda split=split, version=version: vg(version, split)) 54 | 55 | # set up image net. 56 | for split in ['train', 'val', 'val1', 'val2', 'test']: 57 | name = 'imagenet_{}'.format(split) 58 | devkit_path = 'data/imagenet/ILSVRC/devkit' 59 | data_path = 'data/imagenet/ILSVRC' 60 | __sets[name] = (lambda split=split, devkit_path=devkit_path, data_path=data_path: imagenet(split,devkit_path,data_path)) 61 | 62 | def get_imdb(name): 63 | """Get an imdb (image database) by name.""" 64 | if name not in __sets: 65 | raise KeyError('Unknown dataset: {}'.format(name)) 66 | return __sets[name]() 67 | 68 | 69 | def list_imdbs(): 70 | """List all registered imdbs.""" 71 | return list(__sets.keys()) 72 | -------------------------------------------------------------------------------- /faster-rcnn/lib/datasets/tools/mcg_munge.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import sys 4 | 5 | """Hacky tool to convert file system layout of MCG boxes downloaded from 6 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/ 7 | so that it's consistent with those computed by Jan Hosang (see: 8 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal- 9 | computing/research/object-recognition-and-scene-understanding/how- 10 | good-are-detection-proposals-really/) 11 | 12 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order. 13 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order. 
14 | """ 15 | 16 | def munge(src_dir): 17 | # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat 18 | # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat 19 | 20 | files = os.listdir(src_dir) 21 | for fn in files: 22 | base, ext = os.path.splitext(fn) 23 | # first 14 chars / first 22 chars / all chars + .mat 24 | # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat 25 | first = base[:14] 26 | second = base[:22] 27 | dst_dir = os.path.join('MCG', 'mat', first, second) 28 | if not os.path.exists(dst_dir): 29 | os.makedirs(dst_dir) 30 | src = os.path.join(src_dir, fn) 31 | dst = os.path.join(dst_dir, fn) 32 | print('MV: {} -> {}'.format(src, dst)) 33 | os.rename(src, dst) 34 | 35 | if __name__ == '__main__': 36 | # src_dir should look something like: 37 | # src_dir = 'MCG-COCO-val2014-boxes' 38 | src_dir = sys.argv[1] 39 | munge(src_dir) 40 | -------------------------------------------------------------------------------- /faster-rcnn/lib/datasets/vg_eval.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | # -------------------------------------------------------- 3 | # Fast/er R-CNN 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Bharath Hariharan 6 | # -------------------------------------------------------- 7 | 8 | import xml.etree.ElementTree as ET 9 | import os 10 | import numpy as np 11 | from .voc_eval import voc_ap 12 | 13 | def vg_eval( detpath, 14 | gt_roidb, 15 | image_index, 16 | classindex, 17 | ovthresh=0.5, 18 | use_07_metric=False, 19 | eval_attributes=False): 20 | """rec, prec, ap, sorted_scores, npos = voc_eval( 21 | detpath, 22 | gt_roidb, 23 | image_index, 24 | classindex, 25 | [ovthresh], 26 | [use_07_metric]) 27 | 28 | Top level function that does the Visual Genome evaluation. 29 | 30 | detpath: Path to detections 31 | gt_roidb: List of ground truth structs. 32 | image_index: List of image ids. 
33 | classindex: Category index 34 | [ovthresh]: Overlap threshold (default = 0.5) 35 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 36 | (default False) 37 | """ 38 | # extract gt objects for this class 39 | class_recs = {} 40 | npos = 0 41 | for item,imagename in zip(gt_roidb,image_index): 42 | if eval_attributes: 43 | bbox = item['boxes'][np.where(np.any(item['gt_attributes'].toarray() == classindex, axis=1))[0], :] 44 | else: 45 | bbox = item['boxes'][np.where(item['gt_classes'] == classindex)[0], :] 46 | difficult = np.zeros((bbox.shape[0],)).astype(bool) 47 | det = [False] * bbox.shape[0] 48 | npos = npos + sum(~difficult) 49 | class_recs[str(imagename)] = {'bbox': bbox, 50 | 'difficult': difficult, 51 | 'det': det} 52 | if npos == 0: 53 | # No ground truth examples 54 | return 0,0,0,0,npos 55 | 56 | # read dets 57 | with open(detpath, 'r') as f: 58 | lines = f.readlines() 59 | if len(lines) == 0: 60 | # No detection examples 61 | return 0,0,0,0,npos 62 | 63 | splitlines = [x.strip().split(' ') for x in lines] 64 | image_ids = [x[0] for x in splitlines] 65 | confidence = np.array([float(x[1]) for x in splitlines]) 66 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 67 | 68 | # sort by confidence 69 | sorted_ind = np.argsort(-confidence) 70 | sorted_scores = -np.sort(-confidence) 71 | BB = BB[sorted_ind, :] 72 | image_ids = [image_ids[x] for x in sorted_ind] 73 | 74 | # go down dets and mark TPs and FPs 75 | nd = len(image_ids) 76 | tp = np.zeros(nd) 77 | fp = np.zeros(nd) 78 | for d in range(nd): 79 | R = class_recs[image_ids[d]] 80 | bb = BB[d, :].astype(float) 81 | ovmax = -np.inf 82 | BBGT = R['bbox'].astype(float) 83 | 84 | if BBGT.size > 0: 85 | # compute overlaps 86 | # intersection 87 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 88 | iymin = np.maximum(BBGT[:, 1], bb[1]) 89 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 90 | iymax = np.minimum(BBGT[:, 3], bb[3]) 91 | iw = np.maximum(ixmax - ixmin + 1., 0.) 92 | ih = np.maximum(iymax - iymin + 1., 0.) 93 | inters = iw * ih 94 | 95 | # union 96 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 97 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 98 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 99 | 100 | overlaps = inters / uni 101 | ovmax = np.max(overlaps) 102 | jmax = np.argmax(overlaps) 103 | 104 | if ovmax > ovthresh: 105 | if not R['difficult'][jmax]: 106 | if not R['det'][jmax]: 107 | tp[d] = 1. 108 | R['det'][jmax] = 1 109 | else: 110 | fp[d] = 1. 111 | else: 112 | fp[d] = 1. 113 | 114 | # compute precision recall 115 | fp = np.cumsum(fp) 116 | tp = np.cumsum(tp) 117 | rec = tp / float(npos) 118 | # avoid divide by zero in case the first detection matches a difficult 119 | # ground truth 120 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 121 | ap = voc_ap(rec, prec, use_07_metric) 122 | 123 | return rec, prec, ap, sorted_scores, npos 124 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/faster-rcnn/lib/model/__init__.py -------------------------------------------------------------------------------- /faster-rcnn/lib/model/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #include "cpu/vision.h" 3 | 4 | 5 | template <typename scalar_t> 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data<uint8_t>(); 30 | auto order = order_t.data<int64_t>(); 31 | auto x1 = x1_t.data<scalar_t>(); 32 | auto y1 = y1_t.data<scalar_t>(); 33 | auto x2 = x2_t.data<scalar_t>(); 34 | auto y2 = y2_t.data<scalar_t>(); 35 | auto areas = areas_t.data<scalar_t>(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel<scalar_t>(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/csrc/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include <torch/extension.h> 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/csrc/cuda/nms.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #include <ATen/ATen.h> 3 | #include <ATen/cuda/CUDAContext.h> 4 | 5 | #include <THC/THC.h> 6 | #include <THC/THCDeviceUtils.cuh> 7 | 8 | #include <vector> 9 | #include <iostream> 10 | 11 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 12 | 13 | __device__ inline float devIoU(float const * const a, float const * const b) { 14 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 15 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 16 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 17 | float interS = width * height; 18 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 19 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 20 | return interS / (Sa + Sb - interS); 21 | } 22 | 23 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 24 | const float *dev_boxes, unsigned long long *dev_mask) { 25 | const int row_start = blockIdx.y; 26 | const int col_start = blockIdx.x; 27 | 28 | // if (row_start > col_start) return; 29 | 30 | const int row_size = 31 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 32 | const int col_size = 33 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 34 | 35 | __shared__ float block_boxes[threadsPerBlock * 5]; 36 | if (threadIdx.x < col_size) { 37 | block_boxes[threadIdx.x * 5 + 0] = 38 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 39 | block_boxes[threadIdx.x * 5 + 1] = 40 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 41 | block_boxes[threadIdx.x * 5 + 2] = 42 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 43 | block_boxes[threadIdx.x * 5 + 3] = 44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 45 | block_boxes[threadIdx.x * 5 + 4] = 46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 47 | } 48 | __syncthreads(); 49 | 50 | if (threadIdx.x < row_size) { 51 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 52 | const float *cur_box = dev_boxes + cur_box_idx * 5; 53 | int i = 0; 54 | unsigned long long t = 0; 55 | int start = 0; 56 | if (row_start == col_start) { 57 | start = threadIdx.x + 1; 58 | } 59 | for (i = start; i < col_size; i++) { 60 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 61 | t |= 1ULL << i; 62 | } 63 | } 64 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 65 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 66 | } 67 | } 68 | 69 | // boxes is a N x 5 tensor 70 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { 71 | using scalar_t = float; 72 | AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); 73 | auto scores = boxes.select(1, 4); 74 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 75 | auto boxes_sorted = boxes.index_select(0, order_t); 76 | 77 | int boxes_num = boxes.size(0); 78 | 79 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); 80 | 81 | scalar_t* boxes_dev = boxes_sorted.data<scalar_t>(); 82 | 83 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState 84 | 85 | unsigned long long* mask_dev = NULL; 86 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, 87 | // boxes_num * col_blocks * sizeof(unsigned long long))); 88 | 89 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); 90 | 91 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), 92 | THCCeilDiv(boxes_num, threadsPerBlock)); 93 | dim3 threads(threadsPerBlock); 94 | nms_kernel<<<blocks, threads>>>(boxes_num, 95 | nms_overlap_thresh, 96 | boxes_dev, 97 | mask_dev); 98 | 99 | std::vector<unsigned long long>
mask_host(boxes_num * col_blocks); 100 | THCudaCheck(cudaMemcpy(&mask_host[0], 101 | mask_dev, 102 | sizeof(unsigned long long) * boxes_num * col_blocks, 103 | cudaMemcpyDeviceToHost)); 104 | 105 | std::vector<unsigned long long> remv(col_blocks); 106 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 107 | 108 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); 109 | int64_t* keep_out = keep.data<int64_t>(); 110 | 111 | int num_to_keep = 0; 112 | for (int i = 0; i < boxes_num; i++) { 113 | int nblock = i / threadsPerBlock; 114 | int inblock = i % threadsPerBlock; 115 | 116 | if (!(remv[nblock] & (1ULL << inblock))) { 117 | keep_out[num_to_keep++] = i; 118 | unsigned long long *p = &mask_host[0] + i * col_blocks; 119 | for (int j = nblock; j < col_blocks; j++) { 120 | remv[j] |= p[j]; 121 | } 122 | } 123 | } 124 | 125 | THCudaFree(state, mask_dev); 126 | // TODO improve this part 127 | return std::get<0>(order_t.index({ 128 | keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to( 129 | order_t.device(), keep.scalar_type()) 130 | }).sort(0, false)); 131 | } 132 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/csrc/cuda/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include <torch/extension.h> 4 | 5 | 6 | at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, 14 | const at::Tensor& rois, 15 | const float spatial_scale, 16 | const int pooled_height, 17 | const int pooled_width, 18 | const int batch_size, 19 | const int channels, 20 | const int height, 21 | const int width, 22 | const int sampling_ratio); 23 | 24 | 25 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(const at::Tensor& input, 26 | const at::Tensor& rois, 27 | const float spatial_scale, 28 | const int pooled_height, 29 | const int pooled_width); 30 | 31 | at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, 32 | const at::Tensor& input, 33 | const at::Tensor& rois, 34 | const at::Tensor& argmax, 35 | const float spatial_scale, 36 | const int pooled_height, 37 | const int pooled_width, 38 | const int batch_size, 39 | const int channels, 40 | const int height, 41 | const int width); 42 | 43 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 44 | 45 | 46 | at::Tensor compute_flow_cuda(const at::Tensor& boxes, 47 | const int height, 48 | const int width); 49 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "nms.h" 3 | #include "ROIAlign.h" 4 | #include "ROIPool.h" 5 | 6 | 7 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 8 | m.def("nms", &nms, "non-maximum suppression"); 9 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 10 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 11 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 12 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 13 | } 14 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/faster_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/faster-rcnn/lib/model/faster_rcnn/__init__.py -------------------------------------------------------------------------------- /faster-rcnn/lib/model/faster_rcnn/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | import random 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | import torchvision.models as models 7 | from torch.autograd import Variable 8 | import numpy as np 9 | from model.utils.config import cfg 10 | from model.rpn.rpn import _RPN 11 | 12 | from model.roi_layers import ROIAlign, ROIPool 13 | 14 | # from model.roi_pooling.modules.roi_pool import _RoIPooling 15 | # from model.roi_align.modules.roi_align import RoIAlignAvg 16 | 17 | from model.rpn.proposal_target_layer_cascade import _ProposalTargetLayer 18 | import time 19 | import pdb 20 | from model.utils.net_utils import _smooth_l1_loss, _crop_pool_layer, _affine_grid_gen, _affine_theta 21 | 22 | from PIL import Image 23 | 24 | 25 | class _fasterRCNN(nn.Module): 26 | """ faster RCNN """ 27 | def __init__(self, classes, class_agnostic): 28 | super(_fasterRCNN, self).__init__() 29 | self.classes = classes 30 | self.n_classes = len(classes) 31 | self.class_agnostic = class_agnostic 32 | # loss 33 | self.RCNN_loss_cls = 0 34 | self.RCNN_loss_bbox = 0 35 | 36 | # define rpn 37 | self.RCNN_rpn = _RPN(self.dout_base_model) 38 | self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes) 39 | 40 | # self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0) 41 | # self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0) 42 | 43 | self.RCNN_roi_pool = ROIPool((cfg.POOLING_SIZE, cfg.POOLING_SIZE), 1.0/16.0) 44 | 
self.RCNN_roi_align = ROIAlign((cfg.POOLING_SIZE, cfg.POOLING_SIZE), 1.0/16.0, 0) 45 | 46 | def forward(self, im_data, im_info, gt_boxes, num_boxes, pool_feat=False, fgroi=None): 47 | batch_size = im_data.size(0) 48 | 49 | im_info = im_info.data 50 | gt_boxes = gt_boxes.data 51 | num_boxes = num_boxes.data 52 | 53 | # feed image data to base model to obtain base feature map 54 | base_feat = self.RCNN_base(im_data) 55 | 56 | # feed base feature map to RPN to obtain rois 57 | rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes) 58 | 59 | #### append the foreground box as an extra ROI so its feature is pooled as well 60 | if fgroi is not None: 61 | fg_roi = torch.zeros(rois.size(0), 1, 5) 62 | if cfg.USE_GPU_NMS: 63 | fg_roi = fg_roi.cuda() 64 | for ii in range(rois.size(0)): 65 | fg_roi[ii, :, 0] = rois[ii, 0, 0] 66 | fg_roi[ii, :, 1:5] = torch.tensor(fgroi[ii]).float() 67 | # print(fgroi[ii]) 68 | # print(fg_roi) 69 | rois = torch.cat((rois, fg_roi), dim=1) 70 | # print(rois.shape) 71 | # print(rois) 72 | 73 | 74 | # if it is the training phase, then use ground-truth bboxes for refining 75 | if self.training: 76 | roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes) 77 | rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data 78 | 79 | rois_label = Variable(rois_label.view(-1).long()) 80 | rois_target = Variable(rois_target.view(-1, rois_target.size(2))) 81 | rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2))) 82 | rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2))) 83 | else: 84 | rois_label = None 85 | rois_target = None 86 | rois_inside_ws = None 87 | rois_outside_ws = None 88 | rpn_loss_cls = 0 89 | rpn_loss_bbox = 0 90 | 91 | rois = Variable(rois) 92 | # do roi pooling based on predicted rois 93 | 94 | if cfg.POOLING_MODE == 'align': 95 | pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5)) 96 | elif cfg.POOLING_MODE == 'pool': 97 | pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1,5)) 98 | 99 | # feed pooled features to top model 100 | pooled_feat = self._head_to_tail(pooled_feat) 101 | 102 | if fgroi is not None: 103 | fg_pooled_feat = pooled_feat[-1, :] 104 | pooled_feat = pooled_feat[:-1, :] 105 | rois = rois[:, :-1] 106 | 107 | # compute bbox offset 108 | bbox_pred = self.RCNN_bbox_pred(pooled_feat) 109 | if self.training and not self.class_agnostic: 110 | # select the corresponding columns according to roi labels 111 | bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4) 112 | bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4)) 113 | bbox_pred = bbox_pred_select.squeeze(1) 114 | 115 | # compute object classification probability 116 | cls_score = self.RCNN_cls_score(pooled_feat) 117 | cls_prob = F.softmax(cls_score, 1) 118 | 119 | RCNN_loss_cls = 0 120 | RCNN_loss_bbox = 0 121 | 122 | if self.training: 123 | # classification loss 124 | RCNN_loss_cls = F.cross_entropy(cls_score, rois_label) 125 | 126 | # bounding box regression L1 loss 127 | RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws) 128 | 129 | 130 | cls_prob = cls_prob.view(batch_size, rois.size(1), -1) 131 | bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1) 132 | 133 | if pool_feat and fgroi is not None: 134 | return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, pooled_feat, fg_pooled_feat 135 | if pool_feat: 136 | return rois, cls_prob, bbox_pred, 
rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, pooled_feat 137 | return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label 138 | 139 | def _init_weights(self): 140 | def normal_init(m, mean, stddev, truncated=False): 141 | """ 142 | weight initializer: truncated normal and random normal. 143 | """ 144 | # x is a parameter 145 | if truncated: 146 | m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) # not a perfect approximation 147 | else: 148 | m.weight.data.normal_(mean, stddev) 149 | m.bias.data.zero_() 150 | 151 | normal_init(self.RCNN_rpn.RPN_Conv, 0, 0.01, cfg.TRAIN.TRUNCATED) 152 | normal_init(self.RCNN_rpn.RPN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED) 153 | normal_init(self.RCNN_rpn.RPN_bbox_pred, 0, 0.01, cfg.TRAIN.TRUNCATED) 154 | normal_init(self.RCNN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED) 155 | normal_init(self.RCNN_bbox_pred, 0, 0.001, cfg.TRAIN.TRUNCATED) 156 | 157 | def create_architecture(self): 158 | self._init_modules() 159 | self._init_weights() 160 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/faster_rcnn/vgg16.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from torch.autograd import Variable 14 | import math 15 | import torchvision.models as models 16 | from model.faster_rcnn.faster_rcnn import _fasterRCNN 17 | import pdb 18 | 19 | class vgg16(_fasterRCNN): 20 | def __init__(self, classes, pretrained=False, class_agnostic=False): 21 | self.model_path = 'data/pretrained_model/vgg16_caffe.pth' 22 | self.dout_base_model = 512 23 | self.pretrained = pretrained 24 | self.class_agnostic = class_agnostic 25 | 26 | _fasterRCNN.__init__(self, classes, class_agnostic) 27 | 28 | def _init_modules(self): 29 | vgg = models.vgg16() 30 | if self.pretrained: 31 | print("Loading pretrained weights from %s" %(self.model_path)) 32 | state_dict = torch.load(self.model_path) 33 | vgg.load_state_dict({k:v for k,v in state_dict.items() if k in vgg.state_dict()}) 34 | 35 | vgg.classifier = nn.Sequential(*list(vgg.classifier._modules.values())[:-1]) 36 | 37 | # not using the last maxpool layer 38 | self.RCNN_base = nn.Sequential(*list(vgg.features._modules.values())[:-1]) 39 | 40 | # Fix the layers before conv3: 41 | for layer in range(10): 42 | for p in self.RCNN_base[layer].parameters(): p.requires_grad = False 43 | 44 | # self.RCNN_base = _RCNN_base(vgg.features, self.classes, self.dout_base_model) 45 | 46 | self.RCNN_top = vgg.classifier 47 | 48 | # classification and box-regression heads on top of the fc7 features 49 | self.RCNN_cls_score = nn.Linear(4096, self.n_classes) 50 | 51 | if self.class_agnostic: 52 | self.RCNN_bbox_pred = nn.Linear(4096, 4) 53 | else: 54 | self.RCNN_bbox_pred = nn.Linear(4096, 4 * self.n_classes) 55 | 56 | def _head_to_tail(self, pool5): 57 | 58 | pool5_flat = pool5.view(pool5.size(0), -1) 59 | fc7 = self.RCNN_top(pool5_flat) 60 | 61 | return fc7 62 | 63 | --------------------------------------------------------------------------------
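A quick orientation note before the legacy nms/ build files: in the two files above, a backbone subclass such as vgg16 supplies RCNN_base/RCNN_top, create_architecture() wires up the RPN and the heads and initializes their weights, and forward() returns rois together with class probabilities and box offsets. The sketch below is illustrative only and is not a file in this repository; the class tuple and tensor shapes are hypothetical, and it assumes the compiled model._C extension and the default cfg are available.

import torch
from model.faster_rcnn.vgg16 import vgg16

classes = ('__background__', 'person', 'dog')  # hypothetical label set
net = vgg16(classes, pretrained=False, class_agnostic=False)
net.create_architecture()   # builds modules and initializes weights
net.eval()                  # inference mode: no proposal-target sampling or losses

im_data = torch.randn(1, 3, 600, 800)        # NCHW image batch
im_info = torch.tensor([[600., 800., 1.0]])  # per-image (height, width, scale)
gt_boxes = torch.zeros(1, 1, 5)              # unused outside training
num_boxes = torch.zeros(1)
with torch.no_grad():
    rois, cls_prob, bbox_pred, *_ = net(im_data, im_info, gt_boxes, num_boxes)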
/faster-rcnn/lib/model/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/faster-rcnn/lib/model/nms/__init__.py -------------------------------------------------------------------------------- /faster-rcnn/lib/model/nms/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/faster-rcnn/lib/model/nms/_ext/__init__.py -------------------------------------------------------------------------------- /faster-rcnn/lib/model/nms/_ext/nms/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._nms import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/nms/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | #this_file = os.path.dirname(__file__) 7 | 8 | sources = [] 9 | headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/nms_cuda.c'] 16 | headers += ['src/nms_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/nms_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | print(extra_objects) 25 | 26 | ffi = create_extension( 27 | '_ext.nms', 28 | headers=headers, 29 | sources=sources, 30 | define_macros=defines, 31 | relative_to=__file__, 32 | with_cuda=with_cuda, 33 | extra_objects=extra_objects 34 | ) 35 | 36 | if __name__ == '__main__': 37 | ffi.build() 38 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/nms/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling nms kernels by nvcc..."
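# Note: -arch=sm_52 below targets Maxwell-era GPUs; on newer hardware the build
# would typically need a matching compute capability (e.g. -arch=sm_70 for
# Volta), or, better, the modern extension under lib/model/csrc built by
# lib/setup.py instead of this legacy FFI path.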
7 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/nms/nms_cpu.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | import torch 5 | 6 | def nms_cpu(dets, thresh): 7 | dets = dets.numpy() 8 | x1 = dets[:, 0] 9 | y1 = dets[:, 1] 10 | x2 = dets[:, 2] 11 | y2 = dets[:, 3] 12 | scores = dets[:, 4] 13 | 14 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 15 | order = scores.argsort()[::-1] 16 | 17 | keep = [] 18 | while order.size > 0: 19 | i = order.item(0) 20 | keep.append(i) 21 | xx1 = np.maximum(x1[i], x1[order[1:]]) 22 | yy1 = np.maximum(y1[i], y1[order[1:]]) 23 | xx2 = np.minimum(x2[i], x2[order[1:]])  # min, not max: far corner of the intersection 24 | yy2 = np.minimum(y2[i], y2[order[1:]]) 25 | 26 | w = np.maximum(0.0, xx2 - xx1 + 1) 27 | h = np.maximum(0.0, yy2 - yy1 + 1) 28 | inter = w * h 29 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 30 | 31 | inds = np.where(ovr <= thresh)[0] 32 | order = order[inds + 1] 33 | 34 | return torch.IntTensor(keep) 35 | 36 | 37 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/nms/nms_gpu.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import torch 3 | import numpy as np 4 | from ._ext import nms 5 | import pdb 6 | 7 | def nms_gpu(dets, thresh): 8 | keep = dets.new(dets.size(0), 1).zero_().int() 9 | num_out = dets.new(1).zero_().int() 10 | nms.nms_cuda(keep, dets, num_out, thresh) 11 | keep = keep[:num_out[0]] 12 | return keep 13 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | 
min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(&current_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<<blocks, threads>>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector<unsigned long long> remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | 
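The kernel above computes, for each box, a bitmask over 64-box column blocks marking every other box it overlaps beyond the IoU threshold (note the +1 terms in devIoU: coordinates are treated as inclusive pixel indices); the host loop then walks boxes in descending score order and keeps one whenever no previously kept box has marked it. A compact NumPy re-statement of that host-side sweep, illustrative only, assuming mask is the (boxes_num, col_blocks) uint64 array copied back from the device:

import numpy as np

def bitmask_nms_sweep(mask, boxes_num, threads_per_block=64):
    # mask[i, j] has bit b set when box i overlaps box j * threads_per_block + b
    col_blocks = mask.shape[1]
    remv = np.zeros(col_blocks, dtype=np.uint64)    # accumulated suppression bits
    keep = []
    for i in range(boxes_num):
        nblock, inblock = divmod(i, threads_per_block)
        if not (int(remv[nblock]) >> inblock) & 1:  # box i not yet suppressed
            keep.append(i)
            remv |= mask[i]                         # suppress everything box i covers
    return keep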
-------------------------------------------------------------------------------- /faster-rcnn/lib/model/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | import torch 8 | from model.utils.config import cfg 9 | if torch.cuda.is_available(): 10 | from model.nms.nms_gpu import nms_gpu 11 | from model.nms.nms_cpu import nms_cpu 12 | 13 | def nms(dets, thresh, force_cpu=False): 14 | """Dispatch to either CPU or GPU NMS implementations.""" 15 | if dets.shape[0] == 0: 16 | return [] 17 | # ---numpy version--- 18 | # original: return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 19 | # ---pytorch version--- 20 | 21 | return nms_cpu(dets, thresh) if force_cpu else nms_gpu(dets, thresh) 22 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | // int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out, 2 | // THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh); 3 | 4 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host, 5 | THCudaIntTensor *num_out, float nms_overlap_thresh); 6 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/nms/src/nms_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include <stdio.h> 9 | #include <string.h> 10 | #include <vector> 11 | #include <iostream> 12 | #include "nms_cuda_kernel.h" 13 | 14 | #define CUDA_WARN(XXX) \ 15 | do { if (XXX != cudaSuccess) std::cout << "CUDA Error: " << \ 16 | cudaGetErrorString(XXX) << ", at line " << __LINE__ \ 17 | << std::endl; cudaDeviceSynchronize(); } while (0) 18 | 19 | #define CUDA_CHECK(condition) \ 20 | /* Code block avoids redefinition of cudaError_t error */ \ 21 | do { \ 22 | cudaError_t error = condition; \ 23 | if (error != cudaSuccess) { \ 24 | std::cout << cudaGetErrorString(error) << std::endl; \ 25 | } \ 26 | } while (0) 27 | 28 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 29 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 30 | 31 | __device__ inline float devIoU(float const * const a, float const * const b) { 32 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 33 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 34 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 35 | float interS = width * height; 36 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 37 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 38 | return interS / (Sa + Sb - interS); 39 | } 40 | 41 | __global__ void nms_kernel(int n_boxes, float nms_overlap_thresh, 42 | float *dev_boxes, unsigned long long *dev_mask) { 43 | const int row_start = blockIdx.y; 44 | const int col_start = blockIdx.x; 45 | 46 | // if (row_start > col_start) return; 47 | 48 | const int row_size = 49 | min(n_boxes - row_start * 
threadsPerBlock, threadsPerBlock); 50 | const int col_size = 51 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 52 | 53 | __shared__ float block_boxes[threadsPerBlock * 5]; 54 | if (threadIdx.x < col_size) { 55 | block_boxes[threadIdx.x * 5 + 0] = 56 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 57 | block_boxes[threadIdx.x * 5 + 1] = 58 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 59 | block_boxes[threadIdx.x * 5 + 2] = 60 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 61 | block_boxes[threadIdx.x * 5 + 3] = 62 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 63 | block_boxes[threadIdx.x * 5 + 4] = 64 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 65 | } 66 | __syncthreads(); 67 | 68 | if (threadIdx.x < row_size) { 69 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 70 | const float *cur_box = dev_boxes + cur_box_idx * 5; 71 | int i = 0; 72 | unsigned long long t = 0; 73 | int start = 0; 74 | if (row_start == col_start) { 75 | start = threadIdx.x + 1; 76 | } 77 | for (i = start; i < col_size; i++) { 78 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 79 | t |= 1ULL << i; 80 | } 81 | } 82 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 83 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 84 | } 85 | } 86 | 87 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num, 88 | int boxes_dim, float nms_overlap_thresh) { 89 | 90 | float* boxes_dev = NULL; 91 | unsigned long long* mask_dev = NULL; 92 | 93 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 94 | 95 | CUDA_CHECK(cudaMalloc(&boxes_dev, 96 | boxes_num * boxes_dim * sizeof(float))); 97 | CUDA_CHECK(cudaMemcpy(boxes_dev, 98 | boxes_host, 99 | boxes_num * boxes_dim * sizeof(float), 100 | cudaMemcpyHostToDevice)); 101 | 102 | CUDA_CHECK(cudaMalloc(&mask_dev, 103 | boxes_num * col_blocks * sizeof(unsigned long long))); 104 | 105 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 106 | DIVUP(boxes_num, threadsPerBlock)); 107 | dim3 threads(threadsPerBlock); 108 | 109 | // printf("i am at line %d\n", boxes_num); 110 | // printf("i am at line %d\n", boxes_dim); 111 | 112 | nms_kernel<<<blocks, threads>>>(boxes_num, 113 | nms_overlap_thresh, 114 | boxes_dev, 115 | mask_dev); 116 | 117 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 118 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 119 | mask_dev, 120 | sizeof(unsigned long long) * boxes_num * col_blocks, 121 | cudaMemcpyDeviceToHost)); 122 | 123 | std::vector<unsigned long long> remv(col_blocks); 124 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 125 | 126 | // we need to create a memory for keep_out on cpu 127 | // otherwise, the following code cannot run 128 | 129 | int* keep_out_cpu = new int[boxes_num]; 130 | 131 | int num_to_keep = 0; 132 | for (int i = 0; i < boxes_num; i++) { 133 | int nblock = i / threadsPerBlock; 134 | int inblock = i % threadsPerBlock; 135 | 136 | if (!(remv[nblock] & (1ULL << inblock))) { 137 | // original: keep_out[num_to_keep++] = i; 138 | keep_out_cpu[num_to_keep++] = i; 139 | unsigned long long *p = &mask_host[0] + i * col_blocks; 140 | for (int j = nblock; j < col_blocks; j++) { 141 | remv[j] |= p[j]; 142 | } 143 | } 144 | } 145 | 146 | // copy keep_out_cpu to keep_out on gpu 147 | CUDA_WARN(cudaMemcpy(keep_out, keep_out_cpu, boxes_num * sizeof(int),cudaMemcpyHostToDevice)); 148 | 149 | // *num_out = num_to_keep; 150 | 151 | // original: *num_out = num_to_keep; 152 | // copy num_to_keep to 
num_out on gpu 153 | 154 | CUDA_WARN(cudaMemcpy(num_out, &num_to_keep, 1 * sizeof(int),cudaMemcpyHostToDevice)); 155 | 156 | // release cuda memory 157 | CUDA_CHECK(cudaFree(boxes_dev)); 158 | CUDA_CHECK(cudaFree(mask_dev)); 159 | // release cpu memory 160 | delete []keep_out_cpu; 161 | } 162 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/nms/src/nms_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num, 6 | int boxes_dim, float nms_overlap_thresh); 7 | 8 | #ifdef __cplusplus 9 | } 10 | #endif 11 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/faster-rcnn/lib/model/roi_align/__init__.py -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/faster-rcnn/lib/model/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_align/_ext/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_align import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_align/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | sources = ['src/roi_align.c'] 7 | headers = ['src/roi_align.h'] 8 | extra_objects = [] 9 | #sources = [] 10 | #headers = [] 11 | defines = [] 12 | with_cuda = False 13 | 14 | this_file = os.path.dirname(os.path.realpath(__file__)) 15 | print(this_file) 16 | 17 | if torch.cuda.is_available(): 18 | print('Including CUDA code.') 19 | sources += ['src/roi_align_cuda.c'] 20 | headers += ['src/roi_align_cuda.h'] 21 | defines += [('WITH_CUDA', None)] 22 | with_cuda = True 23 | 24 | extra_objects = ['src/roi_align_kernel.cu.o'] 25 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 26 | 27 | ffi = create_extension( 28 | '_ext.roi_align', 29 | headers=headers, 30 | sources=sources, 31 | define_macros=defines, 32 | relative_to=__file__, 33 | with_cuda=with_cuda, 34 | extra_objects=extra_objects 35 | ) 36 | 37 | if __name__ == '__main__': 38 | ffi.build() 39 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/faster-rcnn/lib/model/roi_align/functions/__init__.py -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import roi_align 4 | 5 | 6 | # TODO use save_for_backward instead 7 | class RoIAlignFunction(Function): 8 | def __init__(self, aligned_height, aligned_width, spatial_scale): 9 | self.aligned_width = int(aligned_width) 10 | self.aligned_height = int(aligned_height) 11 | self.spatial_scale = float(spatial_scale) 12 | self.rois = None 13 | self.feature_size = None 14 | 15 | def forward(self, features, rois): 16 | self.rois = rois 17 | self.feature_size = features.size() 18 | 19 | batch_size, num_channels, data_height, data_width = features.size() 20 | num_rois = rois.size(0) 21 | 22 | output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_() 23 | if features.is_cuda: 24 | roi_align.roi_align_forward_cuda(self.aligned_height, 25 | self.aligned_width, 26 | self.spatial_scale, features, 27 | rois, output) 28 | else: 29 | roi_align.roi_align_forward(self.aligned_height, 30 | self.aligned_width, 31 | self.spatial_scale, features, 32 | rois, output) 33 | # raise NotImplementedError 34 | 35 | return output 36 | 37 | def backward(self, grad_output): 38 | assert(self.feature_size is not None and grad_output.is_cuda) 39 | 40 | batch_size, num_channels, data_height, data_width = self.feature_size 41 | 42 | grad_input = self.rois.new(batch_size, num_channels, data_height, 43 | data_width).zero_() 44 | roi_align.roi_align_backward_cuda(self.aligned_height, 45 | self.aligned_width, 46 | self.spatial_scale, grad_output, 47 | self.rois, grad_input) 48 | 49 | # print grad_input 50 | 51 | return grad_input, None 52 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_align/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling roi_align kernels by nvcc..."
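# Note: the object file built below (roi_align_kernel.cu.o) is linked into a
# Python extension by build.py via torch.utils.ffi, an API that was removed
# after PyTorch 0.4; on modern PyTorch the equivalent ops come from the
# compiled model._C extension under lib/model/csrc instead.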
7 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/faster-rcnn/lib/model/roi_align/modules/__init__.py -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from torch.nn.functional import avg_pool2d, max_pool2d 3 | from ..functions.roi_align import RoIAlignFunction 4 | 5 | 6 | class RoIAlign(Module): 7 | def __init__(self, aligned_height, aligned_width, spatial_scale): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.aligned_width = int(aligned_width) 11 | self.aligned_height = int(aligned_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction(self.aligned_height, self.aligned_width, 16 | self.spatial_scale)(features, rois) 17 | 18 | class RoIAlignAvg(Module): 19 | def __init__(self, aligned_height, aligned_width, spatial_scale): 20 | super(RoIAlignAvg, self).__init__() 21 | 22 | self.aligned_width = int(aligned_width) 23 | self.aligned_height = int(aligned_height) 24 | self.spatial_scale = float(spatial_scale) 25 | 26 | def forward(self, features, rois): 27 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 28 | self.spatial_scale)(features, rois) 29 | return avg_pool2d(x, kernel_size=2, stride=1) 30 | 31 | class RoIAlignMax(Module): 32 | def __init__(self, aligned_height, aligned_width, spatial_scale): 33 | super(RoIAlignMax, self).__init__() 34 | 35 | self.aligned_width = int(aligned_width) 36 | self.aligned_height = int(aligned_height) 37 | self.spatial_scale = float(spatial_scale) 38 | 39 | def forward(self, features, rois): 40 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 41 | self.spatial_scale)(features, rois) 42 | return max_pool2d(x, kernel_size=2, stride=1) 43 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_align/src/roi_align.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward(int aligned_height, int aligned_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); 3 | 4 | int roi_align_backward(int aligned_height, int aligned_width, float spatial_scale, 5 | THFloatTensor * top_grad, THFloatTensor * rois, THFloatTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_align/src/roi_align_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <math.h> 3 | #include "roi_align_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = 
THCudaTensor_data(state, output); 15 | 16 | // Number of ROIs 17 | int num_rois = THCudaTensor_size(state, rois, 0); 18 | int size_rois = THCudaTensor_size(state, rois, 1); 19 | if (size_rois != 5) 20 | { 21 | return 0; 22 | } 23 | 24 | // data height 25 | int data_height = THCudaTensor_size(state, features, 2); 26 | // data width 27 | int data_width = THCudaTensor_size(state, features, 3); 28 | // Number of channels 29 | int num_channels = THCudaTensor_size(state, features, 1); 30 | 31 | cudaStream_t stream = THCState_getCurrentStream(state); 32 | 33 | ROIAlignForwardLaucher( 34 | data_flat, spatial_scale, num_rois, data_height, 35 | data_width, num_channels, aligned_height, 36 | aligned_width, rois_flat, 37 | output_flat, stream); 38 | 39 | return 1; 40 | } 41 | 42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 43 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 44 | { 45 | // Grab the input tensor 46 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 47 | float * rois_flat = THCudaTensor_data(state, rois); 48 | 49 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 50 | 51 | // Number of ROIs 52 | int num_rois = THCudaTensor_size(state, rois, 0); 53 | int size_rois = THCudaTensor_size(state, rois, 1); 54 | if (size_rois != 5) 55 | { 56 | return 0; 57 | } 58 | 59 | // batch size 60 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 61 | // data height 62 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 63 | // data width 64 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 65 | // Number of channels 66 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 67 | 68 | cudaStream_t stream = THCState_getCurrentStream(state); 69 | ROIAlignBackwardLaucher( 70 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 71 | data_width, num_channels, aligned_height, 72 | aligned_width, rois_flat, 73 | bottom_grad_flat, stream); 74 | 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_align/src/roi_align_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 3 | 4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_align/src/roi_align_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define _ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, 9 | const float spatial_scale, const int height, const int width, 10 | const int channels, const int aligned_height, const int aligned_width, 11 | const float* bottom_rois, float* top_data); 12 | 13 | int ROIAlignForwardLaucher( 14 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 15 | const int width, const int channels, const int aligned_height, 16 | const int aligned_width, const float* bottom_rois, 17 | float* top_data, cudaStream_t stream); 18 | 19 | __global__ void ROIAlignBackward(const 
int nthreads, const float* top_diff, 20 | const float spatial_scale, const int height, const int width, 21 | const int channels, const int aligned_height, const int aligned_width, 22 | float* bottom_diff, const float* bottom_rois); 23 | 24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 25 | const int height, const int width, const int channels, const int aligned_height, 26 | const int aligned_width, const float* bottom_rois, 27 | float* bottom_diff, cudaStream_t stream); 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_crop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/faster-rcnn/lib/model/roi_crop/__init__.py -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_crop/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/faster-rcnn/lib/model/roi_crop/_ext/__init__.py -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_crop/_ext/crop_resize/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._crop_resize import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | locals[symbol] = _wrap_function(fn, _ffi) 10 | __all__.append(symbol) 11 | 12 | _import_symbols(locals()) 13 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_crop/_ext/crop_resize/_crop_resize.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/faster-rcnn/lib/model/roi_crop/_ext/crop_resize/_crop_resize.so -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_crop/_ext/roi_crop/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_crop import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_crop/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | #this_file = os.path.dirname(__file__) 7 | 8 | sources = ['src/roi_crop.c'] 9 | headers = ['src/roi_crop.h'] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_crop_cuda.c'] 16 | headers += ['src/roi_crop_cuda.h'] 
17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/roi_crop_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_crop', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_crop/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/faster-rcnn/lib/model/roi_crop/functions/__init__.py -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_crop/functions/crop_resize.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | from .._ext import roi_crop 5 | from cffi import FFI 6 | ffi = FFI() 7 | 8 | class RoICropFunction(Function): 9 | def forward(self, input1, input2): 10 | self.input1 = input1 11 | self.input2 = input2 12 | self.device_c = ffi.new("int *") 13 | output = torch.zeros(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]) 14 | #print('device %d' % torch.cuda.current_device()) 15 | if input1.is_cuda: 16 | self.device = torch.cuda.current_device() 17 | else: 18 | self.device = -1 19 | self.device_c[0] = self.device 20 | if not input1.is_cuda: 21 | roi_crop.BilinearSamplerBHWD_updateOutput(input1, input2, output) 22 | else: 23 | output = output.cuda(self.device) 24 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output) 25 | return output 26 | 27 | def backward(self, grad_output): 28 | grad_input1 = torch.zeros(self.input1.size()) 29 | grad_input2 = torch.zeros(self.input2.size()) 30 | #print('backward device %d' % self.device) 31 | if not grad_output.is_cuda: 32 | roi_crop.BilinearSamplerBHWD_updateGradInput(self.input1, self.input2, grad_input1, grad_input2, grad_output) 33 | else: 34 | grad_input1 = grad_input1.cuda(self.device) 35 | grad_input2 = grad_input2.cuda(self.device) 36 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output) 37 | return grad_input1, grad_input2 38 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_crop/functions/gridgen.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | import numpy as np 5 | 6 | 7 | class AffineGridGenFunction(Function): 8 | def __init__(self, height, width,lr=1): 9 | super(AffineGridGenFunction, self).__init__() 10 | self.lr = lr 11 | self.height, self.width = height, width 12 | self.grid = np.zeros( [self.height, self.width, 3], dtype=np.float32) 13 | self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height)), 0), repeats = self.width, axis = 0).T, 0) 14 | self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width)), 0), repeats = self.height, axis = 0), 0) 15 | # self.grid[:,:,0] = 
np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height - 1)), 0), repeats = self.width, axis = 0).T, 0) 16 | # self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width - 1)), 0), repeats = self.height, axis = 0), 0) 17 | self.grid[:,:,2] = np.ones([self.height, width]) 18 | self.grid = torch.from_numpy(self.grid.astype(np.float32)) 19 | #print(self.grid) 20 | 21 | def forward(self, input1): 22 | self.input1 = input1 23 | output = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_() 24 | self.batchgrid = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_() 25 | for i in range(input1.size(0)): 26 | self.batchgrid[i] = self.grid.type_as(self.batchgrid[i])  # type_as, not numpy-style astype: self.grid is a torch tensor here 27 | 28 | # if input1.is_cuda: 29 | # self.batchgrid = self.batchgrid.cuda() 30 | # output = output.cuda() 31 | 32 | for i in range(input1.size(0)): 33 | output = torch.bmm(self.batchgrid.view(-1, self.height*self.width, 3), torch.transpose(input1, 1, 2)).view(-1, self.height, self.width, 2) 34 | 35 | return output 36 | 37 | def backward(self, grad_output): 38 | 39 | grad_input1 = self.input1.new(self.input1.size()).zero_() 40 | 41 | # if grad_output.is_cuda: 42 | # self.batchgrid = self.batchgrid.cuda() 43 | # grad_input1 = grad_input1.cuda() 44 | 45 | grad_input1 = torch.baddbmm(grad_input1, torch.transpose(grad_output.view(-1, self.height*self.width, 2), 1,2), self.batchgrid.view(-1, self.height*self.width, 3)) 46 | return grad_input1 47 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_crop/functions/roi_crop.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | from .._ext import roi_crop 5 | import pdb 6 | 7 | class RoICropFunction(Function): 8 | def forward(self, input1, input2): 9 | self.input1 = input1.clone() 10 | self.input2 = input2.clone() 11 | output = input2.new(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]).zero_() 12 | assert output.get_device() == input1.get_device(), "output and input1 must be on the same device" 13 | assert output.get_device() == input2.get_device(), "output and input2 must be on the same device" 14 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output) 15 | return output 16 | 17 | def backward(self, grad_output): 18 | grad_input1 = self.input1.new(self.input1.size()).zero_() 19 | grad_input2 = self.input2.new(self.input2.size()).zero_() 20 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output) 21 | return grad_input1, grad_input2 22 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_crop/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling roi_crop kernels by nvcc..."
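# roi_crop is an STN-style bilinear sampler (affine grid generator plus
# sampler) rather than a pooling layer; like the other make.sh builds in this
# tree it relies on the pre-1.0 torch.utils.ffi toolchain.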
7 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_crop/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/faster-rcnn/lib/model/roi_crop/modules/__init__.py -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_crop/modules/roi_crop.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_crop import RoICropFunction 3 | 4 | class _RoICrop(Module): 5 | def __init__(self, layout = 'BHWD'): 6 | super(_RoICrop, self).__init__() 7 | def forward(self, input1, input2): 8 | return RoICropFunction()(input1, input2) 9 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_crop/src/roi_crop.h: -------------------------------------------------------------------------------- 1 | int BilinearSamplerBHWD_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 2 | 3 | int BilinearSamplerBHWD_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 4 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 5 | 6 | 7 | 8 | int BilinearSamplerBCHW_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 9 | 10 | int BilinearSamplerBCHW_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 11 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 12 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_crop/src/roi_crop_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <stdbool.h> 3 | #include <stdio.h> 4 | #include "roi_crop_cuda_kernel.h" 5 | 6 | #define real float 7 | 8 | // this symbol will be resolved automatically from PyTorch libs 9 | extern THCState *state; 10 | 11 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW) 12 | // we assume BHWD format in inputImages 13 | // we assume BHW(YX) format on grids 14 | 15 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output){ 16 | // THCState *state = getCutorchState(L); 17 | // THCudaTensor *inputImages = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor"); 18 | // THCudaTensor *grids = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor"); 19 | // THCudaTensor *output = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor"); 20 | 21 | int success = 0; 22 | success = BilinearSamplerBHWD_updateOutput_cuda_kernel(THCudaTensor_size(state, output, 1), 23 | THCudaTensor_size(state, output, 3), 24 | THCudaTensor_size(state, output, 2), 25 | THCudaTensor_size(state, output, 0), 26 | THCudaTensor_size(state, inputImages, 1), 27 | THCudaTensor_size(state, inputImages, 2), 28 | THCudaTensor_size(state, inputImages, 3), 29 | THCudaTensor_size(state, inputImages, 0), 30 | THCudaTensor_data(state, inputImages), 31 | THCudaTensor_stride(state, inputImages, 0), 32 | THCudaTensor_stride(state, inputImages, 1), 33 | THCudaTensor_stride(state, inputImages, 2), 34 | THCudaTensor_stride(state, 
inputImages, 3), 35 | THCudaTensor_data(state, grids), 36 | THCudaTensor_stride(state, grids, 0), 37 | THCudaTensor_stride(state, grids, 3), 38 | THCudaTensor_stride(state, grids, 1), 39 | THCudaTensor_stride(state, grids, 2), 40 | THCudaTensor_data(state, output), 41 | THCudaTensor_stride(state, output, 0), 42 | THCudaTensor_stride(state, output, 1), 43 | THCudaTensor_stride(state, output, 2), 44 | THCudaTensor_stride(state, output, 3), 45 | THCState_getCurrentStream(state)); 46 | 47 | //check for errors 48 | if (!success) { 49 | THError("aborting"); 50 | } 51 | return 1; 52 | } 53 | 54 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages, 55 | THCudaTensor *gradGrids, THCudaTensor *gradOutput) 56 | { 57 | // THCState *state = getCutorchState(L); 58 | // THCudaTensor *inputImages = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor"); 59 | // THCudaTensor *grids = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor"); 60 | // THCudaTensor *gradInputImages = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor"); 61 | // THCudaTensor *gradGrids = (THCudaTensor *)luaT_checkudata(L, 5, "torch.CudaTensor"); 62 | // THCudaTensor *gradOutput = (THCudaTensor *)luaT_checkudata(L, 6, "torch.CudaTensor"); 63 | 64 | int success = 0; 65 | success = BilinearSamplerBHWD_updateGradInput_cuda_kernel(THCudaTensor_size(state, gradOutput, 1), 66 | THCudaTensor_size(state, gradOutput, 3), 67 | THCudaTensor_size(state, gradOutput, 2), 68 | THCudaTensor_size(state, gradOutput, 0), 69 | THCudaTensor_size(state, inputImages, 1), 70 | THCudaTensor_size(state, inputImages, 2), 71 | THCudaTensor_size(state, inputImages, 3), 72 | THCudaTensor_size(state, inputImages, 0), 73 | THCudaTensor_data(state, inputImages), 74 | THCudaTensor_stride(state, inputImages, 0), 75 | THCudaTensor_stride(state, inputImages, 1), 76 | THCudaTensor_stride(state, inputImages, 2), 77 | THCudaTensor_stride(state, inputImages, 3), 78 | THCudaTensor_data(state, grids), 79 | THCudaTensor_stride(state, grids, 0), 80 | THCudaTensor_stride(state, grids, 3), 81 | THCudaTensor_stride(state, grids, 1), 82 | THCudaTensor_stride(state, grids, 2), 83 | THCudaTensor_data(state, gradInputImages), 84 | THCudaTensor_stride(state, gradInputImages, 0), 85 | THCudaTensor_stride(state, gradInputImages, 1), 86 | THCudaTensor_stride(state, gradInputImages, 2), 87 | THCudaTensor_stride(state, gradInputImages, 3), 88 | THCudaTensor_data(state, gradGrids), 89 | THCudaTensor_stride(state, gradGrids, 0), 90 | THCudaTensor_stride(state, gradGrids, 3), 91 | THCudaTensor_stride(state, gradGrids, 1), 92 | THCudaTensor_stride(state, gradGrids, 2), 93 | THCudaTensor_data(state, gradOutput), 94 | THCudaTensor_stride(state, gradOutput, 0), 95 | THCudaTensor_stride(state, gradOutput, 1), 96 | THCudaTensor_stride(state, gradOutput, 2), 97 | THCudaTensor_stride(state, gradOutput, 3), 98 | THCState_getCurrentStream(state)); 99 | 100 | //check for errors 101 | if (!success) { 102 | THError("aborting"); 103 | } 104 | return 1; 105 | } 106 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_crop/src/roi_crop_cuda.h: -------------------------------------------------------------------------------- 1 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW) 2 | // we assume BHWD format in inputImages 3 | // we assume BHW(YX) format on grids 4 | 5 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, 
THCudaTensor *grids, THCudaTensor *output); 6 | 7 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages, 8 | THCudaTensor *gradGrids, THCudaTensor *gradOutput); 9 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_crop/src/roi_crop_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | 6 | int BilinearSamplerBHWD_updateOutput_cuda_kernel(/*output->size[3]*/int oc, 7 | /*output->size[2]*/int ow, 8 | /*output->size[1]*/int oh, 9 | /*output->size[0]*/int ob, 10 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 11 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 12 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 13 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 14 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 15 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 16 | /*THCudaTensor *output*/float *output, int osb, int osc, int osh, int osw, 17 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 18 | 19 | int BilinearSamplerBHWD_updateGradInput_cuda_kernel(/*gradOutput->size[3]*/int goc, 20 | /*gradOutput->size[2]*/int gow, 21 | /*gradOutput->size[1]*/int goh, 22 | /*gradOutput->size[0]*/int gob, 23 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 24 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 25 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 26 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 27 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 28 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 29 | /*THCudaTensor *gradInputImages*/float *gradInputImages, int gisb, int gisc, int gish, int gisw, 30 | /*THCudaTensor *gradGrids*/float *gradGrids, int ggsb, int ggsc, int ggsh, int ggsw, 31 | /*THCudaTensor *gradOutput*/float *gradOutput, int gosb, int gosc, int gosh, int gosw, 32 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 33 | 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from .nms import nms 4 | from .roi_align import ROIAlign 5 | from .roi_align import roi_align 6 | from .roi_pool import ROIPool 7 | from .roi_pool import roi_pool 8 | 9 | __all__ = ["nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool"] 10 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_layers/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from ._utils import _C 3 | from model import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppression""" 8 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
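# This module re-exposes the compiled CUDA/C++ ROIAlign from model._C (built
# from lib/model/csrc) as an autograd Function plus an nn.Module wrapper.
# Rough usage sketch (hypothetical names and shapes, not from this file):
#   layer = ROIAlign((7, 7), spatial_scale=1.0 / 16.0, sampling_ratio=0)
#   out = layer(base_feat, rois)  # rois: (N, 5) rows of (batch_idx, x1, y1, x2, y2)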
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from model import _C 9 | 10 | import pdb 11 | 12 | class _ROIAlign(Function): 13 | @staticmethod 14 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 15 | ctx.save_for_backward(roi) 16 | ctx.output_size = _pair(output_size) 17 | ctx.spatial_scale = spatial_scale 18 | ctx.sampling_ratio = sampling_ratio 19 | ctx.input_shape = input.size() 20 | output = _C.roi_align_forward(input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | rois, = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | sampling_ratio = ctx.sampling_ratio 30 | bs, ch, h, w = ctx.input_shape 31 | grad_input = _C.roi_align_backward( 32 | grad_output, 33 | rois, 34 | spatial_scale, 35 | output_size[0], 36 | output_size[1], 37 | bs, 38 | ch, 39 | h, 40 | w, 41 | sampling_ratio, 42 | ) 43 | return grad_input, None, None, None, None 44 | 45 | 46 | roi_align = _ROIAlign.apply 47 | 48 | 49 | class ROIAlign(nn.Module): 50 | def __init__(self, output_size, spatial_scale, sampling_ratio): 51 | super(ROIAlign, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | self.sampling_ratio = sampling_ratio 55 | 56 | def forward(self, input, rois): 57 | return roi_align( 58 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 59 | ) 60 | 61 | def __repr__(self): 62 | tmpstr = self.__class__.__name__ + "(" 63 | tmpstr += "output_size=" + str(self.output_size) 64 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 65 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 66 | tmpstr += ")" 67 | return tmpstr 68 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
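# Usage sketch (illustrative addition, not part of the original sources; note
# that model/csrc ships ROIPool kernels only under cuda/, so the tensors are
# assumed to live on a GPU; ROI rows follow (batch_index, x1, y1, x2, y2)):
#
#   pool = ROIPool(output_size=(7, 7), spatial_scale=1.0 / 16.0)
#   feat = torch.randn(1, 256, 50, 50).cuda()
#   rois = torch.tensor([[0.0, 32.0, 32.0, 256.0, 256.0]]).cuda()
#   out = pool(feat, rois)  # -> (1, 256, 7, 7)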
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from model import _C 9 | 10 | 11 | class _ROIPool(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale): 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output, argmax = _C.roi_pool_forward( 18 | input, roi, spatial_scale, output_size[0], output_size[1] 19 | ) 20 | ctx.save_for_backward(input, roi, argmax) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | input, rois, argmax = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | bs, ch, h, w = ctx.input_shape 30 | grad_input = _C.roi_pool_backward( 31 | grad_output, 32 | input, 33 | rois, 34 | argmax, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | ) 43 | return grad_input, None, None, None 44 | 45 | 46 | roi_pool = _ROIPool.apply 47 | 48 | 49 | class ROIPool(nn.Module): 50 | def __init__(self, output_size, spatial_scale): 51 | super(ROIPool, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | 55 | def forward(self, input, rois): 56 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 57 | 58 | def __repr__(self): 59 | tmpstr = self.__class__.__name__ + "(" 60 | tmpstr += "output_size=" + str(self.output_size) 61 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 62 | tmpstr += ")" 63 | return tmpstr 64 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/faster-rcnn/lib/model/roi_pooling/__init__.py -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/faster-rcnn/lib/model/roi_pooling/_ext/__init__.py -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_pooling/_ext/roi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_pooling import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_pooling/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | 7 | sources = ['src/roi_pooling.c'] 8 | headers = ['src/roi_pooling.h'] 9 | extra_objects = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | this_file 
= os.path.dirname(os.path.realpath(__file__)) 14 | print(this_file) 15 | 16 | if torch.cuda.is_available(): 17 | print('Including CUDA code.') 18 | sources += ['src/roi_pooling_cuda.c'] 19 | headers += ['src/roi_pooling_cuda.h'] 20 | defines += [('WITH_CUDA', None)] 21 | with_cuda = True 22 | extra_objects = ['src/roi_pooling.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_pooling', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_pooling/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/faster-rcnn/lib/model/roi_pooling/functions/__init__.py -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_pooling/functions/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import roi_pooling 4 | import pdb 5 | 6 | class RoIPoolFunction(Function): 7 | def __init__(ctx, pooled_height, pooled_width, spatial_scale): 8 | ctx.pooled_width = pooled_width 9 | ctx.pooled_height = pooled_height 10 | ctx.spatial_scale = spatial_scale 11 | ctx.feature_size = None 12 | 13 | def forward(ctx, features, rois): 14 | ctx.feature_size = features.size() 15 | batch_size, num_channels, data_height, data_width = ctx.feature_size 16 | num_rois = rois.size(0) 17 | output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_() 18 | ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int() 19 | ctx.rois = rois 20 | if not features.is_cuda: 21 | _features = features.permute(0, 2, 3, 1) 22 | roi_pooling.roi_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 23 | _features, rois, output) 24 | else: 25 | roi_pooling.roi_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 26 | features, rois, output, ctx.argmax) 27 | 28 | return output 29 | 30 | def backward(ctx, grad_output): 31 | assert(ctx.feature_size is not None and grad_output.is_cuda) 32 | batch_size, num_channels, data_height, data_width = ctx.feature_size 33 | grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_() 34 | 35 | roi_pooling.roi_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 36 | grad_output, ctx.rois, grad_input, ctx.argmax) 37 | 38 | return grad_input, None 39 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_pooling/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/faster-rcnn/lib/model/roi_pooling/modules/__init__.py -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_pooling/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 
2 | from ..functions.roi_pool import RoIPoolFunction 3 | 4 | 5 | class _RoIPooling(Module): 6 | def __init__(self, pooled_height, pooled_width, spatial_scale): 7 | super(_RoIPooling, self).__init__() 8 | 9 | self.pooled_width = int(pooled_width) 10 | self.pooled_height = int(pooled_height) 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) 15 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_pooling/src/roi_pooling.c: -------------------------------------------------------------------------------- 1 | #include <TH/TH.h> 2 | #include <math.h> 3 | 4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output) 6 | { 7 | // Grab the input tensor 8 | float * data_flat = THFloatTensor_data(features); 9 | float * rois_flat = THFloatTensor_data(rois); 10 | 11 | float * output_flat = THFloatTensor_data(output); 12 | 13 | // Number of ROIs 14 | int num_rois = THFloatTensor_size(rois, 0); 15 | int size_rois = THFloatTensor_size(rois, 1); 16 | // batch size 17 | int batch_size = THFloatTensor_size(features, 0); 18 | if(batch_size != 1) 19 | { 20 | return 0; 21 | } 22 | // data height 23 | int data_height = THFloatTensor_size(features, 1); 24 | // data width 25 | int data_width = THFloatTensor_size(features, 2); 26 | // Number of channels 27 | int num_channels = THFloatTensor_size(features, 3); 28 | 29 | // Initialize all elements of the output tensor to -1, the starting value for the running max. 30 | THFloatStorage_fill(THFloatTensor_storage(output), -1); 31 | 32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R 33 | int index_roi = 0; 34 | int index_output = 0; 35 | int n; 36 | for (n = 0; n < num_rois; ++n) 37 | { 38 | int roi_batch_ind = rois_flat[index_roi + 0]; 39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale); 40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale); 41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale); 42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale); 43 | // CHECK_GE(roi_batch_ind, 0); 44 | // CHECK_LT(roi_batch_ind, batch_size); 45 | 46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 50 | 51 | int index_data = roi_batch_ind * data_height * data_width * num_channels; 52 | const int output_area = pooled_width * pooled_height; 53 | 54 | int c, ph, pw; 55 | for (ph = 0; ph < pooled_height; ++ph) 56 | { 57 | for (pw = 0; pw < pooled_width; ++pw) 58 | { 59 | int hstart = (floor((float)(ph) * bin_size_h)); 60 | int wstart = (floor((float)(pw) * bin_size_w)); 61 | int hend = (ceil((float)(ph + 1) * bin_size_h)); 62 | int wend = (ceil((float)(pw + 1) * bin_size_w)); 63 | 64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height); 65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height); 66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width); 67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width); 68 | 69 | const int pool_index = index_output + (ph * pooled_width + pw); 70 | int is_empty = (hend <= hstart) || (wend <= wstart); 71 | if (is_empty) 72 | { 73 | for (c = 0; c < num_channels * output_area; c += output_area) 74 | { 75 |
output_flat[pool_index + c] = 0; 76 | } 77 | } 78 | else 79 | { 80 | int h, w, c; 81 | for (h = hstart; h < hend; ++h) 82 | { 83 | for (w = wstart; w < wend; ++w) 84 | { 85 | for (c = 0; c < num_channels; ++c) 86 | { 87 | const int index = (h * data_width + w) * num_channels + c; 88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area]) 89 | { 90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index]; 91 | } 92 | } 93 | } 94 | } 95 | } 96 | } 97 | } 98 | 99 | // Increment ROI index 100 | index_roi += size_rois; 101 | index_output += pooled_height * pooled_width * num_channels; 102 | } 103 | return 1; 104 | } -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <math.h> 3 | #include "roi_pooling_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | // int batch_size = THCudaTensor_size(state, features, 0); 27 | // if (batch_size != 1) 28 | // { 29 | // return 0; 30 | // } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | // if (batch_size != 1) 70 | // {
// return 0; 72 | // } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, rois_flat, 85 | bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | } 89 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- /faster-rcnn/lib/model/roi_pooling/src/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/rpn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/faster-rcnn/lib/model/rpn/__init__.py -------------------------------------------------------------------------------- /faster-rcnn/lib/model/rpn/generate_anchors.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | # -------------------------------------------------------- 3 | # Faster R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick and Sean Bell 7 | # -------------------------------------------------------- 8 | 9 | import numpy as np 10 | import pdb 11 | 12 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 13 | # 14 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 15 | # >> anchors 16 | # 17 | # anchors = 18 | # 19 | # -83 -39 100 56 20 | # -175 -87 192 104 21 | # -359 -183 376 200 22 | # -55 -55 72 72 23 | # -119 -119 
136 136 24 | # -247 -247 264 264 25 | # -35 -79 52 96 26 | # -79 -167 96 184 27 | # -167 -343 184 360 28 | 29 | #array([[ -83., -39., 100., 56.], 30 | # [-175., -87., 192., 104.], 31 | # [-359., -183., 376., 200.], 32 | # [ -55., -55., 72., 72.], 33 | # [-119., -119., 136., 136.], 34 | # [-247., -247., 264., 264.], 35 | # [ -35., -79., 52., 96.], 36 | # [ -79., -167., 96., 184.], 37 | # [-167., -343., 184., 360.]]) 38 | 39 | try: 40 | xrange # Python 2 41 | except NameError: 42 | xrange = range # Python 3 43 | 44 | 45 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 46 | scales=2**np.arange(3, 6)): 47 | """ 48 | Generate anchor (reference) windows by enumerating aspect ratios X 49 | scales wrt a reference (0, 0, 15, 15) window. 50 | """ 51 | 52 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 53 | ratio_anchors = _ratio_enum(base_anchor, ratios) 54 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 55 | for i in xrange(ratio_anchors.shape[0])]) 56 | return anchors 57 | 58 | def _whctrs(anchor): 59 | """ 60 | Return width, height, x center, and y center for an anchor (window). 61 | """ 62 | 63 | w = anchor[2] - anchor[0] + 1 64 | h = anchor[3] - anchor[1] + 1 65 | x_ctr = anchor[0] + 0.5 * (w - 1) 66 | y_ctr = anchor[1] + 0.5 * (h - 1) 67 | return w, h, x_ctr, y_ctr 68 | 69 | def _mkanchors(ws, hs, x_ctr, y_ctr): 70 | """ 71 | Given a vector of widths (ws) and heights (hs) around a center 72 | (x_ctr, y_ctr), output a set of anchors (windows). 73 | """ 74 | 75 | ws = ws[:, np.newaxis] 76 | hs = hs[:, np.newaxis] 77 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 78 | y_ctr - 0.5 * (hs - 1), 79 | x_ctr + 0.5 * (ws - 1), 80 | y_ctr + 0.5 * (hs - 1))) 81 | return anchors 82 | 83 | def _ratio_enum(anchor, ratios): 84 | """ 85 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 86 | """ 87 | 88 | w, h, x_ctr, y_ctr = _whctrs(anchor) 89 | size = w * h 90 | size_ratios = size / ratios 91 | ws = np.round(np.sqrt(size_ratios)) 92 | hs = np.round(ws * ratios) 93 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 94 | return anchors 95 | 96 | def _scale_enum(anchor, scales): 97 | """ 98 | Enumerate a set of anchors for each scale wrt an anchor. 
99 | """ 100 | 101 | w, h, x_ctr, y_ctr = _whctrs(anchor) 102 | ws = w * scales 103 | hs = h * scales 104 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 105 | return anchors 106 | 107 | if __name__ == '__main__': 108 | import time 109 | t = time.time() 110 | a = generate_anchors() 111 | print(time.time() - t) 112 | print(a) 113 | from IPython import embed; embed() 114 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/rpn/rpn.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | 7 | from model.utils.config import cfg 8 | from .proposal_layer import _ProposalLayer 9 | from .anchor_target_layer import _AnchorTargetLayer 10 | from model.utils.net_utils import _smooth_l1_loss 11 | 12 | import numpy as np 13 | import math 14 | import pdb 15 | import time 16 | 17 | class _RPN(nn.Module): 18 | """ region proposal network """ 19 | def __init__(self, din): 20 | super(_RPN, self).__init__() 21 | 22 | self.din = din # get depth of input feature map, e.g., 512 23 | self.anchor_scales = cfg.ANCHOR_SCALES 24 | self.anchor_ratios = cfg.ANCHOR_RATIOS 25 | self.feat_stride = cfg.FEAT_STRIDE[0] 26 | 27 | # define the convrelu layers processing input feature map 28 | self.RPN_Conv = nn.Conv2d(self.din, 512, 3, 1, 1, bias=True) 29 | 30 | # define bg/fg classifcation score layer 31 | self.nc_score_out = len(self.anchor_scales) * len(self.anchor_ratios) * 2 # 2(bg/fg) * 9 (anchors) 32 | self.RPN_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0) 33 | 34 | # define anchor box offset prediction layer 35 | self.nc_bbox_out = len(self.anchor_scales) * len(self.anchor_ratios) * 4 # 4(coords) * 9 (anchors) 36 | self.RPN_bbox_pred = nn.Conv2d(512, self.nc_bbox_out, 1, 1, 0) 37 | 38 | # define proposal layer 39 | self.RPN_proposal = _ProposalLayer(self.feat_stride, self.anchor_scales, self.anchor_ratios) 40 | 41 | # define anchor target layer 42 | self.RPN_anchor_target = _AnchorTargetLayer(self.feat_stride, self.anchor_scales, self.anchor_ratios) 43 | 44 | self.rpn_loss_cls = 0 45 | self.rpn_loss_box = 0 46 | 47 | @staticmethod 48 | def reshape(x, d): 49 | input_shape = x.size() 50 | x = x.view( 51 | input_shape[0], 52 | int(d), 53 | int(float(input_shape[1] * input_shape[2]) / float(d)), 54 | input_shape[3] 55 | ) 56 | return x 57 | 58 | def forward(self, base_feat, im_info, gt_boxes, num_boxes): 59 | 60 | batch_size = base_feat.size(0) 61 | 62 | # return feature map after convrelu layer 63 | rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True) 64 | # get rpn classification score 65 | rpn_cls_score = self.RPN_cls_score(rpn_conv1) 66 | 67 | rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2) 68 | rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1) 69 | rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out) 70 | 71 | # get rpn offsets to the anchor boxes 72 | rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1) 73 | 74 | # proposal layer 75 | cfg_key = 'TRAIN' if self.training else 'TEST' 76 | 77 | rois = self.RPN_proposal((rpn_cls_prob.data, rpn_bbox_pred.data, 78 | im_info, cfg_key)) 79 | 80 | self.rpn_loss_cls = 0 81 | self.rpn_loss_box = 0 82 | 83 | # generating training labels and build the rpn loss 84 | if self.training: 85 | assert gt_boxes is not None 86 | 87 | rpn_data = self.RPN_anchor_target((rpn_cls_score.data, gt_boxes, im_info, 
num_boxes)) 88 | 89 | # compute classification loss 90 | rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2) 91 | rpn_label = rpn_data[0].view(batch_size, -1) 92 | 93 | rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1)) 94 | rpn_cls_score = torch.index_select(rpn_cls_score.view(-1,2), 0, rpn_keep) 95 | rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data) 96 | rpn_label = Variable(rpn_label.long()) 97 | self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label) 98 | fg_cnt = torch.sum(rpn_label.data.ne(0)) 99 | 100 | rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:] 101 | 102 | # compute bbox regression loss 103 | rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights) 104 | rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights) 105 | rpn_bbox_targets = Variable(rpn_bbox_targets) 106 | 107 | self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights, 108 | rpn_bbox_outside_weights, sigma=3, dim=[1,2,3]) 109 | 110 | return rois, self.rpn_loss_cls, self.rpn_loss_box 111 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/faster-rcnn/lib/model/utils/__init__.py -------------------------------------------------------------------------------- /faster-rcnn/lib/model/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps(np.ndarray[DTYPE_t, ndim=2] boxes, 16 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 17 | return bbox_overlaps_c(boxes, query_boxes) 18 | 19 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_overlaps_c( 20 | np.ndarray[DTYPE_t, ndim=2] boxes, 21 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 22 | """ 23 | Parameters 24 | ---------- 25 | boxes: (N, 4) ndarray of float 26 | query_boxes: (K, 4) ndarray of float 27 | Returns 28 | ------- 29 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 30 | """ 31 | cdef unsigned int N = boxes.shape[0] 32 | cdef unsigned int K = query_boxes.shape[0] 33 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 34 | cdef DTYPE_t iw, ih, box_area 35 | cdef DTYPE_t ua 36 | cdef unsigned int k, n 37 | for k in range(K): 38 | box_area = ( 39 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 40 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 41 | ) 42 | for n in range(N): 43 | iw = ( 44 | min(boxes[n, 2], query_boxes[k, 2]) - 45 | max(boxes[n, 0], query_boxes[k, 0]) + 1 46 | ) 47 | if iw > 0: 48 | ih = ( 49 | min(boxes[n, 3], query_boxes[k, 3]) - 50 | max(boxes[n, 1], query_boxes[k, 1]) + 1 51 | ) 52 | if ih > 0: 53 | ua = float( 54 | 
(boxes[n, 2] - boxes[n, 0] + 1) * 55 | (boxes[n, 3] - boxes[n, 1] + 1) + 56 | box_area - iw * ih 57 | ) 58 | overlaps[n, k] = iw * ih / ua 59 | return overlaps 60 | 61 | 62 | def bbox_intersections( 63 | np.ndarray[DTYPE_t, ndim=2] boxes, 64 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 65 | return bbox_intersections_c(boxes, query_boxes) 66 | 67 | 68 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_intersections_c( 69 | np.ndarray[DTYPE_t, ndim=2] boxes, 70 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 71 | """ 72 | For each query box compute the intersection ratio covered by boxes 73 | ---------- 74 | Parameters 75 | ---------- 76 | boxes: (N, 4) ndarray of float 77 | query_boxes: (K, 4) ndarray of float 78 | Returns 79 | ------- 80 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 81 | """ 82 | cdef unsigned int N = boxes.shape[0] 83 | cdef unsigned int K = query_boxes.shape[0] 84 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 85 | cdef DTYPE_t iw, ih, box_area 86 | cdef DTYPE_t ua 87 | cdef unsigned int k, n 88 | for k in range(K): 89 | box_area = ( 90 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 91 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 92 | ) 93 | for n in range(N): 94 | iw = ( 95 | min(boxes[n, 2], query_boxes[k, 2]) - 96 | max(boxes[n, 0], query_boxes[k, 0]) + 1 97 | ) 98 | if iw > 0: 99 | ih = ( 100 | min(boxes[n, 3], query_boxes[k, 3]) - 101 | max(boxes[n, 1], query_boxes[k, 1]) + 1 102 | ) 103 | if ih > 0: 104 | intersec[n, k] = iw * ih / box_area 105 | return intersec -------------------------------------------------------------------------------- /faster-rcnn/lib/model/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | 10 | import numpy as np 11 | # from scipy.misc import imread, imresize 12 | import cv2 13 | 14 | try: 15 | xrange # Python 2 16 | except NameError: 17 | xrange = range # Python 3 18 | 19 | 20 | def im_list_to_blob(ims): 21 | """Convert a list of images into a network input. 22 | 23 | Assumes images are already prepared (means subtracted, BGR order, ...). 
24 | """ 25 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 26 | num_images = len(ims) 27 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 28 | dtype=np.float32) 29 | for i in xrange(num_images): 30 | im = ims[i] 31 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 32 | 33 | return blob 34 | 35 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 36 | """Mean subtract and scale an image for use in a blob.""" 37 | 38 | im = im.astype(np.float32, copy=False) 39 | im -= pixel_means 40 | # im = im[:, :, ::-1] 41 | im_shape = im.shape 42 | im_size_min = np.min(im_shape[0:2]) 43 | im_size_max = np.max(im_shape[0:2]) 44 | im_scale = float(target_size) / float(im_size_min) 45 | # Prevent the biggest axis from being more than MAX_SIZE 46 | # if np.round(im_scale * im_size_max) > max_size: 47 | # im_scale = float(max_size) / float(im_size_max) 48 | # im = imresize(im, im_scale) 49 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 50 | interpolation=cv2.INTER_LINEAR) 51 | 52 | return im, im_scale 53 | -------------------------------------------------------------------------------- /faster-rcnn/lib/model/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 2 | import tensorflow as tf 3 | import numpy as np 4 | import scipy.misc 5 | try: 6 | from StringIO import StringIO # Python 2.7 7 | except ImportError: 8 | from io import BytesIO # Python 3.x 9 | 10 | 11 | class Logger(object): 12 | 13 | def __init__(self, log_dir): 14 | """Create a summary writer logging to log_dir.""" 15 | self.writer = tf.summary.FileWriter(log_dir) 16 | 17 | def scalar_summary(self, tag, value, step): 18 | """Log a scalar variable.""" 19 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) 20 | self.writer.add_summary(summary, step) 21 | 22 | def image_summary(self, tag, images, step): 23 | """Log a list of images.""" 24 | 25 | img_summaries = [] 26 | for i, img in enumerate(images): 27 | # Write the image to a string 28 | try: 29 | s = StringIO() 30 | except: 31 | s = BytesIO() 32 | scipy.misc.toimage(img).save(s, format="png") 33 | 34 | # Create an Image object 35 | img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), 36 | height=img.shape[0], 37 | width=img.shape[1]) 38 | # Create a Summary value 39 | img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum)) 40 | 41 | # Create and write Summary 42 | summary = tf.Summary(value=img_summaries) 43 | self.writer.add_summary(summary, step) 44 | 45 | def histo_summary(self, tag, values, step, bins=1000): 46 | """Log a histogram of the tensor of values.""" 47 | 48 | # Create a histogram using numpy 49 | counts, bin_edges = np.histogram(values, bins=bins) 50 | 51 | # Fill the fields of the histogram proto 52 | hist = tf.HistogramProto() 53 | hist.min = float(np.min(values)) 54 | hist.max = float(np.max(values)) 55 | hist.num = int(np.prod(values.shape)) 56 | hist.sum = float(np.sum(values)) 57 | hist.sum_squares = float(np.sum(values**2)) 58 | 59 | # Drop the start of the first bin 60 | bin_edges = bin_edges[1:] 61 | 62 | # Add bin edges and counts 63 | for edge in bin_edges: 64 | hist.bucket_limit.append(edge) 65 | for c in counts: 66 | hist.bucket.append(c) 67 | 68 | # Create and write Summary 69 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) 70 | self.writer.add_summary(summary, step) 71 | self.writer.flush() 72 | 
-------------------------------------------------------------------------------- /faster-rcnn/lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /faster-rcnn/lib/roi_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | 8 | """Compute minibatch blobs for training a Fast R-CNN network.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | import numpy.random as npr 15 | from scipy.misc import imread 16 | from model.utils.config import cfg 17 | from model.utils.blob import prep_im_for_blob, im_list_to_blob 18 | import pdb 19 | def get_minibatch(roidb, num_classes): 20 | """Given a roidb, construct a minibatch sampled from it.""" 21 | num_images = len(roidb) 22 | # Sample random scales to use for each image in this batch 23 | random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES), 24 | size=num_images) 25 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 26 | 'num_images ({}) must divide BATCH_SIZE ({})'. \ 27 | format(num_images, cfg.TRAIN.BATCH_SIZE) 28 | 29 | # Get the input image blob, formatted for caffe 30 | im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) 31 | 32 | blobs = {'data': im_blob} 33 | 34 | assert len(im_scales) == 1, "Single batch only" 35 | assert len(roidb) == 1, "Single batch only" 36 | 37 | # gt boxes: (x1, y1, x2, y2, cls) 38 | if cfg.TRAIN.USE_ALL_GT: 39 | # Include all ground truth boxes 40 | gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] 41 | else: 42 | # For the COCO ground truth boxes, exclude the ones that are ''iscrowd'' 43 | gt_inds = np.where((roidb[0]['gt_classes'] != 0) & np.all(roidb[0]['gt_overlaps'].toarray() > -1.0, axis=1))[0] 44 | gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) 45 | gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] 46 | gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] 47 | blobs['gt_boxes'] = gt_boxes 48 | blobs['im_info'] = np.array( 49 | [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], 50 | dtype=np.float32) 51 | 52 | blobs['img_id'] = roidb[0]['img_id'] 53 | 54 | return blobs 55 | 56 | def _get_image_blob(roidb, scale_inds): 57 | """Builds an input blob from the images in the roidb at the specified 58 | scales. 
59 | """ 60 | num_images = len(roidb) 61 | 62 | processed_ims = [] 63 | im_scales = [] 64 | for i in range(num_images): 65 | #im = cv2.imread(roidb[i]['image']) 66 | im = imread(roidb[i]['image']) 67 | 68 | if len(im.shape) == 2: 69 | im = im[:,:,np.newaxis] 70 | im = np.concatenate((im,im,im), axis=2) 71 | # flip the channel, since the original one using cv2 72 | # rgb -> bgr 73 | im = im[:,:,::-1] 74 | 75 | if roidb[i]['flipped']: 76 | im = im[:, ::-1, :] 77 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 78 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, 79 | cfg.TRAIN.MAX_SIZE) 80 | im_scales.append(im_scale) 81 | processed_ims.append(im) 82 | 83 | # Create a blob to hold the input images 84 | blob = im_list_to_blob(processed_ims) 85 | 86 | return blob, im_scales 87 | -------------------------------------------------------------------------------- /faster-rcnn/lib/roi_data_layer/roidb.py: -------------------------------------------------------------------------------- 1 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | 6 | import datasets 7 | import numpy as np 8 | from model.utils.config import cfg 9 | from datasets.factory import get_imdb 10 | import PIL 11 | import pdb 12 | 13 | def prepare_roidb(imdb): 14 | """Enrich the imdb's roidb by adding some derived quantities that 15 | are useful for training. This function precomputes the maximum 16 | overlap, taken over ground-truth boxes, between each ROI and 17 | each ground-truth box. The class with maximum overlap is also 18 | recorded. 19 | """ 20 | 21 | roidb = imdb.roidb 22 | if not (imdb.name.startswith('coco')): 23 | sizes = [PIL.Image.open(imdb.image_path_at(i)).size 24 | for i in range(imdb.num_images)] 25 | 26 | for i in range(len(imdb.image_index)): 27 | roidb[i]['img_id'] = imdb.image_id_at(i) 28 | roidb[i]['image'] = imdb.image_path_at(i) 29 | if not (imdb.name.startswith('coco')): 30 | roidb[i]['width'] = sizes[i][0] 31 | roidb[i]['height'] = sizes[i][1] 32 | # need gt_overlaps as a dense array for argmax 33 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 34 | # max overlap with gt over classes (columns) 35 | max_overlaps = gt_overlaps.max(axis=1) 36 | # gt class that had the max overlap 37 | max_classes = gt_overlaps.argmax(axis=1) 38 | roidb[i]['max_classes'] = max_classes 39 | roidb[i]['max_overlaps'] = max_overlaps 40 | # sanity checks 41 | # max overlap of 0 => class should be zero (background) 42 | zero_inds = np.where(max_overlaps == 0)[0] 43 | assert all(max_classes[zero_inds] == 0) 44 | # max overlap > 0 => class should not be zero (must be a fg class) 45 | nonzero_inds = np.where(max_overlaps > 0)[0] 46 | assert all(max_classes[nonzero_inds] != 0) 47 | 48 | 49 | def rank_roidb_ratio(roidb): 50 | # rank roidb based on the ratio between width and height. 51 | ratio_large = 2 # largest ratio to preserve. 52 | ratio_small = 0.5 # smallest ratio to preserve. 
53 | 54 | ratio_list = [] 55 | for i in range(len(roidb)): 56 | width = roidb[i]['width'] 57 | height = roidb[i]['height'] 58 | ratio = width / float(height) 59 | 60 | if ratio > ratio_large: 61 | roidb[i]['need_crop'] = 1 62 | ratio = ratio_large 63 | elif ratio < ratio_small: 64 | roidb[i]['need_crop'] = 1 65 | ratio = ratio_small 66 | else: 67 | roidb[i]['need_crop'] = 0 68 | 69 | ratio_list.append(ratio) 70 | 71 | ratio_list = np.array(ratio_list) 72 | ratio_index = np.argsort(ratio_list) 73 | return ratio_list[ratio_index], ratio_index 74 | 75 | def filter_roidb(roidb): 76 | # filter the image without bounding box. 77 | print('before filtering, there are %d images...' % (len(roidb))) 78 | i = 0 79 | while i < len(roidb): 80 | if len(roidb[i]['boxes']) == 0: 81 | del roidb[i] 82 | i -= 1 83 | i += 1 84 | 85 | print('after filtering, there are %d images...' % (len(roidb))) 86 | return roidb 87 | 88 | def combined_roidb(imdb_names, training=True): 89 | """ 90 | Combine multiple roidbs 91 | """ 92 | 93 | def get_training_roidb(imdb): 94 | """Returns a roidb (Region of Interest database) for use in training.""" 95 | if cfg.TRAIN.USE_FLIPPED: 96 | print('Appending horizontally-flipped training examples...') 97 | imdb.append_flipped_images() 98 | print('done') 99 | 100 | print('Preparing training data...') 101 | 102 | prepare_roidb(imdb) 103 | #ratio_index = rank_roidb_ratio(imdb) 104 | print('done') 105 | 106 | return imdb.roidb 107 | 108 | def get_roidb(imdb_name): 109 | imdb = get_imdb(imdb_name) 110 | print('Loaded dataset `{:s}` for training'.format(imdb.name)) 111 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD) 112 | print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD)) 113 | roidb = get_training_roidb(imdb) 114 | return roidb 115 | 116 | roidbs = [get_roidb(s) for s in imdb_names.split('+')] 117 | roidb = roidbs[0] 118 | 119 | if len(roidbs) > 1: 120 | for r in roidbs[1:]: 121 | roidb.extend(r) 122 | tmp = get_imdb(imdb_names.split('+')[1]) 123 | imdb = datasets.imdb.imdb(imdb_names, tmp.classes) 124 | else: 125 | imdb = get_imdb(imdb_names) 126 | 127 | if training: 128 | roidb = filter_roidb(roidb) 129 | 130 | ratio_list, ratio_index = rank_roidb_ratio(roidb) 131 | 132 | return imdb, roidb, ratio_list, ratio_index 133 | -------------------------------------------------------------------------------- /faster-rcnn/lib/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
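# Build note (assumed from the standard setuptools/cpp_extension workflow; the
# exact command is not stated in this file): compile the model._C extension in
# place with `cd faster-rcnn/lib && python setup.py build develop`. The CUDA
# sources under model/csrc/cuda are included only when torch.cuda is available
# and CUDA_HOME is set (see get_extensions below).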
2 | #!/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | from torch.utils.cpp_extension import CppExtension 12 | from torch.utils.cpp_extension import CUDAExtension 13 | 14 | requirements = ["torch", "torchvision"] 15 | 16 | 17 | def get_extensions(): 18 | this_dir = os.path.dirname(os.path.abspath(__file__)) 19 | extensions_dir = os.path.join(this_dir, "model", "csrc") 20 | 21 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 22 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 23 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 24 | 25 | sources = main_file + source_cpu 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if torch.cuda.is_available() and CUDA_HOME is not None: 32 | extension = CUDAExtension 33 | sources += source_cuda 34 | define_macros += [("WITH_CUDA", None)] 35 | extra_compile_args["nvcc"] = [ 36 | "-DCUDA_HAS_FP16=1", 37 | "-D__CUDA_NO_HALF_OPERATORS__", 38 | "-D__CUDA_NO_HALF_CONVERSIONS__", 39 | "-D__CUDA_NO_HALF2_OPERATORS__", 40 | ] 41 | 42 | sources = [os.path.join(extensions_dir, s) for s in sources] 43 | 44 | include_dirs = [extensions_dir] 45 | 46 | ext_modules = [ 47 | extension( 48 | "model._C", 49 | sources, 50 | include_dirs=include_dirs, 51 | define_macros=define_macros, 52 | extra_compile_args=extra_compile_args, 53 | ) 54 | ] 55 | 56 | return ext_modules 57 | 58 | 59 | setup( 60 | name="faster_rcnn", 61 | version="0.1", 62 | description="object detection in pytorch", 63 | packages=find_packages(exclude=("configs", "tests",)), 64 | ext_modules=get_extensions(), 65 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 66 | ) 67 | -------------------------------------------------------------------------------- /faster-rcnn/lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 
18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /faster-rcnn/models/README.md: -------------------------------------------------------------------------------- 1 | # Faster R-CNN with model pretrained on Visual Genome 2 | 3 | Please download the faster-rcnn model pretrained on Visual Genome from [Google Drive](https://drive.google.com/file/d/18n_3V1rywgeADZ3oONO0DsuuS9eMW6sN/view) (provided by [Faster-RCNN-VG](https://github.com/shilrley6/Faster-R-CNN-with-model-pretrained-on-Visual-Genome)) to ```./faster_rcnn_res101_vg.pth```. 4 | -------------------------------------------------------------------------------- /faster-rcnn/object_150_list.txt: -------------------------------------------------------------------------------- 1 | roof 2 | kite 3 | pant 4 | bowl 5 | laptop 6 | paper 7 | shoe 8 | railing 9 | chair 10 | windshield 11 | ear 12 | tire 13 | cup 14 | bench 15 | tail 16 | bike 17 | board 18 | orange 19 | hat 20 | finger 21 | plate 22 | woman 23 | handle 24 | branch 25 | food 26 | elephant 27 | bear 28 | wave 29 | tile 30 | giraffe 31 | desk 32 | lady 33 | towel 34 | glove 35 | bag 36 | nose 37 | rock 38 | tower 39 | motorcycle 40 | sneaker 41 | fence 42 | people 43 | house 44 | sign 45 | hair 46 | street 47 | zebra 48 | racket 49 | logo 50 | girl 51 | arm 52 | wire 53 | leaf 54 | clock 55 | hill 56 | bird 57 | umbrella 58 | leg 59 | screen 60 | men 61 | sink 62 | trunk 63 | post 64 | sidewalk 65 | box 66 | boy 67 | cow 68 | skateboard 69 | plane 70 | stand 71 | pillow 72 | toilet 73 | pot 74 | number 75 | pole 76 | table 77 | boat 78 | sheep 79 | horse 80 | eye 81 | sock 82 | window 83 | vehicle 84 | curtain 85 | man 86 | banana 87 | fork 88 | head 89 | door 90 | shelf 91 | cabinet 92 | glass 93 | flag 94 | train 95 | child 96 | seat 97 | neck 98 | room 99 | player 100 | ski 101 | cap 102 | tree 103 | bed 104 | cat 105 | light 106 | skier 107 | engine 108 | drawer 109 | guy 110 | airplane 111 | car 112 | mountain 113 | shirt 114 | paw 115 | boot 116 | snow 117 | lamp 118 | book 119 | flower 120 | animal 121 | bus 122 | vegetable 123 | tie 124 | beach 125 | pizza 126 | wheel 127 | plant 128 | helmet 129 | track 130 | hand 131 | fruit 132 | mouth 133 | letter 134 | vase 135 | kid 136 | building 137 | short 138 | surfboard 139 | phone 140 | coat 141 | counter 142 | dog 143 | face 144 | jacket 145 | person 146 | truck 147 | bottle 148 | basket 149 | jean 150 | wing 151 | -------------------------------------------------------------------------------- /infer.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from tqdm import tqdm 4 | import numpy as np 5 | from PIL import Image 6 | import torch 7 | from torchvision import transforms 8 | 9 | from loader import dataset_dict, get_loader 10 | from loader.utils import gen_composite_image 11 | 12 | 13 | def sample(sample_dataset, model, iter, gen_dir): 14 | model.start_eval() 15 | id_list = [0, 1500, 2500, 3500, 4500, 5500, 6500, 7500, 8500, 10000] 16 | for id in id_list: 17 | index, bg_feat,
fg_feat, fg_msk_feat, fg_bbox, comp_feat, comp_msk_feat, comp_crop_feat, label, trans_label, catnm = sample_dataset[id] 18 | pred_img, pred_msk, pred_trans = model.test_genorator(bg_feat.unsqueeze(0), fg_feat.unsqueeze(0), fg_msk_feat.unsqueeze(0), torch.Tensor(fg_bbox).unsqueeze(0)) 19 | img = transforms.ToPILImage()(pred_img.cpu()[0]).convert('RGB') 20 | sample_dir = os.path.join(gen_dir, str(id)) 21 | if not os.path.exists(sample_dir): 22 | os.makedirs(sample_dir) 23 | img.save(os.path.join(sample_dir, '{}.jpg'.format(iter))) 24 | 25 | 26 | def infer(eval_loader, opt, model=None, repeat=1): 27 | def csv_title(): 28 | return 'annID,scID,bbox,catnm,label,img_path,msk_path' 29 | def csv_str(annid, scid, gen_comp_bbox, catnm, gen_file_name): 30 | return '{},{},"{}",{},-1,images/{}.jpg,masks/{}.png'.format(annid, scid, gen_comp_bbox, catnm, gen_file_name, gen_file_name) 31 | 32 | assert (repeat >= 1) 33 | save_dir = os.path.join('result', opt.expid) 34 | eval_dir = os.path.join(save_dir, opt.eval_type, str(opt.epoch)) 35 | assert (not os.path.exists(eval_dir)) 36 | img_sav_dir = os.path.join(eval_dir, 'images') 37 | msk_sav_dir = os.path.join(eval_dir, 'masks') 38 | csv_sav_file = os.path.join(eval_dir, '{}.csv'.format(opt.eval_type)) 39 | os.makedirs(eval_dir) 40 | os.mkdir(img_sav_dir) 41 | os.mkdir(msk_sav_dir) 42 | 43 | if model is None: 44 | from model import GAN 45 | model_dir = os.path.join(save_dir, 'models') 46 | model_path = os.path.join(model_dir, str(opt.epoch) + '.pth') 47 | assert(os.path.exists(model_path)) 48 | model = GAN(opt) 49 | loaded = torch.load(model_path) 50 | assert(opt.epoch == loaded['epoch']) 51 | model.load_state_dict(loaded['model'], strict=True) 52 | model.start_eval() 53 | 54 | gen_res = [] 55 | 56 | for i, (indices, annids, scids, bg_img_arrs, fg_img_arrs, fg_msk_arrs, comp_img_arrs, comp_msk_arrs, bg_img_feats, fg_img_feats, fg_msk_feats, fg_bboxes, comp_img_feats, comp_msk_feats, comp_crop_feats, labels, trans_labels, catnms) in enumerate(tqdm(eval_loader)): 57 | index, annid, scid, bg_img_arr, fg_img_arr, fg_msk_arr, comp_img_arr, comp_msk_arr, label, trans_label, catnm = \ 58 | indices[0], annids[0], scids[0], bg_img_arrs[0], fg_img_arrs[0], fg_msk_arrs[0], comp_img_arrs[0], comp_msk_arrs[0], labels[0], trans_labels[0], catnms[0] 59 | for repeat_id in range(repeat): 60 | pred_img_, pred_msk_, pred_trans_ = model.test_genorator(bg_img_feats, fg_img_feats, fg_msk_feats, fg_bboxes) 61 | gen_comp_img, gen_comp_msk, gen_comp_bbox = gen_composite_image( 62 | bg_img=Image.fromarray(bg_img_arr.numpy().astype(np.uint8)).convert('RGB'), 63 | fg_img=Image.fromarray(fg_img_arr.numpy().astype(np.uint8)).convert('RGB'), 64 | fg_msk=Image.fromarray(fg_msk_arr.numpy().astype(np.uint8)).convert('L'), 65 | trans=(pred_trans_.cpu().numpy().astype(np.float32)[0]).tolist(), 66 | fg_bbox=None 67 | ) 68 | if repeat == 1: 69 | gen_file_name = "{}_{}_{}_{}_{}_{}_{}".format(index, annid, scid, gen_comp_bbox[0], gen_comp_bbox[1], gen_comp_bbox[2], gen_comp_bbox[3]) 70 | else: 71 | gen_file_name = "{}_{}_{}_{}_{}_{}_{}_{}".format(index, repeat_id, annid, scid, gen_comp_bbox[0], gen_comp_bbox[1], gen_comp_bbox[2], gen_comp_bbox[3]) 72 | gen_comp_img.save(os.path.join(img_sav_dir, '{}.jpg'.format(gen_file_name))) 73 | gen_comp_msk.save(os.path.join(msk_sav_dir, '{}.png'.format(gen_file_name))) 74 | gen_res.append(csv_str(annid, scid, gen_comp_bbox, catnm, gen_file_name)) 75 | 76 | with open(csv_sav_file, "w") as f: 77 | f.write(csv_title() + '\n') 78 | for line in gen_res: 79 | 
f.write(line + '\n') 80 | 81 | 82 | def parse_args(): 83 | parser = argparse.ArgumentParser() 84 | parser.add_argument("--dst", type=str, choices=list(dataset_dict.keys()), default="OPADst1", help="dataloader type") 85 | parser.add_argument("--img_size", type=int, default=256, help="size of image") 86 | parser.add_argument("--lr", type=float, default=0.00002, help="adam: learning rate") 87 | parser.add_argument("--b1", type=float, default=0.5, help="adam: decay of first order momentum of gradient") 88 | parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of first order momentum of gradient") 89 | parser.add_argument("--expid", type=str, required=True, help="experiment name") 90 | parser.add_argument("--data_root", type=str, default="OPA", help="dataset root") 91 | parser.add_argument("--eval_type", type=str, choices=["train", "trainpos", "sample", "eval", "evaluni"], default="eval", help="evaluation type") 92 | parser.add_argument("--d_noise", type=int, default=1024, help="dimension of random noise/vector") 93 | parser.add_argument("--d_model", type=int, default=512, help="dimension of features") 94 | parser.add_argument("--d_k", type=int, default=64, help="dimension of key in multi-head attention") 95 | parser.add_argument("--d_v", type=int, default=64, help="dimension of value in multi-head attention") 96 | parser.add_argument("--n_heads", type=int, default=8, help="number of heads in multi-head attention") 97 | parser.add_argument("--len_k", type=int, default=84, help="number of background nodes") 98 | parser.add_argument("--epoch", type=int, required=True, help="which epoch to evaluate") 99 | parser.add_argument("--repeat", type=int, default=1, help="number of times to sample different random vectors") 100 | opt = parser.parse_args() 101 | return opt 102 | 103 | 104 | if __name__ == '__main__': 105 | opt = parse_args() 106 | eval_loader = get_loader(opt.dst, batch_size=1, num_workers=1, image_size=opt.img_size, shuffle=False, mode_type=opt.eval_type, data_root=opt.data_root) 107 | with torch.no_grad(): 108 | infer(eval_loader, opt, model=None, repeat=opt.repeat) 109 | -------------------------------------------------------------------------------- /infer_terse.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from tqdm import tqdm 4 | import numpy as np 5 | from PIL import Image 6 | import torch 7 | from torchvision import transforms 8 | 9 | from loader import dataset_dict, get_loader 10 | from loader.utils import gen_composite_image 11 | 12 | 13 | def infer(eval_loader, opt, model=None, repeat=1): 14 | def csv_title(): 15 | return 'annID,scID,bbox,catnm,label,img_path,msk_path' 16 | def csv_str(annid, scid, gen_comp_bbox, catnm, gen_file_name): 17 | return '{},{},"{}",{},-1,images/{}.jpg,masks/{}.png'.format(annid, scid, gen_comp_bbox, catnm, gen_file_name, gen_file_name) 18 | 19 | assert (repeat >= 1) 20 | save_dir = os.path.join('result', opt.expid) 21 | eval_dir = os.path.join(save_dir, opt.eval_type, str(opt.epoch)) 22 | assert (not os.path.exists(eval_dir)) 23 | img_sav_dir = os.path.join(eval_dir, 'images') 24 | msk_sav_dir = os.path.join(eval_dir, 'masks') 25 | csv_sav_file = os.path.join(eval_dir, '{}.csv'.format(opt.eval_type)) 26 | os.makedirs(eval_dir) 27 | os.mkdir(img_sav_dir) 28 | os.mkdir(msk_sav_dir) 29 | 30 | if model is None: 31 | from model_terse import GAN 32 | model_dir = os.path.join(save_dir, 'models') 33 | model_path = os.path.join(model_dir, str(opt.epoch) + '.pth') 34
| assert (os.path.exists(model_path)) 35 | model = GAN(opt) 36 | loaded = torch.load(model_path)  # checkpoint dict written by tool/utils.save(): {'epoch', 'model', 'opt', 'optimizer'} 37 | assert (opt.epoch == loaded['epoch']) 38 | model.load_state_dict(loaded['model'], strict=True) 39 | model.start_eval() 40 | 41 | gen_res = [] 42 | 43 | for i, (indices, annids, scids, bg_img_arrs, fg_img_arrs, fg_msk_arrs, comp_img_arrs, comp_msk_arrs, bg_img_feats, fg_img_feats, fg_msk_feats, fg_bboxes, comp_img_feats, comp_msk_feats, comp_crop_feats, labels, trans_labels, catnms) in enumerate(tqdm(eval_loader)): 44 | index, annid, scid, bg_img_arr, fg_img_arr, fg_msk_arr, comp_img_arr, comp_msk_arr, label, trans_label, catnm = \ 45 | indices[0], annids[0], scids[0], bg_img_arrs[0], fg_img_arrs[0], fg_msk_arrs[0], comp_img_arrs[0], comp_msk_arrs[0], labels[0], trans_labels[0], catnms[0] 46 | for repeat_id in range(repeat): 47 | pred_img_, pred_msk_, pred_trans_ = model.test_genorator(bg_img_feats, fg_img_feats, fg_msk_feats, fg_bboxes)  # 'test_genorator' (sic) is the method name defined in model_terse.py 48 | gen_comp_img, gen_comp_msk, gen_comp_bbox = gen_composite_image( 49 | bg_img=Image.fromarray(bg_img_arr.numpy().astype(np.uint8)).convert('RGB'), 50 | fg_img=Image.fromarray(fg_img_arr.numpy().astype(np.uint8)).convert('RGB'), 51 | fg_msk=Image.fromarray(fg_msk_arr.numpy().astype(np.uint8)).convert('L'), 52 | trans=(pred_trans_.cpu().numpy().astype(np.float32)[0]).tolist(), 53 | fg_bbox=None 54 | ) 55 | if repeat == 1: 56 | gen_file_name = "{}_{}_{}_{}_{}_{}_{}".format(index, annid, scid, gen_comp_bbox[0], gen_comp_bbox[1], gen_comp_bbox[2], gen_comp_bbox[3]) 57 | else: 58 | gen_file_name = "{}_{}_{}_{}_{}_{}_{}_{}".format(index, repeat_id, annid, scid, gen_comp_bbox[0], gen_comp_bbox[1], gen_comp_bbox[2], gen_comp_bbox[3]) 59 | gen_comp_img.save(os.path.join(img_sav_dir, '{}.jpg'.format(gen_file_name))) 60 | gen_comp_msk.save(os.path.join(msk_sav_dir, '{}.png'.format(gen_file_name))) 61 | gen_res.append(csv_str(annid, scid, gen_comp_bbox, catnm, gen_file_name)) 62 | 63 | with open(csv_sav_file, "w") as f: 64 | f.write(csv_title() + '\n') 65 | for line in gen_res: 66 | f.write(line + '\n') 67 | 68 | 69 | def parse_args(): 70 | parser = argparse.ArgumentParser() 71 | parser.add_argument("--dst", type=str, choices=list(dataset_dict.keys()), default="OPADst1", help="dataloader type") 72 | parser.add_argument("--img_size", type=int, default=256, help="size of images") 73 | parser.add_argument("--lr", type=float, default=0.0001, help="adam: learning rate") 74 | parser.add_argument("--b1", type=float, default=0.9, help="adam: decay of first order momentum of gradient") 75 | parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of second order momentum of gradient") 76 | parser.add_argument("--weight_decay", type=float, default=0.0005, help="adam: weight decay") 77 | parser.add_argument("--expid", type=str, required=True, help="experiment name") 78 | parser.add_argument("--data_root", type=str, default="OPA", help="dataset root") 79 | parser.add_argument("--eval_type", type=str, choices=["train", "trainpos", "sample", "eval", "evaluni"], default="eval", help="evaluation type") 80 | parser.add_argument("--dim_fc", type=int, default=1600, help="fc input dimension") 81 | parser.add_argument("--d_model", type=int, default=512, help="backbone feature dimension") 82 | parser.add_argument("--d_branch", type=int, default=20, help="branch feature dimension") 83 | parser.add_argument("--epoch", type=int, required=True, help="which epoch to evaluate") 84 | parser.add_argument("--repeat", type=int, default=1, help="number of times to sample different
random vectors") 85 | opt = parser.parse_args() 86 | return opt 87 | 88 | 89 | if __name__ == '__main__': 90 | opt = parse_args() 91 | eval_loader = get_loader(opt.dst, batch_size=1, num_workers=1, image_size=opt.img_size, shuffle=False, mode_type=opt.eval_type, data_root=opt.data_root) 92 | with torch.no_grad(): 93 | infer(eval_loader, opt, model=None, repeat=opt.repeat) 94 | -------------------------------------------------------------------------------- /loader/__init__.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import DataLoader 2 | 3 | if __name__ == 'loader': 4 | from .base import OPABasicDataset 5 | from .datasets import OPADst1, OPADst3 6 | elif __name__ == '__init__': 7 | from base import OPABasicDataset 8 | from datasets import OPADst1, OPADst3 9 | else: 10 | raise NotImplementedError 11 | 12 | 13 | dataset_dict = {"OPABasicDataset": OPABasicDataset, "OPADst1": OPADst1, "OPADst3": OPADst3} 14 | 15 | def get_loader(name, batch_size, num_workers, image_size, shuffle, mode_type, data_root): 16 | dset = dataset_dict[name](size=image_size, mode_type=mode_type, data_root=data_root) 17 | loader = DataLoader(dset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers) 18 | return loader 19 | 20 | def get_dataset(name, image_size, mode_type, data_root): 21 | dset = dataset_dict[name](size=image_size, mode_type=mode_type, data_root=data_root) 22 | return dset 23 | -------------------------------------------------------------------------------- /loader/base.py: -------------------------------------------------------------------------------- 1 | import os 2 | from PIL import Image 3 | from torch.utils.data import Dataset 4 | 5 | if __name__ == 'loader.base': 6 | from .utils import obtain_opa_data 7 | elif __name__ == '__main__' or __name__ == 'base': 8 | from utils import obtain_opa_data 9 | else: 10 | raise NotImplementedError 11 | 12 | 13 | class OPABasicDataset(Dataset): 14 | def __init__(self, size, mode_type, data_root): 15 | # self.error_bar = 0.15 16 | self.size = size 17 | self.mode_type = mode_type 18 | self.data_root = data_root 19 | self.bg_dir = os.path.join(data_root, "background") 20 | self.fg_dir = os.path.join(data_root, "foreground") 21 | self.fg_msk_dir = os.path.join(data_root, "foreground") 22 | 23 | if mode_type == "train": 24 | csv_file = os.path.join(data_root, "train_data.csv") 25 | elif mode_type == "trainpos": 26 | csv_file = os.path.join(data_root, "train_data_pos.csv") 27 | elif mode_type == "sample": 28 | csv_file = os.path.join(data_root, "test_data.csv") 29 | elif mode_type == "eval": 30 | csv_file = os.path.join(data_root, "test_data_pos.csv") 31 | elif mode_type == "evaluni": 32 | csv_file = os.path.join(data_root, "test_data_pos_unique.csv") 33 | else: 34 | raise NotImplementedError 35 | self.data = obtain_opa_data(csv_file) 36 | 37 | def __len__(self): 38 | return len(self.data) 39 | 40 | def __getitem__(self, index): 41 | index_, annid, scid, bbox, scale, label, catnm, img_path, msk_path = self.data[index] 42 | 43 | bg_path = os.path.join(self.bg_dir, catnm, "{}.jpg".format(scid)) 44 | fg_path = os.path.join(self.fg_dir, catnm, "{}.jpg".format(annid)) 45 | fg_mask_path = os.path.join(self.fg_msk_dir, catnm, "mask_{}.jpg".format(annid)) 46 | img_path = os.path.join(self.data_root, img_path) 47 | msk_path = os.path.join(self.data_root, msk_path) 48 | 49 | bg_img = Image.open(bg_path).convert('RGB') 50 | fg_img = Image.open(fg_path).convert('RGB') 51 | fg_msk = 
Image.open(fg_mask_path).convert('L') 52 | comp_img = Image.open(img_path).convert('RGB') 53 | comp_msk = Image.open(msk_path).convert('L') 54 | 55 | assert (bg_img.size == comp_img.size and comp_img.size == comp_msk.size and fg_img.size == fg_msk.size) 56 | # assert (math.fabs((bbox[2] * fg_img.size[1]) / (bbox[3] * fg_img.size[0]) - 1.0) < self.error_bar) 57 | assert (bbox[0] + bbox[2] <= bg_img.size[0] and bbox[1] + bbox[3] <= bg_img.size[1]) 58 | 59 | return index_, annid, scid, bbox, scale, label, catnm, bg_img, fg_img, fg_msk, comp_img, comp_msk 60 | -------------------------------------------------------------------------------- /loader/utils.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from PIL import Image 3 | import numpy as np 4 | from torchvision import transforms 5 | 6 | 7 | def obtain_opa_data(csv_file): 8 | csv_data = csv.DictReader(open(csv_file, 'r')) 9 | res_data = [ 10 | [ 11 | i, int(row['annID']), int(row['scID']), 12 | list(map(int, row['bbox'][1:-1].split(','))), 13 | row['scale'], int(row['label']), row['catnm'], 14 | row['new_img_path'], row['new_msk_path'], 15 | ] 16 | for i, row in enumerate(csv_data) 17 | ] 18 | return res_data 19 | 20 | 21 | def img_crop(x, x_mode, bbox): 22 | assert (x_mode in ['gray', 'color']) 23 | h_low, h_high, w_low, w_high = bbox[1], bbox[1] + bbox[3], bbox[0], bbox[0] + bbox[2] 24 | y_arr = np.array(x, dtype=np.uint8) 25 | if x_mode == 'gray': 26 | y_arr = y_arr[h_low:h_high, w_low:w_high] 27 | else: 28 | y_arr = y_arr[h_low:h_high, w_low:w_high, :] 29 | y = Image.fromarray(y_arr) 30 | return y 31 | 32 | 33 | def get_trans_label(bg_img, fg_img, bbox): 34 | assert (bg_img.size[0] > bbox[2] and bg_img.size[1] > bbox[3]) 35 | bg_w, bg_h, fg_w, fg_h = bg_img.size[0], bg_img.size[1], fg_img.size[0], fg_img.size[1] 36 | trans_label = np.zeros(3, dtype=np.float32) # [relative_scale, relative_x, relative_y] in (0,1)^3 37 | if bg_w / bg_h > fg_w / fg_h: # scale is relative to bg height when the bg is proportionally wider than the fg, else to bg width (mirrors gen_composite_image below) 38 | trans_label[0] = bbox[3] / bg_h 39 | else: 40 | trans_label[0] = bbox[2] / bg_w 41 | trans_label[1] = bbox[0] / (bg_w - bbox[2]) 42 | trans_label[2] = bbox[1] / (bg_h - bbox[3]) 43 | assert (trans_label.min() >= 0 and trans_label.max() <= 1) 44 | return trans_label 45 | 46 | 47 | def gen_composite_image(bg_img, fg_img, fg_msk, trans, fg_bbox=None): 48 | def modify(x, y, w, h): 49 | if x < 0: 50 | x = 0 51 | if x >= bg_img.size[0]: 52 | x = bg_img.size[0] - 1 53 | if y < 0: 54 | y = 0 55 | if y >= bg_img.size[1]: 56 | y = bg_img.size[1] - 1 57 | if w <= 0: 58 | w = 1 59 | if h <= 0: 60 | h = 1 61 | return x, y, w, h 62 | if fg_bbox is not None: 63 | fg_img = img_crop(fg_img, 'color', fg_bbox) 64 | fg_msk = img_crop(fg_msk, 'gray', fg_bbox) 65 | bg_w, bg_h, fg_w, fg_h = bg_img.size[0], bg_img.size[1], fg_img.size[0], fg_img.size[1] 66 | relative_scale, relative_x, relative_y = trans[0], trans[1], trans[2] 67 | if bg_w / bg_h > fg_w / fg_h: 68 | fg_w_new, fg_h_new = bg_h * relative_scale * fg_w / fg_h, bg_h * relative_scale 69 | else: 70 | fg_w_new, fg_h_new = bg_w * relative_scale, bg_w * relative_scale * fg_h / fg_w 71 | start_x, start_y, width, height = round((bg_w - fg_w_new) * relative_x), round((bg_h - fg_h_new) * relative_y), round(fg_w_new), round(fg_h_new) 72 | start_x, start_y, width, height = modify(start_x, start_y, width, height) 73 | resize_func = transforms.Resize((height, width), interpolation=Image.BILINEAR) 74 | fg_img_new, fg_msk_new = resize_func(fg_img), resize_func(fg_msk) 75 | comp_img_arr, bg_img_arr, fg_img_arr, fg_msk_arr = np.array(bg_img), np.array(bg_img), np.array(fg_img_new), np.array(fg_msk_new) 76 | fg_msk_arr_norm = fg_msk_arr[:,:,np.newaxis].repeat(3, axis=2) / 255.0 77 | comp_img_arr[start_y:start_y+height, start_x:start_x+width, :] = fg_msk_arr_norm * fg_img_arr + (1.0 - fg_msk_arr_norm) * bg_img_arr[start_y:start_y+height, start_x:start_x+width, :] 78 | comp_img = Image.fromarray(comp_img_arr.astype(np.uint8)).convert('RGB') 79 | comp_msk_arr = np.zeros(comp_img_arr.shape[:2]) 80 | comp_msk_arr[start_y:start_y+height, start_x:start_x+width] = fg_msk_arr 81 | comp_msk = Image.fromarray(comp_msk_arr.astype(np.uint8)).convert('L') 82 | return comp_img, comp_msk, [start_x, start_y, width, height] 83 |
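# Worked example of the trans encoding (hypothetical numbers, for illustration only):
# with a 512x384 background and a 100x200 foreground, bg_w / bg_h (~1.33) > fg_w / fg_h (0.5),
# so trans = [0.5, 0.5, 0.5] rescales the foreground to half the background height (96x192)
# and centers it: start_x = round((512 - 96) * 0.5) = 208, start_y = round((384 - 192) * 0.5) = 96,
# i.e. gen_composite_image returns the bbox [208, 96, 96, 192].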
-------------------------------------------------------------------------------- /main_placenet.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import torch 4 | from PIL import Image 5 | import warnings 6 | with warnings.catch_warnings(): 7 | warnings.filterwarnings("ignore", category=FutureWarning) 8 | import tensorboard_logger as tb_logger 9 | 10 | from tool.utils import make_dirs, save, resume, make_logger, AverageMeter 11 | from loader import dataset_dict, get_loader, get_dataset 12 | from model_placenet import GAN 13 | from infer_placenet import infer 14 | 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument("--dst", type=str, choices=list(dataset_dict.keys()), default="OPADst3", help="dataloader type") 19 | parser.add_argument("--n_epochs", type=int, default=15, help="number of epochs of training") 20 | parser.add_argument("--batch_size", type=int, default=32, help="size of the batches") 21 | parser.add_argument("--img_size", type=int, default=256, help="size of images") 22 | parser.add_argument("--lr", type=float, default=0.0001, help="adam: learning rate") 23 | parser.add_argument("--b1", type=float, default=0.9, help="adam: decay of first order momentum of gradient") 24 | parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of second order momentum of gradient") 25 | parser.add_argument("--weight_decay", type=float, default=0.0005, help="adam: weight decay") 26 | parser.add_argument("--sample_interval", type=int, default=100, help="interval between image sampling") 27 | parser.add_argument("--expid", type=str, required=True, help="experiment name") 28 | parser.add_argument("--resume_pth", type=str, default=None, help="specify a .pth path to resume training, or None to train from scratch") 29 | parser.add_argument("--data_root", type=str, default="OPA", help="dataset root") 30 | parser.add_argument("--eval_type", type=str, choices=["train", "trainpos", "sample", "eval", "evaluni"], default="eval", help="evaluation type") 31 | parser.add_argument("--samp_N", type=int, default=4, help="sampling count of random z during training") 32 | parser.add_argument("--d_emb", type=int, default=512, help="embedding dimension") 33 | parser.add_argument("--d_fc_gen", type=int, default=512, help="generator fc dimension") 34 | parser.add_argument("--d_fc_disc", type=int, default=512, help="discriminator fc dimension") 35 | parser.add_argument("--margin", type=float, default=1, help="alpha in ndiv loss") 36 | parser.add_argument("--with_infer", action='store_true', default=False, help="run inference after each training epoch") 37 | opt = parser.parse_args() 38 | return opt 39 | 40 | 41 | def main(): 42 | opt = parse_args() 43 | save_dir = os.path.join('result',
opt.expid) 44 | dirs, is_old_exp = make_dirs(save_dir) 45 | model_dir, sample_dir, tblog_dir, log_path = dirs['model_dir'], dirs['sample_dir'], dirs['tblog_dir'], dirs['log_path'] 46 | assert (is_old_exp or opt.resume_pth is None) 47 | 48 | tb_logger.configure(tblog_dir, flush_secs=5) 49 | logger = make_logger(log_path) 50 | logger.info(opt) 51 | 52 | train_loader = get_loader(opt.dst, batch_size=opt.batch_size, num_workers=8, image_size=opt.img_size, shuffle=True, mode_type="train", data_root=opt.data_root) 53 | sample_dataset = get_dataset(opt.dst, image_size=opt.img_size, mode_type="sample", data_root=opt.data_root) 54 | eval_loader = get_loader(opt.dst, batch_size=1, num_workers=1, image_size=opt.img_size, shuffle=False, mode_type=opt.eval_type, data_root=opt.data_root) 55 | 56 | model = GAN(opt) 57 | model, start_ep = resume(opt.resume_pth, model, resume_list=['generator', 'discriminator'], strict=True, logger=logger) 58 | assert (start_ep < opt.n_epochs) 59 | model.Eiters = start_ep * len(train_loader) 60 | 61 | g_gan_loss_meter, g_ndiv_loss_meter, d_real_loss_meter, d_fake_loss_meter = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter() 62 | 63 | for epoch in range(start_ep, opt.n_epochs): 64 | for i, (indices, bg_img_feats, fg_img_feats, fg_msk_feats, comp_img_feats, comp_msk_feats, comp_bboxes, labels, catnms) in enumerate(train_loader): 65 | model.start_train() 66 | g_gan_loss, g_ndiv_loss, d_real_loss, d_fake_loss = model.train_disc_gen(bg_img_feats, fg_img_feats, fg_msk_feats, comp_img_feats, comp_msk_feats, comp_bboxes, labels) 67 | 68 | tb_logger.log_value('g_gan_loss', g_gan_loss.item(), step=model.Eiters) 69 | tb_logger.log_value('g_ndiv_loss', g_ndiv_loss.item(), step=model.Eiters) 70 | tb_logger.log_value('d_real_loss', d_real_loss.item(), step=model.Eiters) 71 | tb_logger.log_value('d_fake_loss', d_fake_loss.item(), step=model.Eiters) 72 | 73 | bs = len(indices) 74 | g_gan_loss_meter.update(g_gan_loss.item(), bs) 75 | g_ndiv_loss_meter.update(g_ndiv_loss.item(), bs) 76 | d_real_loss_meter.update(d_real_loss.item(), bs) 77 | d_fake_loss_meter.update(d_fake_loss.item(), bs) 78 | 79 | if (epoch * len(train_loader) + i) % 10 == 0: 80 | logger.info( 81 | "[Epoch %d/%d] [Batch %d/%d] [G - gan: %.3f ndiv: %.5f] [D - real: %.3f, fake1: %.3f]" 82 | % (epoch + 1, opt.n_epochs, i + 1, len(train_loader), g_gan_loss_meter.avg, g_ndiv_loss_meter.avg, d_real_loss_meter.avg, d_fake_loss_meter.avg) 83 | ) 84 | 85 | opt.epoch = epoch + 1 86 | if opt.with_infer: 87 | with torch.no_grad(): 88 | infer(eval_loader, opt, model) 89 | 90 | save(model_dir, model, opt, logger=logger) 91 | 92 | g_gan_loss_meter.reset() 93 | g_ndiv_loss_meter.reset() 94 | d_real_loss_meter.reset() 95 | d_fake_loss_meter.reset() 96 | 97 | 98 | if __name__ == '__main__': 99 | main() 100 | -------------------------------------------------------------------------------- /main_terse.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import torch 4 | from PIL import Image 5 | import warnings 6 | with warnings.catch_warnings(): 7 | warnings.filterwarnings("ignore", category=FutureWarning) 8 | import tensorboard_logger as tb_logger 9 | 10 | from tool.utils import make_dirs, save, resume, make_logger, AverageMeter 11 | from loader import dataset_dict, get_loader, get_dataset 12 | from model_terse import GAN 13 | from infer_terse import infer 14 | 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser() 18 | 
parser.add_argument("--dst", type=str, choices=list(dataset_dict.keys()), default="OPADst1", help="dataloader type") 19 | parser.add_argument("--n_epochs", type=int, default=15, help="number of epochs of training") 20 | parser.add_argument("--batch_size", type=int, default=32, help="size of the batches") 21 | parser.add_argument("--img_size", type=int, default=256, help="size of images") 22 | parser.add_argument("--lr", type=float, default=0.0001, help="adam: learning rate") 23 | parser.add_argument("--b1", type=float, default=0.9, help="adam: decay of first order momentum of gradient") 24 | parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of second order momentum of gradient") 25 | parser.add_argument("--weight_decay", type=float, default=0.0005, help="adam: weight decay") 26 | parser.add_argument("--sample_interval", type=int, default=100, help="interval between image sampling") 27 | parser.add_argument("--expid", type=str, required=True, help="experiment name") 28 | parser.add_argument("--resume_pth", type=str, default=None, help="specify a .pth path to resume training, or None to train from scratch") 29 | parser.add_argument("--data_root", type=str, default="OPA", help="dataset root") 30 | parser.add_argument("--eval_type", type=str, choices=["train", "trainpos", "sample", "eval", "evaluni"], default="eval", help="evaluation type") 31 | parser.add_argument("--dim_fc", type=int, default=1600, help="fc input dimension") 32 | parser.add_argument("--d_model", type=int, default=512, help="backbone feature dimension") 33 | parser.add_argument("--d_branch", type=int, default=20, help="branch feature dimension") 34 | parser.add_argument("--with_infer", action='store_true', default=False, help="run inference after each training epoch") 35 | opt = parser.parse_args() 36 | return opt 37 | 38 | 39 | def main(): 40 | opt = parse_args() 41 | save_dir = os.path.join('result', opt.expid) 42 | dirs, is_old_exp = make_dirs(save_dir) 43 | model_dir, sample_dir, tblog_dir, log_path = dirs['model_dir'], dirs['sample_dir'], dirs['tblog_dir'], dirs['log_path'] 44 | assert (is_old_exp or opt.resume_pth is None) 45 | 46 | tb_logger.configure(tblog_dir, flush_secs=5) 47 | logger = make_logger(log_path) 48 | logger.info(opt) 49 | 50 | train_loader = get_loader(opt.dst, batch_size=opt.batch_size, num_workers=8, image_size=opt.img_size, shuffle=True, mode_type="train", data_root=opt.data_root) 51 | sample_dataset = get_dataset(opt.dst, image_size=opt.img_size, mode_type="sample", data_root=opt.data_root) 52 | eval_loader = get_loader(opt.dst, batch_size=1, num_workers=1, image_size=opt.img_size, shuffle=False, mode_type=opt.eval_type, data_root=opt.data_root) 53 | 54 | model = GAN(opt) 55 | model, start_ep = resume(opt.resume_pth, model, resume_list=['generator', 'discriminator'], strict=True, logger=logger) 56 | assert (start_ep < opt.n_epochs) 57 | model.Eiters = start_ep * len(train_loader) 58 | 59 | g_gan_loss_meter, d_real_loss_meter, d_fake_loss_meter = AverageMeter(), AverageMeter(), AverageMeter() 60 | 61 | for epoch in range(start_ep, opt.n_epochs): 62 | for i, (indices, bg_img_feats, fg_img_feats, fg_msk_feats, fg_bboxes, comp_img_feats, comp_msk_feats, comp_crop_feats, labels, trans_labels, catnms) in enumerate(train_loader): 63 | model.start_train() 64 | g_gan_loss, d_real_loss, d_fake_loss = model.train_disc_gen(bg_img_feats, fg_img_feats, fg_msk_feats, fg_bboxes, comp_img_feats, comp_msk_feats, labels) 65 | 66 | tb_logger.log_value('g_gan_loss', g_gan_loss.item(),
step=model.Eiters) 67 | tb_logger.log_value('d_real_loss', d_real_loss.item(), step=model.Eiters) 68 | tb_logger.log_value('d_fake_loss', d_fake_loss.item(), step=model.Eiters) 69 | 70 | bs = len(indices) 71 | g_gan_loss_meter.update(g_gan_loss.item(), bs) 72 | d_real_loss_meter.update(d_real_loss.item(), bs) 73 | d_fake_loss_meter.update(d_fake_loss.item(), bs) 74 | 75 | if (epoch * len(train_loader) + i) % 10 == 0: 76 | logger.info( 77 | "[Epoch %d/%d] [Batch %d/%d] [G - gan: %.3f] [D - real: %.3f, fake1: %.3f]" 78 | % (epoch + 1, opt.n_epochs, i + 1, len(train_loader), g_gan_loss_meter.avg, d_real_loss_meter.avg, d_fake_loss_meter.avg) 79 | ) 80 | 81 | opt.epoch = epoch + 1 82 | if opt.with_infer: 83 | with torch.no_grad(): 84 | infer(eval_loader, opt, model) 85 | 86 | save(model_dir, model, opt, logger=logger) 87 | 88 | g_gan_loss_meter.reset() 89 | d_real_loss_meter.reset() 90 | d_fake_loss_meter.reset() 91 | 92 | 93 | if __name__ == '__main__': 94 | main() 95 | -------------------------------------------------------------------------------- /network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torchvision.models as models 6 | 7 | 8 | BN_MOMENTUM = 0.1 9 | 10 | 11 | def vgg16_bn(pretrained): 12 | model = models.vgg16_bn(pretrained=False) 13 | model.features[0] = nn.Conv2d(4, 64, kernel_size=3, stride=1, padding=1, bias=True) 14 | 15 | if pretrained: 16 | pretrained_state_dict = models.vgg16_bn(pretrained=True).state_dict() 17 | conv = pretrained_state_dict['features.0.weight'] 18 | new = torch.zeros(64, 1, 3, 3) 19 | for i, output_channel in enumerate(conv): 20 | new[i] = 0.299 * output_channel[0] + 0.587 * output_channel[1] + 0.114 * output_channel[2] 21 | pretrained_state_dict['features.0.weight'] = torch.cat((conv, new), dim=1) 22 | model.load_state_dict(pretrained_state_dict) 23 | 24 | return model 25 | 26 | 27 | class VAEEncoder(nn.Module): 28 | def __init__(self, opt): 29 | super(VAEEncoder, self).__init__() 30 | self.fc1 = nn.Sequential( 31 | nn.Linear(opt.d_model, 1024), 32 | nn.ReLU(inplace=True) 33 | ) 34 | self.fc2 = nn.Linear(1024, opt.d_noise) 35 | self.fc3 = nn.Linear(1024, opt.d_noise) 36 | 37 | def encode(self, x): 38 | h = self.fc1(x) 39 | mu = self.fc2(h) 40 | logvar = self.fc3(h) 41 | return mu, logvar 42 | 43 | def reparameterize(self, mu, logvar): 44 | if self.training: 45 | std = torch.exp(logvar / 2.0) 46 | eps = torch.randn_like(std) 47 | return mu + eps * std 48 | else: 49 | return mu 50 | 51 | def forward(self, x): 52 | mu, logvar = self.encode(x) 53 | latent_code = self.reparameterize(mu, logvar) 54 | return latent_code, mu, logvar 55 | 56 | 57 | class FgBgRegression(nn.Module): 58 | def __init__(self, opt): 59 | super(FgBgRegression, self).__init__() 60 | self.regressor = nn.Sequential( 61 | nn.Linear(opt.d_model + opt.d_noise, 1024), 62 | nn.ReLU(True), 63 | nn.Dropout(0.5), 64 | nn.Linear(1024, 1024), 65 | nn.ReLU(True), 66 | nn.Dropout(0.5), 67 | nn.Linear(1024, 3), 68 | ) 69 | 70 | def forward(self, x): 71 | out = self.regressor(x) 72 | return out 73 | 74 | 75 | class FgBgLayer(nn.Module): 76 | def __init__(self, opt, n_mesh): 77 | super(FgBgLayer, self).__init__() 78 | self.features = nn.Sequential( 79 | nn.Conv2d(opt.d_model, 512, kernel_size=3, stride=1, padding=1), 80 | nn.BatchNorm2d(512, momentum=BN_MOMENTUM), 81 | nn.ReLU(inplace=True), 82 | nn.Conv2d(512, 512, kernel_size=3, stride=1, 
padding=1), 83 | nn.BatchNorm2d(512, momentum=BN_MOMENTUM), 84 | nn.ReLU(inplace=True), 85 | nn.Conv2d(512, opt.d_model, kernel_size=3, stride=1, padding=1), 86 | nn.BatchNorm2d(opt.d_model, momentum=BN_MOMENTUM), 87 | nn.ReLU(inplace=True) 88 | ) 89 | self.pool = nn.AdaptiveAvgPool2d((n_mesh, n_mesh)) 90 | 91 | def forward(self, x): 92 | feats = self.features(x) 93 | pooled_feats = self.pool(feats) 94 | nodes = pooled_feats.view(pooled_feats.shape[0], pooled_feats.shape[1], -1).transpose(1, 2).contiguous() 95 | return nodes 96 | 97 | 98 | class FgBgHead(nn.Module): 99 | def __init__(self, opt, n_mesh_list): 100 | super(FgBgHead, self).__init__() 101 | self.layers = nn.ModuleList([FgBgLayer(opt, n_mesh) for n_mesh in n_mesh_list]) 102 | 103 | def forward(self, x): 104 | node_list = [] 105 | for layer in self.layers: 106 | nodes = layer(x) 107 | node_list.append(nodes) 108 | return torch.cat(node_list, dim=1) 109 | 110 | 111 | class ScaledDotProductAttention(nn.Module): 112 | def __init__(self, opt): 113 | super(ScaledDotProductAttention, self).__init__() 114 | self.opt = opt 115 | self.pos_k = nn.Embedding(opt.n_heads * opt.len_k, opt.d_k) 116 | self.pos_v = nn.Embedding(opt.n_heads * opt.len_k, opt.d_v) 117 | self.pos_ids = torch.LongTensor(list(range(opt.n_heads * opt.len_k))).view(1, opt.n_heads, opt.len_k) 118 | 119 | def forward(self, Q, K, V): 120 | K_pos = self.pos_k(self.pos_ids.cuda()) 121 | V_pos = self.pos_v(self.pos_ids.cuda()) 122 | scores = torch.matmul(Q, (K + K_pos).transpose(-1, -2)) / np.sqrt(self.opt.d_k) 123 | attn = nn.Softmax(dim=-1)(scores) 124 | context = torch.matmul(attn, V + V_pos) 125 | return context, attn 126 | 127 | 128 | class MultiHeadAttention(nn.Module): 129 | def __init__(self, opt): 130 | super(MultiHeadAttention, self).__init__() 131 | self.opt = opt 132 | self.W_Q = nn.Linear(opt.d_model, opt.d_k * opt.n_heads) 133 | self.W_K = nn.Linear(opt.d_model, opt.d_k * opt.n_heads) 134 | self.W_V = nn.Linear(opt.d_model, opt.d_v * opt.n_heads) 135 | self.att = ScaledDotProductAttention(opt) 136 | self.W_O = nn.Linear(opt.n_heads * opt.d_v, opt.d_model) 137 | self.norm = nn.LayerNorm(opt.d_model) 138 | 139 | def forward(self, Q, K, V): 140 | residual, batch_size = Q, Q.size(0) 141 | q_s = self.W_Q(Q).view(batch_size, -1, self.opt.n_heads, self.opt.d_k).transpose(1,2) 142 | k_s = self.W_K(K).view(batch_size, -1, self.opt.n_heads, self.opt.d_k).transpose(1,2) 143 | v_s = self.W_V(V).view(batch_size, -1, self.opt.n_heads, self.opt.d_v).transpose(1,2) 144 | context, attn = self.att(q_s, k_s, v_s) 145 | context = context.transpose(1, 2).contiguous().view(batch_size, -1, self.opt.n_heads * self.opt.d_v) 146 | output = self.W_O(context) 147 | return self.norm(output + residual), attn 148 | 149 | 150 | class FgBgAttention(nn.Module): 151 | def __init__(self, opt): 152 | super(FgBgAttention, self).__init__() 153 | self.att = MultiHeadAttention(opt) 154 | 155 | def forward(self, fg_feats, bg_feats): 156 | output, attn = self.att(fg_feats, bg_feats, bg_feats) 157 | return output, attn 158 | -------------------------------------------------------------------------------- /network_placenet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torchvision.models as models 6 | 7 | 8 | class Encoder(nn.Module): 9 | def __init__(self): 10 | super(Encoder, self).__init__() 11 | self.net = 
nn.Sequential(*list(models.resnet18(pretrained=True).children())[:-1]) 12 | 13 | def forward(self, x): 14 | output = self.net(x).view(x.size(0), -1) 15 | return output 16 | -------------------------------------------------------------------------------- /network_terse.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torchvision.models as models 6 | 7 | 8 | BN_MOMENTUM = 0.1 9 | 10 | 11 | def split_branch(opt): 12 | return nn.Sequential( 13 | nn.Conv2d(opt.d_model, 256, kernel_size=3, stride=1, padding=0), 14 | nn.BatchNorm2d(256, momentum=BN_MOMENTUM), 15 | nn.ReLU(inplace=True), 16 | nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=0), 17 | nn.BatchNorm2d(256, momentum=BN_MOMENTUM), 18 | nn.ReLU(inplace=True), 19 | nn.Conv2d(256, opt.d_branch, kernel_size=3, stride=1, padding=0), 20 | nn.BatchNorm2d(opt.d_branch, momentum=BN_MOMENTUM), 21 | nn.ReLU(inplace=True), 22 | nn.MaxPool2d(kernel_size=2, stride=2) 23 | ) 24 | 25 | 26 | class RegressionFC(nn.Module): 27 | def __init__(self, opt): 28 | super(RegressionFC, self).__init__() 29 | self.features = nn.Sequential( 30 | nn.Conv2d(opt.d_branch * 2, 64, kernel_size=5, padding=2), 31 | nn.ReLU(inplace=True), 32 | nn.BatchNorm2d(64, momentum=BN_MOMENTUM), 33 | nn.Conv2d(64, 64, kernel_size=5, padding=2), 34 | nn.ReLU(inplace=True), 35 | nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 36 | ) 37 | self.regressor = nn.Sequential( 38 | nn.Linear(opt.dim_fc, 128), 39 | nn.ReLU(inplace=True), 40 | nn.BatchNorm1d(128, momentum=BN_MOMENTUM), 41 | nn.Linear(128, 128), 42 | nn.ReLU(inplace=True), 43 | nn.BatchNorm1d(128, momentum=BN_MOMENTUM), 44 | nn.Linear(128, 3), 45 | ) 46 | 47 | def forward(self, x): 48 | feats = self.features(x) 49 | feats_flat = feats.view(feats.shape[0], -1) 50 | out = self.regressor(feats_flat) 51 | return out 52 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cffi 2 | cython 3 | easydict 4 | einops 5 | imageio 6 | lpips==0.1.4 7 | matplotlib 8 | msgpack 9 | opencv-python 10 | pyyaml==5.1 11 | scipy==1.2.1 12 | tensorboard_logger 13 | tensorboardX 14 | torchsummary 15 | tqdm -------------------------------------------------------------------------------- /result/README.md: -------------------------------------------------------------------------------- 1 | This directory stores the trained models, logs, and evaluation results. If you have performed an experiment with ```--expid YOUR_EXPERIMENT_NAME```, you will find a folder named ```YOUR_EXPERIMENT_NAME``` under this directory. 2 | 3 | If you want to run inference with our provided models, please 1) download the corresponding ```.zip``` file from the model zoo, and 2) place it under this directory and uncompress it.
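For example, assuming the downloaded archive is named ```graconet.zip``` (a hypothetical file name; substitute the actual archive from the model zoo), you would restore the experiment folder with:

```sh
# run from the repository root
mv graconet.zip result/
cd result && unzip graconet.zip   # should leave a folder like result/YOUR_EXPERIMENT_NAME/
```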
-------------------------------------------------------------------------------- /script/eval_acc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### Calculating Accuracy (plausibility) ### 4 | 5 | ### START USAGE ### 6 | # sh script/eval_acc.sh ${EXPID} ${EPOCH} ${BINARY_CLASSIFIER} 7 | ### END USAGE ### 8 | 9 | EXPID=$1 10 | EPOCH=$2 11 | BINARY_CLASSIFIER=$3 12 | 13 | cd faster-rcnn 14 | python generate_tsv.py --expid ${EXPID} --epoch ${EPOCH} --eval_type "eval" --cuda 15 | python convert_data.py --expid ${EXPID} --epoch ${EPOCH} --eval_type "eval" 16 | cd .. 17 | python eval/simopa_acc.py --checkpoint ${BINARY_CLASSIFIER} --expid ${EXPID} --epoch ${EPOCH} --eval_type "eval" 18 | 19 | ### Uncomment the following lines if you would like to delete faster-rcnn intermediate results ### 20 | # rm result/${EXPID}/eval/${EPOCH}/eval_roiinfos.csv 21 | # rm result/${EXPID}/eval/${EPOCH}/eval_fgfeats.npy 22 | # rm result/${EXPID}/eval/${EPOCH}/eval_scores.npy 23 | # rm result/${EXPID}/eval/${EPOCH}/eval_feats.npy 24 | # rm result/${EXPID}/eval/${EPOCH}/eval_bboxes.npy 25 | -------------------------------------------------------------------------------- /script/eval_fid.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### Calculating FID (plausibility) ### 4 | 5 | ### START USAGE ### 6 | # sh script/eval_fid.sh ${EXPID} ${EPOCH} ${FID_GT_IMGS} 7 | ### END USAGE ### 8 | 9 | EXPID=$1 10 | EPOCH=$2 11 | FID_GT_IMGS=$3 12 | 13 | python eval/fid_resize299.py --expid ${EXPID} --epoch ${EPOCH} --eval_type "eval" 14 | python eval/fid_score.py result/${EXPID}/eval/${EPOCH}/images299/ ${FID_GT_IMGS} --expid ${EXPID} --epoch ${EPOCH} --eval_type "eval" 15 | -------------------------------------------------------------------------------- /script/eval_lpips.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### Calculating Variety LPIPS (diversity) ### 4 | 5 | ### START USAGE ### 6 | # sh script/eval_lpips.sh ${EXPID} ${EPOCH} 7 | ### END USAGE ### 8 | 9 | EXPID=$1 10 | EPOCH=$2 11 | 12 | python eval/lpips_1dir.py -d result/${EXPID}/evaluni/${EPOCH}/images/ --expid ${EXPID} --epoch ${EPOCH} --eval_type "evaluni" --repeat 10 --use_gpu 13 | -------------------------------------------------------------------------------- /tool/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/GracoNet-Object-Placement/d7042ee94198d53eef8764f55c428873efd1c586/tool/__init__.py -------------------------------------------------------------------------------- /tool/preprocess.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import csv 3 | import os 4 | import sys 5 | from PIL import Image 6 | from torchvision import transforms 7 | 8 | sys.path.append(os.getcwd()) 9 | from loader import get_dataset 10 | 11 | 12 | def preprocess(data_root): 13 | # calculate the mapping from foreground id to category name 14 | fg_id_to_catnm = {} 15 | root = os.path.join(data_root, 'foreground') 16 | for catnm in os.listdir(root): 17 | for filename in os.listdir(os.path.join(root, catnm)): 18 | if filename.startswith('mask'): 19 | continue 20 | id = filename.split('.')[0] 21 | assert (id not in fg_id_to_catnm) 22 | fg_id_to_catnm[id] = catnm 23 | 24 | # calculate set of background ids for each category 25 | bg_catnm_idset = {} 26 | root = 
os.path.join(data_root, 'background') 27 | for catnm in os.listdir(root): 28 | for filename in os.listdir(os.path.join(root, catnm)): 29 | id = filename.split('.')[0] 30 | try: 31 | bg_catnm_idset[catnm].add(id) 32 | except KeyError: 33 | bg_catnm_idset[catnm] = set([id]) 34 | 35 | # obtain the transformed training data 36 | with open(os.path.join(data_root, 'train_data.csv'), 'w') as g: 37 | g.write('imgID,annID,scID,bbox,scale,catnm,position,label,new_img_path,new_msk_path\n') 38 | csv_data = csv.DictReader(open(os.path.join(data_root, 'train_set.csv'), 'r')) 39 | for i, row in enumerate(csv_data): 40 | assert (row['fg_id'] in fg_id_to_catnm) 41 | catnm = fg_id_to_catnm[row['fg_id']] 42 | assert (catnm in bg_catnm_idset and row['bg_id'] in bg_catnm_idset[catnm]) 43 | res = [ 44 | str(i), row['fg_id'], row['bg_id'], '"' + row['position'] + '"', 45 | row['scale'], catnm, "-1", 46 | row['label'], row['img_name'][8:], row['mask_name'][8:] 47 | ] 48 | g.write(','.join(res) + '\n') 49 | 50 | # obtain the transformed test data 51 | with open(os.path.join(data_root, 'test_data.csv'), 'w') as g: 52 | g.write('imgID,annID,scID,bbox,scale,catnm,position,label,new_img_path,new_msk_path\n') 53 | csv_data = csv.DictReader(open(os.path.join(data_root, 'test_set.csv'), 'r')) 54 | for i, row in enumerate(csv_data): 55 | assert (row['fg_id'] in fg_id_to_catnm) 56 | catnm = fg_id_to_catnm[row['fg_id']] 57 | assert (catnm in bg_catnm_idset and row['bg_id'] in bg_catnm_idset[catnm]) 58 | res = [ 59 | str(i), row['fg_id'], row['bg_id'], '"' + row['position'] + '"', 60 | row['scale'], catnm, "-1", 61 | row['label'], row['img_name'][8:], row['mask_name'][8:] 62 | ] 63 | g.write(','.join(res) + '\n') 64 | 65 | # obtain the training data with only positive labels 66 | with open(os.path.join(data_root, 'train_data.csv'), 'r') as f: 67 | lines = f.readlines() 68 | with open(os.path.join(data_root, 'train_data_pos.csv'), 'w') as g: 69 | for i, line in enumerate(lines): 70 | if i != 0 and int(line.split(',')[10]) == 0: 71 | continue 72 | g.write(line) 73 | 74 | # obtain the test data with only positive labels 75 | with open(os.path.join(data_root, 'test_data.csv'), 'r') as f: 76 | lines = f.readlines() 77 | with open(os.path.join(data_root, 'test_data_pos.csv'), 'w') as g: 78 | for i, line in enumerate(lines): 79 | if i != 0 and int(line.split(',')[10]) == 0: 80 | continue 81 | g.write(line) 82 | 83 | # obtain the test data with only positive labels, 84 | # and filter out repetitive samples with the same foreground/background pairs 85 | with open(os.path.join(data_root, 'test_data_pos.csv'), 'r') as f: 86 | lines = f.readlines() 87 | fgbg_set = set([]) 88 | with open(os.path.join(data_root, 'test_data_pos_unique.csv'), 'w') as g: 89 | for i, line in enumerate(lines): 90 | if i == 0: 91 | g.write(line) 92 | continue 93 | line_list = line.split(',') 94 | annID, scID = line_list[1], line_list[2] 95 | fgbg_mark = annID + '_' + scID 96 | if fgbg_mark in fgbg_set: 97 | continue 98 | fgbg_set.add(fgbg_mark) 99 | g.write(line) 100 | 101 | # obtain the ground-truth positive composite images that are resized to 299x299, 102 | # which are used to calculate FID scores during evaluation 103 | output_dir_299 = os.path.join(data_root, "com_pic_testpos299") 104 | if not os.path.exists(output_dir_299): 105 | os.makedirs(output_dir_299) 106 | eval_dataset = get_dataset("OPABasicDataset", image_size=None, mode_type="eval", data_root=data_root) 107 | for i in range(len(eval_dataset)): 108 | index_, annid, scid, bbox, scale, label, catnm, bg_img,
fg_img, fg_msk, comp_img, comp_msk = eval_dataset[i] 109 | comp_img_299 = transforms.Resize((299, 299), interpolation=Image.BILINEAR)(comp_img) 110 | comp_img_299.save(os.path.join(output_dir_299, '{}.jpg'.format(index_))) 111 | 112 | 113 | if __name__ == '__main__': 114 | parser = argparse.ArgumentParser() 115 | parser.add_argument("--data_root", type=str, default="OPA", help="dataset root") 116 | opt = parser.parse_args() 117 | preprocess(opt.data_root) 118 | -------------------------------------------------------------------------------- /tool/summarize.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import datetime 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("--expid", type=str, required=True, help="experiment name") 7 | parser.add_argument("--eval_type", type=str, default="eval", help="evaluation type") 8 | args = parser.parse_args() 9 | 10 | if args.eval_type == "eval": 11 | sumup_list = ['acc', 'fid'] 12 | elif args.eval_type == "evaluni": 13 | sumup_list = ['lpips_variety'] 14 | else: 15 | raise NotImplementedError 16 | 17 | eval_dir = os.path.join('result', args.expid, args.eval_type) 18 | assert (os.path.exists(eval_dir)) 19 | 20 | dt_ms = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f') 21 | div_title = '=' * 100 22 | div_line = '-' * 43 23 | div_record = '-' * 95 24 | 25 | eval_resall_file = os.path.join('result', args.expid, '{}_resall.txt'.format(args.eval_type)) 26 | with open(eval_resall_file, 'w') as _out: 27 | _out.write("{}\nStatistic Time: {}\n".format(div_title, dt_ms)) 28 | eps = sorted(list(map(int, os.listdir(eval_dir)))) 29 | for ep in eps: 30 | _out.write("{} Epoch {} {}\n".format(div_line, ep, div_line)) 31 | for sumup_item in sumup_list: 32 | res_file = os.path.join(eval_dir, str(ep), '{}_{}.txt'.format(args.eval_type, sumup_item)) 33 | if os.path.exists(res_file): 34 | with open(res_file, 'r') as f: 35 | res_ep = f.read() 36 | _out.write("{}\n".format(res_ep.rstrip('\n'))) 37 | else: 38 | _out.write("{}\n".format("Skipping {} ...".format(sumup_item))) 39 | _out.write("{}\n".format(div_record)) 40 | _out.write("{}\n\n\n".format(div_title)) 41 | -------------------------------------------------------------------------------- /tool/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import datetime 4 | from PIL import Image 5 | import torch 6 | 7 | 8 | class AverageMeter(): 9 | """Computes and stores the average and current value""" 10 | def __init__(self): 11 | self.reset() 12 | 13 | def reset(self): 14 | self.val = 0 15 | self.avg = 0 16 | self.sum = 0 17 | self.count = 0 18 | 19 | def update(self, val, n=1): 20 | self.val = val 21 | self.sum += val * n 22 | self.count += n 23 | self.avg = self.sum / self.count 24 | 25 | 26 | def make_logger(log_file): 27 | logger = logging.getLogger() 28 | logger.setLevel(logging.INFO) 29 | 30 | logfile = log_file 31 | fh = logging.FileHandler(logfile) 32 | fh.setLevel(logging.DEBUG) 33 | 34 | ch = logging.StreamHandler() 35 | ch.setLevel(logging.INFO) 36 | 37 | formatter = logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s") 38 | fh.setFormatter(formatter) 39 | ch.setFormatter(formatter) 40 | 41 | logger.addHandler(fh) 42 | logger.addHandler(ch) 43 | logger.info('logfile = {}'.format(logfile)) 44 | return logger 45 | 46 | 47 | def make_dirs(save_dir): 48 | is_old_exp = os.path.exists(save_dir) 49 | 50 | 
model_dir = os.path.join(save_dir, 'models') 51 | sample_dir = os.path.join(save_dir, 'sample') 52 | tblog_dir = os.path.join(save_dir, 'tblog') 53 | log_path = os.path.join(save_dir, 'log-{}'.format(datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f'))) 54 | 55 | if not is_old_exp: 56 | os.makedirs(save_dir) 57 | os.mkdir(model_dir) 58 | os.mkdir(sample_dir) 59 | os.mkdir(tblog_dir) 60 | 61 | return { 62 | 'save_dir': save_dir, 63 | 'model_dir': model_dir, 64 | 'sample_dir': sample_dir, 65 | 'tblog_dir': tblog_dir, 66 | 'log_path': log_path 67 | }, is_old_exp 68 | 69 | 70 | def save(model_dir, model, opt, logger=None): 71 | if not os.path.exists(model_dir): 72 | os.makedirs(model_dir) 73 | sav_path = os.path.join(model_dir, '{}.pth'.format(opt.epoch)) 74 | if logger is None: 75 | print("=> saving checkpoint to '{}'".format(sav_path)) 76 | else: 77 | logger.info("=> saving checkpoint to '{}'".format(sav_path)) 78 | 79 | torch.save({ 80 | 'epoch': opt.epoch, 81 | 'model': model.state_dict(), 82 | 'opt': opt, 83 | 'optimizer': model.optimizer_dict(), 84 | }, sav_path) 85 | 86 | 87 | def resume(path, model, resume_list, strict=False, logger=None): 88 | if path is None: 89 | return model, 0 90 | 91 | assert (os.path.exists(path)) 92 | if logger is None: 93 | print("=> loading {} from checkpoint '{}' with strict={}".format(resume_list, path, strict)) 94 | else: 95 | logger.info("=> loading {} from checkpoint '{}' with strict={}".format(resume_list, path, strict)) 96 | checkpoint = torch.load(path) 97 | 98 | pretrained_model_dict = checkpoint['model'] 99 | model_dict = model.state_dict() 100 | for k in pretrained_model_dict: 101 | if k in resume_list: 102 | model_dict[k].update(pretrained_model_dict[k])  # each top-level key names a sub-module (e.g. 'generator') mapped to its own state dict 103 | model.load_state_dict(model_dict, strict=strict) 104 | 105 | pretrained_opt_dict = checkpoint['optimizer'] 106 | opt_dict = model.optimizer_dict() 107 | for k in pretrained_opt_dict: 108 | if k in resume_list: 109 | opt_dict[k].update(pretrained_opt_dict[k]) 110 | model.load_opt_state_dict(opt_dict) 111 | 112 | epoch = checkpoint['epoch'] 113 | return model, epoch 114 |
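# Usage sketch of the save/resume round trip (hypothetical experiment name and
# epoch; mirrors how main.py, main_placenet.py and main_terse.py call these helpers):
#   model = GAN(opt)
#   model, start_ep = resume('result/myexp/models/11.pth', model,
#                            resume_list=['generator', 'discriminator'], strict=True)
#   ...training loop...
#   opt.epoch = epoch + 1
#   save('result/myexp/models', model, opt)
--------------------------------------------------------------------------------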