├── .gitignore
├── LICENSE
├── README.md
└── viewpoint_optim
    ├── IL
    │   ├── evaluate.py
    │   ├── expert_traj.py
    │   ├── main.py
    │   └── pointnet.py
    ├── LICENSE
    ├── README.md
    ├── RL_CNN
    │   ├── cnn.py
    │   ├── environment.py
    │   └── main.py
    ├── RL_pointnet
    │   ├── environment.py
    │   ├── evaluate.py
    │   ├── main.py
    │   └── pointnet.py
    ├── __init__.py
    ├── modules
    │   ├── __init__.py
    │   ├── bn.py
    │   ├── deeplab.py
    │   ├── dense.py
    │   ├── functions.py
    │   ├── misc.py
    │   ├── residual.py
    │   └── src
    │       ├── common.h
    │       ├── inplace_abn.cpp
    │       ├── inplace_abn.h
    │       ├── inplace_abn_cpu.cpp
    │       └── inplace_abn_cuda.cu
    ├── segmodel.py
    └── utils.py

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | .idea/
6 | *.zip
7 | shapenetcore_partanno_segmentation_benchmark_v0/*
8 | logs/
9 | trained_models/
10 | a2c/models
11 | *.png
12 | *.jpg
13 | 
14 | # C extensions
15 | *.so
16 | 
17 | # Distribution / packaging
18 | .Python
19 | build/
20 | develop-eggs/
21 | dist/
22 | downloads/
23 | eggs/
24 | .eggs/
25 | lib/
26 | lib64/
27 | parts/
28 | sdist/
29 | var/
30 | wheels/
31 | *.egg-info/
32 | .installed.cfg
33 | *.egg
34 | MANIFEST
35 | 
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 | 
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 | 
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .coverage
50 | .coverage.*
51 | .cache
52 | nosetests.xml
53 | coverage.xml
54 | *.cover
55 | .hypothesis/
56 | .pytest_cache/
57 | 
58 | # Translations
59 | *.mo
60 | *.pot
61 | 
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | 
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 | 
71 | # Scrapy stuff:
72 | .scrapy
73 | 
74 | # Sphinx documentation
75 | docs/_build/
76 | 
77 | # PyBuilder
78 | target/
79 | 
80 | # Jupyter Notebook
81 | .ipynb_checkpoints
82 | 
83 | # pyenv
84 | .python-version
85 | 
86 | # celery beat schedule file
87 | celerybeat-schedule
88 | 
89 | # SageMath parsed files
90 | *.sage.py
91 | 
92 | # Environments
93 | .env
94 | .venv
95 | env/
96 | venv/
97 | ENV/
98 | env.bak/
99 | venv.bak/
100 | 
101 | # Spyder project settings
102 | .spyderproject
103 | .spyproject
104 | 
105 | # Rope project settings
106 | .ropeproject
107 | 
108 | # mkdocs documentation
109 | /site
110 | 
111 | # mypy
112 | .mypy_cache/
113 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2020 Xiangyu Chen
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Transferable-Active-Grasping
2 | Transferable Active Grasping and Real Embodied Dataset (ICRA 2020)
3 | 
4 | We will release the code and dataset soon.
--------------------------------------------------------------------------------
/viewpoint_optim/IL/evaluate.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | 
3 | import numpy as np
4 | import argparse
5 | import os
6 | import sys
7 | import time
8 | 
9 | import torch
10 | import torch.nn.functional as F
11 | import torch.optim as optim
12 | from torch.autograd import Variable
13 | 
14 | from environment import ActiveAgent
15 | from pointnet import *
16 | from utils import setup_logger
17 | 
18 | 
19 | class CB(nn.Module):
20 |     def __init__(self, num_points=3000, output=5):
21 |         super(CB, self).__init__()
22 |         self.num_points = num_points
23 |         self.feat = PointNetfeat(num_points, global_feat=True)
24 |         self.fc = end_layer(in_channels=1024, out_channels=128)
25 | 
26 |         self.fc11 = nn.Linear(128, 64)
27 |         self.fc12 = nn.Linear(64, 32)
28 |         self.fc13 = nn.Linear(32, 16)
29 |         self.fc1 = nn.Linear(16, 8)
30 |         self.fc2 = nn.Linear(8, output)
31 | 
32 |         self.apply(weights_init)
33 |         self.train()
34 | 
35 |     def forward(self, x):
36 |         x, _ = self.feat(x)
37 |         x = F.relu(self.fc(x))
38 |         x = F.relu(self.fc11(x))
39 |         x = F.relu(self.fc12(x))
40 |         x = F.relu(self.fc13(x))
41 |         x = F.relu(self.fc1(x))
42 |         x = self.fc2(x)
43 | 
44 |         return x
45 | 
46 | 
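# -- Editorial example (added; not in the original source) --
# A minimal shape sketch for CB, the behaviour-cloning policy evaluated in
# this script, assuming the defaults above (4-channel x/y/z/target-mask
# clouds of 3000 points):
#
#   model = CB(num_points=3000, output=5).cuda().eval()
#   logits = model(torch.rand(1, 4, 3000).cuda())  # -> (1, 5) action logits
#   action = logits.argmax(dim=1)                  # greedy action in {0, ..., 4}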
47 | parser = argparse.ArgumentParser(description='A2C')
48 | parser.add_argument('--lr', type=float, default=0.0001,
49 |                     help='learning rate (default: 0.0001)')
50 | parser.add_argument('--hidden-size', type=int, default=1024,
51 |                     help='Hidden size for LSTM')
52 | parser.add_argument('--gamma', type=float, default=0.99,
53 |                     help='discount factor for rewards (default: 0.99)')
54 | parser.add_argument('--tau', type=float, default=1.00,
55 |                     help='parameter for GAE (default: 1.00)')
56 | parser.add_argument('--entropy-coef', type=float, default=0.01,
57 |                     help='entropy term coefficient (default: 0.01)')
58 | parser.add_argument('--value-loss-coef', type=float, default=0.5,
59 |                     help='value loss coefficient (default: 0.5)')
60 | parser.add_argument('--max-grad-norm', type=float, default=20,
61 |                     help='max norm for gradient clipping (default: 20)')
62 | parser.add_argument('--seed', type=int, default=456,
63 |                     help='random seed (default: 456)')
64 | parser.add_argument('--num-steps', type=int, default=20,
65 |                     help='number of forward steps in A2C (default: 20)')
66 | parser.add_argument('--max-episode-length', type=int, default=50,
67 |                     help='maximum length of an episode (default: 50)')
68 | parser.add_argument('--env-name', default='PointNetActorCritic',
69 |                     help='environment to train on')
70 | parser.add_argument('--no-shared', default=False,
71 |                     help='use an optimizer without shared momentum.')
72 | parser.add_argument('--n-points', type=int, default=3000,
73 |                     help='the number of points fed to PointNet')
74 | parser.add_argument('--log-dir', type=str, default='logs',
75 |                     help='Folder to save logs')
76 | parser.add_argument('--model-dir', type=str, default='trained_models',
77 |                     help='Folder to save models')
78 | parser.add_argument('--data-dir', type=str, default='data',
79 |                     help='Path to the IORD dataset')
80 | parser.add_argument('--resume', default=True,
81 |                     help='resume latest model or not')
82 | parser.add_argument('--num-actions', type=int, default=5,
83 |                     help='discrete action space')
84 | parser.add_argument('--num-test', type=int, default=50,
85 |                     help='number of test episodes')
86 | parser.add_argument('--min', type=bool, default=True,
87 |                     help='use min-vis or not')
88 | 
89 | # segmentation settings
90 | parser.add_argument("--depth-fusion", type=str, default='no-depth',
91 |                     choices=['no-depth', 'pixel-concat', 'feature-concat'])
92 | parser.add_argument("--vote-mode", metavar="NAME",
93 |                     type=str, choices=["plain", "mean", "voting", "max",
94 |                                        "mean+flip", "voting+flip", "max+flip"], default="mean")
95 | parser.add_argument("--vote-scales", type=list, default=[0.7, 1.2])
96 | parser.add_argument("--output-mode", metavar="NAME", type=str, choices=["palette", "raw", "prob"],
97 |                     default="palette",
98 |                     help="How the output files are formatted."
99 |                          " -- palette: color coded predictions"
100 |                          " -- raw: gray-scale predictions"
101 |                          " -- prob: gray-scale predictions plus probabilities")
102 | parser.add_argument("--snapshot", metavar="SNAPSHOT_FILE", type=str, default='wide_resnet38_deeplab_vistas.pth.tar', help="Snapshot file to load")
103 | parser.add_argument("--seg-model-dir", type=str, default="path of segmentation model")
104 | 
105 | 
106 | if __name__ == '__main__':
107 |     args = parser.parse_args()
108 |     if not os.path.isdir(args.log_dir):
109 |         os.makedirs(args.log_dir)
110 | 
111 |     torch.manual_seed(args.seed)
112 |     np.random.seed(args.seed)
113 | 
114 |     model = CB()
115 |     model = model.cuda()
116 |     env = ActiveAgent(idx=0, n_points=args.n_points,
117 |                       seg_args=args, mode='semantic', root_path=args.data_dir)
118 |     env.seed(args.seed)
119 | 
120 |     # resume latest model
121 |     if args.resume:
122 |         model_path = os.path.join(args.model_dir, 'latest.pth')
123 |         if not os.path.isdir(args.model_dir):
124 |             os.makedirs(args.model_dir)
125 |         elif os.path.exists(model_path):
126 |             print('Loading model from %s...'
 % model_path)
127 |             model.load_state_dict(torch.load(model_path))
128 | 
129 |     itr = 0
130 |     epoch = 0
131 |     training_time = 50
132 |     # train_logger = setup_logger('trainer', os.path.join(args.log_dir, 'trainer_log.txt'))
133 |     # test_logger = setup_logger('test', os.path.join(args.log_dir, 'test_log.txt'))
134 |     optimizer = optim.Adam(model.parameters(), lr=args.lr)
135 | 
136 |     # test parameters
137 |     all_success_time = 0
138 |     all_time = 0
139 |     ep_success_time = 0
140 |     success_phase = 0.1
141 |     check_flag = False
142 | 
143 |     for _ in range(5):
144 |         epoch += 1
145 |         ################### testing phase ###################
146 |         model = model.eval()
147 | 
148 |         state, _ = env.reset(min_vis=args.min)
149 |         state = Variable(torch.from_numpy(state).unsqueeze(0))
150 |         if torch.cuda.is_available():
151 |             state = state.cuda()
152 |         reward_sum = 0
153 |         done = True
154 | 
155 |         episode_length = 0
156 |         testing = True
157 |         while testing:
158 |             episode_length += 1
159 | 
160 |             with torch.no_grad():
161 |                 logit = model(state)
162 |                 prob = F.softmax(logit, dim=1)
163 |                 action = prob.max(1, keepdim=True)[1].data.cpu().numpy()
164 | 
165 |             # path_info = '%s %s %s %d' % (env.target_group, env.scene_idx, env.coord, action[0, 0])
166 |             # test_logger.info(path_info)
167 | 
168 |             state, reward, done = env.step(action[0, 0])
169 |             reward_sum += reward
170 | 
171 |             if done:
172 |                 # print('testing: ', all_time)
173 |                 success = env.end_flag
174 |                 all_success_time += success
175 |                 ep_success_time += success
176 |                 all_time += 1
177 |                 if all_time % args.num_test == 0:
178 |                     check_flag = True
179 | 
180 |                 state, _ = env.reset(min_vis=args.min)
181 | 
182 |             state = Variable(torch.from_numpy(state).unsqueeze(0))
183 |             if torch.cuda.is_available():
184 |                 state = state.cuda()
185 | 
186 |             if check_flag:
187 |                 all_success_rate = all_success_time / all_time
188 |                 log_info = 'Num steps: %d, Episode length: %d, Reward: %0.2f, EP Success: %0.4f, ALL Success: %0.4f' \
189 |                            % (itr, episode_length, reward_sum, ep_success_time / args.num_test, all_success_rate)
190 |                 # test_logger.info(log_info)
191 |                 print(log_info)
192 | 
193 |                 reward_sum = 0
194 |                 episode_length = 0
195 |                 ep_success_time = 0
196 |                 check_flag = False
197 |                 testing = False
198 | 
199 |         time.sleep(1)
--------------------------------------------------------------------------------
/viewpoint_optim/IL/expert_traj.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import argparse
3 | 
4 | import torch
5 | import torch.nn.functional as F
6 | from torch.autograd import Variable
7 | 
8 | import os
9 | import sys
10 | 
11 | # for PCD
12 | # sys.path.append('..')
13 | # from environment import ActiveAgent
14 | 
15 | # for CNN
16 | sys.path.append('../RL_CNN')
17 | from environment import ActiveAgent
18 | 
19 | from pointnet import PointNetActorCritic
20 | from utils import setup_logger
21 | 
22 | 
23 | def init_parser():
24 |     parser = argparse.ArgumentParser(description='expert_traj')
25 | 
26 |     parser.add_argument('--model-dir', type=str, default='trained_models',
27 |                         help='Folder to expert models')
28 |     parser.add_argument('--mode', type=str, default='pointnet',
29 |                         help='Feature extraction mode')
30 | 
31 |     args = parser.parse_args()
32 |     return args
33 | 
34 | 
35 | def collect_expert_traj(model_path='latest.pth', mode='pointnet'):
36 |     hidden_size = 1024
37 |     n_traj = 100
38 | 
39 |     env = ActiveAgent(idx=666, n_points=3000, seg_args=None)  # seg_args is only read in 'semantic' mode
40 |     env.seed(456)
41 |     logger = setup_logger('test', 'logs/expert_traj.txt')
42 |     expert_traj = []
43 |
traj = [] 44 | 45 | model = PointNetActorCritic(num_points=env.n_points, num_actions=env.n_actions) 46 | model.load_state_dict(torch.load(model_path)) 47 | model.eval() 48 | if torch.cuda.is_available(): 49 | model = model.cuda() 50 | 51 | state, _ = env.reset(verbose=True) 52 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 53 | if torch.cuda.is_available(): 54 | state = state.cuda() 55 | reward_sum = 0 56 | done = True 57 | episode_length = 0 58 | 59 | while True: 60 | episode_length += 1 61 | # Sync with the shared model 62 | if done: 63 | with torch.no_grad(): 64 | cx = torch.zeros(1, hidden_size) 65 | hx = torch.zeros(1, hidden_size) 66 | else: 67 | with torch.no_grad(): 68 | cx = cx.data 69 | hx = hx.data 70 | if torch.cuda.is_available(): 71 | hx = hx.cuda() 72 | cx = cx.cuda() 73 | 74 | with torch.no_grad(): 75 | value, logit, (hx, cx) = model((state, (hx, cx))) 76 | prob = F.softmax(logit, dim=1) 77 | action = prob.max(1, keepdim=True)[1].data.cpu().numpy() 78 | 79 | path_info = '%s %s %s %d' % (env.target_group, env.target_scene, env.coord, action[0, 0]) 80 | logger.info(path_info) 81 | print(path_info) 82 | traj.append((state.data.cpu().numpy(), action)) 83 | 84 | state, reward, done = env.step(action[0, 0]) 85 | reward_sum += reward 86 | 87 | if done: 88 | success = env.end_flag 89 | state, _ = env.reset() 90 | 91 | # collect an expert trajectory 92 | if success and episode_length <= 20: 93 | log_info = 'Traj %d, episode_length %d, reward %0.2f' \ 94 | % (n_traj, episode_length, reward_sum) 95 | logger.info(log_info) 96 | print(log_info) 97 | expert_traj.append(traj) 98 | n_traj -= 1 99 | 100 | traj = [] 101 | episode_length = 0 102 | reward_sum = 0 103 | 104 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 105 | if torch.cuda.is_available(): 106 | state = state.cuda() 107 | 108 | if not n_traj: 109 | break 110 | 111 | # save expert trajectory 112 | with open('expert_traj_%s.pkl' % mode, 'wb') as f: 113 | pickle.dump(expert_traj, f) 114 | 115 | 116 | if __name__ == '__main__': 117 | args = init_parser() 118 | collect_expert_traj(model_path=args.model_dir, mode=args.mode) -------------------------------------------------------------------------------- /viewpoint_optim/IL/main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | from torch.autograd import Variable 7 | 8 | import argparse 9 | import os 10 | import pickle 11 | import random 12 | import sys 13 | sys.path.append('/media/sdc/seg3d/T-Pointnet') 14 | sys.path.append('/media/sdc/seg3d/T-Pointnet/a2c') 15 | 16 | from pointnet import * 17 | from environment import ActiveAgent 18 | from utils import setup_logger 19 | 20 | 21 | class CB(nn.Module): 22 | def __init__(self, num_points=3000, output=5): 23 | super(CB, self).__init__() 24 | self.num_points = num_points 25 | self.feat = PointNetfeat(num_points, global_feat=True) 26 | self.fc = end_layer(in_channels=1024, out_channels=128) 27 | 28 | self.fc11 = nn.Linear(128, 64) 29 | self.fc12 = nn.Linear(64, 32) 30 | self.fc13 = nn.Linear(32, 16) 31 | self.fc1 = nn.Linear(16, 8) 32 | self.fc2 = nn.Linear(8, output) 33 | 34 | self.apply(weights_init) 35 | self.train() 36 | 37 | def forward(self, x): 38 | x, _ = self.feat(x) 39 | x = F.relu(self.fc(x)) 40 | x = F.relu(self.fc11(x)) 41 | x = F.relu(self.fc12(x)) 42 | x = F.relu(self.fc13(x)) 43 | x = F.relu(self.fc1(x)) 44 | x = self.fc2(x) 45 | 46 | 
        return x
47 | 
48 | 
49 | class CNNCB(nn.Module):
50 |     def __init__(self, output=5):
51 |         super(CNNCB, self).__init__()
52 |         self.feat = CNNfeat()
53 |         self.fc = end_layer(in_channels=1024, out_channels=128)
54 | 
55 |         self.fc11 = nn.Linear(128, 64)
56 |         self.fc12 = nn.Linear(64, 32)
57 |         self.fc13 = nn.Linear(32, 16)
58 |         self.fc1 = nn.Linear(16, 8)
59 |         self.fc2 = nn.Linear(8, output)
60 | 
61 |         self.apply(weights_init)
62 |         self.train()
63 | 
64 |     def forward(self, x):
65 |         x = self.feat(x)
66 |         x = F.relu(self.fc(x))
67 |         x = F.relu(self.fc11(x))
68 |         x = F.relu(self.fc12(x))
69 |         x = F.relu(self.fc13(x))
70 |         x = F.relu(self.fc1(x))
71 |         x = self.fc2(x)
72 | 
73 |         return x
74 | 
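# -- Editorial note (added; not in the original source) --
# The two behaviour-cloning heads above only differ in their feature
# extractor; a shape sketch under the defaults:
#
#   pcd_policy = CB(num_points=3000, output=5)   # expects (B, 4, 3000) point clouds
#   img_policy = CNNCB(output=5)                 # expects (B, 3, 960, 640) stacked RGB+mask images
#   logits = img_policy(torch.rand(2, 3, 960, 640))  # -> (2, 5) action logits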
help="How the output files are formatted." 130 | " -- palette: color coded predictions" 131 | " -- raw: gray-scale predictions" 132 | " -- prob: gray-scale predictions plus probabilities") 133 | parser.add_argument("--snapshot", metavar="SNAPSHOT_FILE", type=str, default='wide_resnet38_deeplab_vistas.pth.tar', help="Snapshot file to load") 134 | parser.add_argument("--seg-model-dir", type=str, default="path of segmentation model") 135 | 136 | return parser 137 | 138 | 139 | if __name__ == '__main__': 140 | parser = para_setting() 141 | seg_args = init_seg_parser() 142 | if not os.path.isdir(args.log_dir): 143 | os.makedirs(args.log_dir) 144 | 145 | torch.manual_seed(args.seed) 146 | np.random.seed(args.seed) 147 | 148 | if args.feat_archi == 'pointnet': 149 | model = CB() 150 | elif args.feat_archi == 'cnn': 151 | model = CNNCB() 152 | criterion = nn.CrossEntropyLoss() 153 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 154 | 155 | model = model.cuda() 156 | criterion = criterion.cuda() 157 | 158 | env = ActiveAgent(idx=0, n_points=args.n_points, 159 | seg_args=args, mode='sim', root_path=args.data_dir) 160 | env.seed(args.seed) 161 | 162 | # resume latest model 163 | if args.resume: 164 | model_path = os.path.join(args.model_dir, 'latest.pth') 165 | if not os.path.isdir(args.model_dir): 166 | os.makedirs(args.model_dir) 167 | elif os.path.exists(model_path): 168 | print('Loading model from %s...' % model_path) 169 | model.load_state_dict(torch.load(model_path)) 170 | 171 | # expert 172 | with open('expert_traj.pkl', 'rb') as f: 173 | expert_traj_all = pickle.load(f) 174 | 175 | itr = 0 176 | epoch = 0 177 | training_time = 50 178 | train_logger = setup_logger('trainer', os.path.join(args.log_dir, 'trainer_log.txt')) 179 | test_logger = setup_logger('test', os.path.join(args.log_dir, 'test_log.txt')) 180 | 181 | # test parameters 182 | all_success_time = 0 183 | all_time = 0 184 | ep_success_time = 0 185 | success_phase = 0.1 186 | check_flag = False 187 | 188 | while True: 189 | epoch += 1 190 | ################### training phase ################### 191 | model = model.train() 192 | for train_itr in range(training_time): 193 | expert_traj = random.choice(expert_traj_all) 194 | 195 | grasp_expert = random.random() 196 | if grasp_expert < 0.3: 197 | expert_s, expert_a = expert_traj[-1] 198 | else: 199 | expert_s, expert_a = random.choice(expert_traj) 200 | expert_a = expert_a.squeeze(1) 201 | 202 | # expert_s = np.array([x[0] for x in expert_traj]).squeeze(1) 203 | # expert_a = np.array([x[1] for x in expert_traj]).squeeze(1).squeeze(1) 204 | 205 | logit = model(torch.from_numpy(expert_s).cuda()) 206 | itr += 1 207 | 208 | loss = criterion(logit, torch.from_numpy(expert_a).cuda()) 209 | print('behaviour cloning loss: ', loss.data.cpu().numpy()) 210 | train_logger.info('behaviour cloning loss: ' + str(loss.data.cpu().numpy())) 211 | optimizer.zero_grad() 212 | loss.backward() 213 | optimizer.step() 214 | 215 | ################### testing phase ################### 216 | model = model.eval() 217 | 218 | state, _ = env.reset() 219 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 220 | if torch.cuda.is_available(): 221 | state = state.cuda() 222 | reward_sum = 0 223 | done = True 224 | 225 | episode_length = 0 226 | testing = True 227 | while testing: 228 | episode_length += 1 229 | 230 | with torch.no_grad(): 231 | logit = model(state) 232 | prob = F.softmax(logit, dim=1) 233 | action = prob.max(1, keepdim=True)[1].data.cpu().numpy() 234 | 235 | # path_info = '%s %s %s %d' % 
(env.target_group, env.scene_idx, env.coord, action[0, 0]) 236 | # test_logger.info(path_info) 237 | 238 | state, reward, done = env.step(action[0, 0]) 239 | reward_sum += reward 240 | 241 | if done: 242 | success = env.end_flag 243 | all_success_time += success 244 | ep_success_time += success 245 | all_time += 1 246 | if all_time % args.num_test == 0: 247 | check_flag = True 248 | 249 | state, _ = env.reset() 250 | 251 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 252 | if torch.cuda.is_available(): 253 | state = state.cuda() 254 | 255 | if check_flag: 256 | all_success_rate = all_success_time / all_time 257 | log_info = 'Num steps: %d, Episode length: %d, Reward: %0.2f, EP Success: %0.2f, ALL Success: %0.3f' \ 258 | % (itr, episode_length, reward_sum, ep_success_time / args.num_test, all_success_rate) 259 | test_logger.info(log_info) 260 | print(log_info) 261 | torch.save(model.state_dict(), os.path.join(args.model_dir, 'latest.pth')) 262 | 263 | # save models in some important phases 264 | if all_success_rate > success_phase: 265 | torch.save(model.state_dict(), 266 | os.path.join(args.model_dir, 'success_rate_%0.2f.pth' % success_phase)) 267 | success_phase += 0.1 268 | 269 | # save models according to steps 270 | if epoch % 20 == 0: 271 | torch.save(model.state_dict(), 272 | os.path.join(args.model_dir, 'model_%d.pth' % epoch)) 273 | 274 | reward_sum = 0 275 | episode_length = 0 276 | ep_success_time = 0 277 | check_flag = False 278 | testing = False -------------------------------------------------------------------------------- /viewpoint_optim/IL/pointnet.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.parallel 7 | import torch.backends.cudnn as cudnn 8 | import torch.optim as optim 9 | import torch.utils.data 10 | import torchvision.transforms as transforms 11 | import torchvision.utils as vutils 12 | from torch.autograd import Variable 13 | from PIL import Image 14 | import numpy as np 15 | import matplotlib.pyplot as plt 16 | import torch.nn.functional as F 17 | 18 | 19 | def weights_init(m): 20 | classname = m.__class__.__name__ 21 | if classname.find('Conv') != -1: 22 | weight_shape = list(m.weight.data.size()) 23 | fan_in = np.prod(weight_shape[1: 4]) 24 | fan_out = np.prod(weight_shape[2: 4]) * weight_shape[0] 25 | w_bound = np.sqrt(6. 
/ (fan_in + fan_out)) 26 | m.weight.data.uniform_(-w_bound, w_bound) 27 | m.bias.data.fill_(0.0) 28 | elif classname.find('Linear') != -1: 29 | weight_shape = list(m.weight.data.size()) 30 | fan_in = weight_shape[1] 31 | fan_out = weight_shape[0] 32 | w_bound = np.sqrt(6.0 / (fan_in + fan_out)) 33 | m.weight.data.uniform_(-w_bound, w_bound) 34 | m.bias.data.fill_(0.0) 35 | elif classname.find('BatchNorm') != -1: 36 | m.weight.data.fill_(1.0) 37 | m.bias.data.fill_(0.0) 38 | elif classname.find('LSTMCell') != -1: 39 | m.bias_ih.data.fill_(0.0) 40 | m.bias_hh.data.fill_(0.0) 41 | 42 | 43 | class STN3d(nn.Module): 44 | def __init__(self, num_points = 2500): 45 | super(STN3d, self).__init__() 46 | self.num_points = num_points 47 | self.conv1 = torch.nn.Conv1d(4, 64, 1) 48 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 49 | self.conv3 = torch.nn.Conv1d(128, 1024, 1) 50 | self.mp1 = torch.nn.MaxPool1d(num_points) 51 | self.fc1 = nn.Linear(1024, 512) 52 | self.fc2 = nn.Linear(512, 256) 53 | self.fc3 = nn.Linear(256, 9) 54 | self.relu = nn.ReLU() 55 | 56 | self.bn1 = nn.BatchNorm1d(64) 57 | self.bn2 = nn.BatchNorm1d(128) 58 | self.bn3 = nn.BatchNorm1d(1024) 59 | self.bn4 = nn.BatchNorm1d(512) 60 | self.bn5 = nn.BatchNorm1d(256) 61 | 62 | def forward(self, x): 63 | # x --> 3 * 3 64 | batchsize = x.shape[0] 65 | if batchsize > 1: 66 | x = F.relu(self.bn1(self.conv1(x))) 67 | x = F.relu(self.bn2(self.conv2(x))) 68 | x = F.relu(self.bn3(self.conv3(x))) 69 | x = self.mp1(x) 70 | x = x.view(-1, 1024) 71 | 72 | x = F.relu(self.bn4(self.fc1(x))) 73 | x = F.relu(self.bn5(self.fc2(x))) 74 | else: 75 | x = F.relu(self.conv1(x)) 76 | x = F.relu(self.conv2(x)) 77 | x = F.relu(self.conv3(x)) 78 | x = self.mp1(x) 79 | x = x.view(-1, 1024) 80 | 81 | x = F.relu(self.fc1(x)) 82 | x = F.relu(self.fc2(x)) 83 | 84 | x = self.fc3(x) 85 | 86 | iden = Variable(torch.eye(3)).view(1, -1).repeat(batchsize, 1) 87 | if x.is_cuda: 88 | device = torch.device('cuda:%d' % x.get_device()) 89 | iden = iden.to(device=device) 90 | x = x + iden 91 | x = x.view(-1, 3, 3) 92 | 93 | return x 94 | 95 | 96 | class PointNetfeat(nn.Module): 97 | def __init__(self, num_points=2500, global_feat=True): 98 | super(PointNetfeat, self).__init__() 99 | self.stn = STN3d(num_points=num_points) 100 | self.conv1 = torch.nn.Conv1d(4, 64, 1) 101 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 102 | self.conv3 = torch.nn.Conv1d(128, 1024, 1) 103 | self.bn1 = nn.BatchNorm1d(64) 104 | self.bn2 = nn.BatchNorm1d(128) 105 | self.bn3 = nn.BatchNorm1d(1024) 106 | self.mp1 = torch.nn.MaxPool1d(num_points) 107 | self.num_points = num_points 108 | self.global_feat = global_feat 109 | 110 | def forward(self, x): 111 | trans = self.stn(x) 112 | x = torch.cat([torch.bmm(trans, x[:, :3, :]), x[:, 3, :].unsqueeze(1)], dim=1) 113 | 114 | if x.shape[0] > 1: 115 | x = F.relu(self.bn1(self.conv1(x))) 116 | pointfeat = x 117 | x = F.relu(self.bn2(self.conv2(x))) 118 | x = self.bn3(self.conv3(x)) 119 | else: 120 | x = F.relu(self.conv1(x)) 121 | pointfeat = x 122 | x = F.relu(self.conv2(x)) 123 | x = self.conv3(x) 124 | 125 | x = self.mp1(x) 126 | x = x.view(-1, 1024) 127 | 128 | if self.global_feat: 129 | return x, trans 130 | else: 131 | x = x.view(-1, 1024, 1).repeat(1, 1, self.num_points) 132 | return torch.cat([x, pointfeat], 1), trans 133 | 134 | 135 | class CNNfeat(nn.Module): 136 | def __init__(self): 137 | super(CNNfeat, self).__init__() 138 | self.conv1 = nn.Sequential( # 960 * 640 139 | nn.Conv2d(in_channels=3, out_channels=64, kernel_size=10, 140 | stride=3, padding=0), # 
317 * 211 141 | nn.ReLU(), 142 | ) 143 | self.conv2 = nn.Sequential( 144 | nn.Conv2d(64, 64, 5, 2, 0), # 157 * 104 145 | nn.ReLU(), 146 | ) 147 | self.conv3 = nn.Sequential( 148 | nn.Conv2d(64, 32, 3, 2, 0), # 78 * 51 149 | nn.ReLU(), 150 | # nn.MaxPool2d(kernel_size=2) # 19 * 25 151 | ) 152 | self.conv4 = nn.Sequential( 153 | nn.Conv2d(32, 16, 3, 2, 0), # 38 * 25 154 | nn.ReLU(), 155 | ) 156 | self.out = nn.Sequential( 157 | nn.Linear(16 * 38 * 25, 4096), 158 | nn.ReLU(), 159 | nn.Linear(4096, 2048), 160 | nn.ReLU(), 161 | nn.Linear(2048, 1024) 162 | ) 163 | # self.out = nn.MaxPool2d(kernel_size=(18, 25)) 164 | 165 | self.apply(weights_init) 166 | 167 | def forward(self, x): 168 | x = self.conv1(x) 169 | x = self.conv2(x) 170 | x = self.conv3(x) 171 | x = self.conv4(x) 172 | 173 | x = x.view(x.size(0), -1) 174 | x = self.out(x) 175 | x = x.view(-1, 1024) 176 | 177 | return x 178 | 179 | 180 | class end_layer(nn.Module): 181 | def __init__(self, in_channels=1024, out_channels=1): 182 | super(end_layer, self).__init__() 183 | self.fc1 = nn.Linear(in_channels, 512) 184 | self.fc2 = nn.Linear(512, 256) 185 | self.fc3 = nn.Linear(256, out_channels) 186 | self.bn1 = nn.BatchNorm1d(512) 187 | self.bn2 = nn.BatchNorm1d(256) 188 | 189 | self.apply(weights_init) 190 | 191 | def forward(self, x): 192 | if x.size()[0] == 1: 193 | x = F.relu(self.fc1(x)) 194 | x = F.relu(self.fc2(x)) 195 | else: 196 | x = F.relu(self.bn1(self.fc1(x))) 197 | x = F.relu(self.bn2(self.fc2(x))) 198 | return self.fc3(x) 199 | 200 | 201 | class PointNetActorCritic(nn.Module): 202 | def __init__(self, num_points=2500, num_actions=5): 203 | super(PointNetActorCritic, self).__init__() 204 | self.num_points = num_points 205 | self.feat = PointNetfeat(num_points, global_feat=True) 206 | 207 | self.lstm = nn.LSTMCell(1024, 1024) 208 | 209 | self.critic_linear = end_layer(in_channels=1024, out_channels=1) 210 | self.actor_linear = end_layer(in_channels=1024, out_channels=num_actions) 211 | 212 | self.apply(weights_init) 213 | self.train() 214 | 215 | def forward(self, inputs): 216 | x, (hx, cx) = inputs 217 | x, _ = self.feat(x) 218 | hx, cx = self.lstm(x, (hx, cx)) 219 | x = hx 220 | 221 | return self.critic_linear(x), self.actor_linear(x), (hx, cx) 222 | 223 | 224 | class PointNetGail(nn.Module): 225 | def __init__(self, num_points=2500, num_actions=5): 226 | super(PointNetGail, self).__init__() 227 | self.num_points = num_points 228 | self.feat = PointNetfeat(num_points, global_feat=True) 229 | 230 | self.critic_linear = end_layer(in_channels=1024, out_channels=1) 231 | self.actor_linear = end_layer(in_channels=1024, out_channels=num_actions) 232 | 233 | self.apply(weights_init) 234 | self.train() 235 | 236 | def forward(self, inputs): 237 | x = inputs 238 | x, _ = self.feat(x) 239 | 240 | return self.critic_linear(x), self.actor_linear(x) 241 | 242 | 243 | if __name__ == '__main__': 244 | sim_data = Variable(torch.rand(10, 4, 2500)) 245 | 246 | # trans = STN3d() 247 | # out = trans(sim_data) 248 | # print('stn', out.size()) 249 | 250 | # pointfeat = PointNetfeat(global_feat=True) 251 | # out, _ = pointfeat(sim_data) 252 | # print('global feat', out.size()) 253 | 254 | # pointfeat = PointNetfeat(global_feat=False) 255 | # out, _ = pointfeat(sim_data) 256 | # print('point feat', out.size()) 257 | 258 | cls = PointNetGail(num_actions=5) 259 | v, q= cls(sim_data) 260 | print(v.shape, q.shape) 261 | print(v) 262 | print(q) 263 | -------------------------------------------------------------------------------- 
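(Editorial example — not part of the original repository.) The recurrent actor-critic above keeps an LSTM state across steps; a minimal greedy-rollout sketch of how `PointNetActorCritic` is driven, with random tensors standing in for the environment (the shapes follow the `__main__` smoke test above, and the import assumes this file is reachable as `pointnet`):

```python
import torch
import torch.nn.functional as F
from pointnet import PointNetActorCritic  # assumed import path

model = PointNetActorCritic(num_points=3000, num_actions=5).eval()
hx = torch.zeros(1, 1024)  # LSTM hidden state, zeroed at episode start
cx = torch.zeros(1, 1024)  # LSTM cell state
state = torch.rand(1, 4, 3000)  # (batch, x/y/z/mask channels, points)

with torch.no_grad():
    for _ in range(5):
        value, logit, (hx, cx) = model((state, (hx, cx)))
        action = F.softmax(logit, dim=1).argmax(dim=1).item()  # greedy action
        state = torch.rand(1, 4, 3000)  # stand-in for env.step(action)
```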
/viewpoint_optim/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2018 Shin
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/viewpoint_optim/README.md:
--------------------------------------------------------------------------------
1 | # Viewpoint Optimization
2 | 
3 | Here is the code for training an agent with RL (reinforcement learning) or IL (imitation learning) to optimize the viewpoint.
4 | 
5 | ## Requirements
6 | 
7 | * Python 3.x
8 | 
9 | * pytorch 0.4.1
10 | 
11 | ## To run
12 | 
13 | ### Training & Evaluation with RL
14 | 
15 | Use PointNet to extract features:
16 | 
17 | 1. `cd RL_pointnet`
18 | 2. `python main.py --data_dir [folder to IORD] --model_dir [folder to save models] --lr [learning rate] --n_points [number of points fed to PointNet]`
19 | 
20 | For evaluation:
21 | `python evaluate.py --data_dir [folder to IORD] --model_dir [folder to save models]`
22 | 
23 | Use CNN to extract features:
24 | 
25 | 1. `cd RL_CNN`
26 | 2. `python main.py --data_dir [folder to IORD] --model_dir [folder to save models] --lr [learning rate]`
27 | 
28 | ### Training & Evaluation with IL
29 | 
30 | Use PointNet to extract features:
31 | 
32 | 1. `cd IL`
33 | 2. `python expert_traj.py --model_dir [expert model] --mode pointnet`
34 | 3. `python main.py --data_dir [folder to IORD] --model_dir [folder to save models] --lr [learning rate] --n_points [number of points fed to PointNet]`
35 | 
36 | For evaluation:
37 | `python evaluate.py --data_dir [folder to IORD] --model_dir [folder to save models]`
38 | 
39 | 
40 | Use CNN to extract features:
41 | 
42 | 1. `cd IL`
43 | 2. `python expert_traj.py --model_dir [expert model] --mode cnn`
44 | 3.
`python main.py --data_dir [folder to IORD] --model_dir [folder to save models] --lr [learning rate]` -------------------------------------------------------------------------------- /viewpoint_optim/RL_CNN/cnn.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import os 4 | import random 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.parallel 8 | import torch.backends.cudnn as cudnn 9 | import torch.optim as optim 10 | import torch.utils.data 11 | import torchvision.transforms as transforms 12 | import torchvision.utils as vutils 13 | from torch.autograd import Variable 14 | from PIL import Image 15 | import numpy as np 16 | import matplotlib.pyplot as plt 17 | import torch.nn.functional as F 18 | 19 | 20 | def weights_init(m): 21 | classname = m.__class__.__name__ 22 | if classname.find('Conv') != -1: 23 | weight_shape = list(m.weight.data.size()) 24 | fan_in = np.prod(weight_shape[1: 4]) 25 | fan_out = np.prod(weight_shape[2: 4]) * weight_shape[0] 26 | w_bound = np.sqrt(6. / (fan_in + fan_out)) 27 | m.weight.data.uniform_(-w_bound, w_bound) 28 | m.bias.data.fill_(0.0) 29 | elif classname.find('Linear') != -1: 30 | weight_shape = list(m.weight.data.size()) 31 | fan_in = weight_shape[1] 32 | fan_out = weight_shape[0] 33 | w_bound = np.sqrt(6.0 / (fan_in + fan_out)) 34 | m.weight.data.uniform_(-w_bound, w_bound) 35 | m.bias.data.fill_(0.0) 36 | elif classname.find('BatchNorm') != -1: 37 | m.weight.data.fill_(1.0) 38 | m.bias.data.fill_(0.0) 39 | elif classname.find('LSTMCell') != -1: 40 | m.bias_ih.data.fill_(0.0) 41 | m.bias_hh.data.fill_(0.0) 42 | 43 | 44 | class CNNfeat(nn.Module): 45 | def __init__(self): 46 | super(CNNfeat, self).__init__() 47 | self.conv1 = nn.Sequential( # 960 * 640 48 | nn.Conv2d(in_channels=3, out_channels=64, kernel_size=10, 49 | stride=3, padding=0), # 317 * 211 50 | nn.ReLU(), 51 | ) 52 | self.conv2 = nn.Sequential( 53 | nn.Conv2d(64, 64, 5, 2, 0), # 157 * 104 54 | nn.ReLU(), 55 | ) 56 | self.conv3 = nn.Sequential( 57 | nn.Conv2d(64, 32, 3, 2, 0), # 78 * 51 58 | nn.ReLU(), 59 | # nn.MaxPool2d(kernel_size=2) # 19 * 25 60 | ) 61 | self.conv4 = nn.Sequential( 62 | nn.Conv2d(32, 16, 3, 2, 0), # 38 * 25 63 | nn.ReLU(), 64 | ) 65 | self.out = nn.Sequential( 66 | nn.Linear(16 * 38 * 25, 4096), 67 | nn.ReLU(), 68 | nn.Linear(4096, 2048), 69 | nn.ReLU(), 70 | nn.Linear(2048, 1024) 71 | ) 72 | # self.out = nn.MaxPool2d(kernel_size=(18, 25)) 73 | 74 | self.apply(weights_init) 75 | 76 | def forward(self, x): 77 | x = self.conv1(x) 78 | x = self.conv2(x) 79 | x = self.conv3(x) 80 | x = self.conv4(x) 81 | 82 | x = x.view(x.size(0), -1) 83 | x = self.out(x) 84 | x = x.view(-1, 1024) 85 | 86 | return x 87 | 88 | 89 | class end_layer(nn.Module): 90 | def __init__(self, in_channels=1024, out_channels=1): 91 | super(end_layer, self).__init__() 92 | self.fc1 = nn.Linear(in_channels, 512) 93 | self.fc2 = nn.Linear(512, 256) 94 | self.fc3 = nn.Linear(256, out_channels) 95 | self.bn1 = nn.BatchNorm1d(512) 96 | self.bn2 = nn.BatchNorm1d(256) 97 | 98 | self.apply(weights_init) 99 | 100 | def forward(self, x): 101 | if x.size()[0] == 1: 102 | x = F.relu(self.fc1(x)) 103 | x = F.relu(self.fc2(x)) 104 | else: 105 | x = F.relu(self.bn1(self.fc1(x))) 106 | x = F.relu(self.bn2(self.fc2(x))) 107 | 108 | return self.fc3(x) 109 | 110 | 111 | class PointNetActorCritic(nn.Module): 112 | def __init__(self, num_points=2500, num_actions=5): 113 | super(PointNetActorCritic, 
self).__init__()
114 |         self.num_points = num_points
115 |         self.feat = CNNfeat()
116 | 
117 |         self.lstm = nn.LSTMCell(1024, 1024)
118 | 
119 |         self.critic_linear = end_layer(in_channels=1024, out_channels=1)
120 |         self.actor_linear = end_layer(in_channels=1024, out_channels=num_actions)
121 | 
122 |         self.apply(weights_init)
123 |         self.train()
124 | 
125 |     def forward(self, inputs):
126 |         x, (hx, cx) = inputs
127 |         x = self.feat(x)
128 |         hx, cx = self.lstm(x, (hx, cx))
129 |         x = hx
130 | 
131 |         return self.critic_linear(x), self.actor_linear(x), (hx, cx)
132 | 
133 | 
134 | if __name__ == '__main__':
135 |     sim_data = Variable(torch.rand(10, 3, 960, 640))  # CNNfeat expects stacked RGB+mask images, not point clouds
136 | 
137 |     # trans = STN3d()
138 |     # out = trans(sim_data)
139 |     # print('stn', out.size())
140 | 
141 |     # pointfeat = PointNetfeat(global_feat=True)
142 |     # out, _ = pointfeat(sim_data)
143 |     # print('global feat', out.size())
144 | 
145 |     # pointfeat = PointNetfeat(global_feat=False)
146 |     # out, _ = pointfeat(sim_data)
147 |     # print('point feat', out.size())
148 | 
149 |     cls = PointNetActorCritic(num_actions=4)
150 |     hx, cx = Variable(torch.zeros(10, 1024)), Variable(torch.zeros(10, 1024))
151 |     if torch.cuda.is_available():
152 |         sim_data = sim_data.cuda()
153 |         cls = cls.cuda()
154 |         hx, cx = hx.cuda(), cx.cuda()
155 |     v, q, (hx, cx) = cls((sim_data, (hx, cx)))
156 |     print(v.shape, q.shape, hx.shape, cx.shape)
157 |     print(v)
158 |     print(q)
--------------------------------------------------------------------------------
/viewpoint_optim/RL_CNN/environment.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | np.set_printoptions(threshold=np.nan)
3 | import random
4 | import os
5 | import sys
6 | import json
7 | from scipy.misc import imread
8 | from PIL import Image
9 | import argparse
10 | 
11 | import torch
12 | import torch.nn as nn
13 | from torchvision import transforms
14 | 
15 | from segmodel import SegmentationModule
16 | from utils import load_snapshot
17 | 
18 | 
19 | class MaskToTensor(object):
20 |     def __call__(self, img):
21 |         return torch.from_numpy(img.astype(np.int32)).long()
22 | 
23 | 
24 | object_map = {
25 |     'cube': 1,
26 |     'stapler': 2,
27 |     'cup': 3,
28 |     'orange': 4,
29 |     'tape': 5,
30 |     'bowl': 6,
31 |     'box': 7,
32 |     'cola': 8,
33 |     'chip_jar': 9,
34 |     'juice': 10,
35 |     'sugar_jar': 11,
36 |     'spoon': 12,
37 |     'triangle': 13,
38 |     'knife': 14,
39 |     'notebook': 15,
40 |     'rubik_cube': 16,
41 |     'laundry_liquid': 17
42 | }
43 | inv_cls_map = {str(v): k for k, v in object_map.items()}
44 | 
45 | 
46 | class ActiveAgent():
47 |     def __init__(self, idx, n_points, seg_args,
48 |                  mode='sim', root_path='the root path of IORD'):
49 |         self.root_path = root_path
50 |         self.scene_path = None
51 |         self.group_list = [4, 6, 9] + list(range(10, 15)) + list(range(20, 36))
52 |         self.idx = idx
53 |         self.logger = open('logs/env_%d.txt' % idx, 'w')
54 | 
55 |         self.mode = mode
56 |         self.n_points = n_points
57 |         self.n_actions = 5
58 | 
59 |         # moving information
60 |         self.target_object = 0
61 |         self.coord = [30, 0]
62 |         self.pre_vis = 0
63 |         self.end_thres = 0.85
64 |         self.end_flag = False
65 | 
66 |         # camera parameters
67 |         self.focalLength_x = 615.747
68 |         self.focalLength_y = 616.041
69 |         self.centerX = 317.017
70 |         self.centerY = 241.722
71 |         self.scalingFactor = 1000.0
72 | 
73 |         # load segment model
74 |         self.args = seg_args
75 |         print('using mode ', self.mode)
76 |         if self.mode == 'semantic':
77 |             model_dict = load_snapshot(self.args.snapshot, self.args.depth_fusion)
78 |             self.segmodel =
SegmentationModule( 79 | model_dict, 256, 18, self.args.depth_fusion, 80 | self.args.vote_mode, self.args.vote_scales 81 | ) 82 | self.segmodel = nn.DataParallel(self.segmodel) 83 | self.segmodel.load_state_dict(torch.load( 84 | os.path.join(self.args.model_dir, self.args.depth_fusion, 'epoch_2.pth') 85 | )) 86 | self.segmodel = self.segmodel.cuda().eval() 87 | 88 | self.input_transform = transforms.Compose([ 89 | transforms.ToTensor(), 90 | transforms.Normalize( 91 | [0.40384353, 0.45469216, 0.48145765], 92 | [0.20519882, 0.21251819, 0.22867874] 93 | ) 94 | ]) 95 | self.target_transform = MaskToTensor() 96 | 97 | def reset(self, min_vis=True, up=3, verbose=False): 98 | self.timestep = 0 99 | self.path = [] 100 | 101 | # choose starting point 102 | self.target_group = 'Group_%d_a' % random.choice(self.group_list) 103 | self.scene_idx = random.randint(3, 6) 104 | self.target_scene = sorted(os.listdir(os.path.join(self.root_path, self.target_group)))[self.scene_idx] 105 | self.scene_path = os.path.join(self.root_path, self.target_group, self.target_scene) 106 | self.coord = [random.randint(0, 4) * 10 + 30, random.randint(0, 35)] 107 | self.path.append(self.coord) 108 | 109 | # get objects 110 | gt = self._get_gt() 111 | objects = np.unique(gt)[1:] 112 | 113 | # choose target object according to visibility 114 | done = False 115 | self.end_flag = False 116 | with open(os.path.join(self.scene_path, 'RGB', '%d_RGB_%d' 117 | % tuple(self.coord), 'vis_demo.json'), 'r') as f: 118 | object_vis_dic = json.loads(f.read()) 119 | vis_order = sorted(object_vis_dic.items(), key=lambda x: x[1]) 120 | if min_vis: 121 | target_object_name = vis_order[random.randint(0, min(len(vis_order), 1))][0] 122 | else: 123 | min_objects = [object_map[vis_order[0][0]]] * 2 124 | target_object_name = inv_cls_map[str(random.choice(objects.tolist() + min_objects))] 125 | self.target_object = object_map[target_object_name] 126 | self.pre_vis = object_vis_dic.get(target_object_name, 0) 127 | 128 | if verbose: 129 | self.logger.write('Agent %d starting at %s, scene %s, coord %s \n' % \ 130 | (self.idx, self.target_group, self.target_scene, str(self.coord))) 131 | self.logger.write('Agent %d target object is [%d : %s] \n' % (self.idx, self.target_object, target_object_name)) 132 | self.logger.write('Agent %d the initial visibility is %f \n' % (self.idx, self.pre_vis)) 133 | self.logger.flush() 134 | 135 | return self._get_state_from_gt(gt), done 136 | 137 | def step(self, action): 138 | self.timestep += 1 139 | 140 | # 1-up 2-down 3-left 4-right 0-finish 141 | assert action in list(range(self.n_actions)) 142 | invalid_ops = False 143 | if action == 1: 144 | if self.coord[0] < 70: 145 | self.coord[0] += 10 146 | else: 147 | # self.coord[1] = (self.coord[1] + 18) % 36 148 | invalid_ops = True 149 | elif action == 2: 150 | if self.coord[0] > 30: 151 | self.coord[0] -= 10 152 | else: 153 | invalid_ops = True 154 | elif action == 3: 155 | self.coord[1] = (self.coord[1] + 35) % 36 156 | elif action == 4: 157 | self.coord[1] = (self.coord[1] + 1) % 36 158 | 159 | # if self.coord in self.path: 160 | # invalid_ops = True 161 | # else: 162 | # self.path.append(self.coord) 163 | 164 | done = False 165 | with open(os.path.join(self.scene_path, 'RGB', '%d_RGB_%d' % tuple(self.coord), 'vis_demo.json'), 'r') as f: 166 | vis_dic = json.loads(f.read()) 167 | vis = vis_dic.get(inv_cls_map[str(self.target_object)], 0) 168 | 169 | reward = vis - self.pre_vis - int(invalid_ops) 170 | # reward = 0 171 | self.pre_vis = vis 172 | if action == 0: 173 
| done = vis > self.end_thres 174 | self.end_flag = done 175 | if done: 176 | reward = vis * 0.25 177 | else: 178 | reward = -0.5 - (1 - vis) - 0.05 * (20 - self.timestep) 179 | # reward = -1 180 | done = True 181 | 182 | gt = self._get_gt() 183 | 184 | if self.timestep >= 20: 185 | done = True 186 | # reward -= self.timestep * 0.1 187 | reward = -1 188 | 189 | return self._get_state_from_gt(gt), reward, done 190 | 191 | def _get_gt(self): 192 | if self.mode == 'sim': 193 | gt = imread(os.path.join(self.scene_path, 'RGB', '%d_RGB_%d' % tuple(self.coord), 'direct_mask.png')).astype(np.int8) 194 | elif self.mode == 'semantic': 195 | # get img 196 | img = np.array(Image.open(os.path.join( 197 | self.scene_path, 'RGB', '%d_RGB_%d.jpg' % tuple(self.coord) 198 | )).convert('RGB')).astype(np.float32) / 255.0 199 | depth = np.load(os.path.join( 200 | self.scene_path, 'depth', '%d_depth_%d.npy' % tuple(self.coord) 201 | )) 202 | img = self.input_transform(img) 203 | if self.args.depth_fusion != 'no-depth': 204 | depth_trans = transforms.ToTensor()(np.expand_dims(depth.astype(np.float32) / 1000.0, axis=2)) 205 | depth_trans = depth_trans.cuda() 206 | else: 207 | depth_trans = None 208 | 209 | # forward segment model 210 | with torch.no_grad(): 211 | probs = self.segmodel(img.unsqueeze(0).cuda(), depth_trans) 212 | preds = torch.argmax(probs, dim=1).data.cpu().numpy() 213 | gt = preds.astype(np.int8).squeeze(0) 214 | elif self.mode == 'instance': 215 | pass 216 | 217 | return gt 218 | 219 | def _get_state_from_gt(self, gt, step=3): 220 | tgt_mask = np.zeros(gt.shape).astype(np.int8) 221 | tgt_mask[gt == self.target_object] = 1 222 | tgt_mask = tgt_mask.reshape(gt.shape + (1,)) 223 | 224 | im_arr = imread(os.path.join(self.scene_path, 'RGB', '%d_RGB_%d.jpg' % tuple(self.coord))).astype(np.float32) / 255.0 225 | # rgbt = np.concatenate((im_arr, tgt_mask), axis=2).astype(np.float32) 226 | 227 | tgt_mask = np.concatenate((tgt_mask, tgt_mask, tgt_mask), axis=2).astype(np.float32) 228 | rgbt = np.concatenate((im_arr, tgt_mask), axis=0) 229 | 230 | return rgbt.transpose(2, 0, 1) 231 | 232 | def seed(self, seed): 233 | random.seed(seed) 234 | np.random.seed(seed) 235 | 236 | def __del__(self): 237 | self.logger.close() 238 | 239 | 240 | if __name__ == '__main__': 241 | pass -------------------------------------------------------------------------------- /viewpoint_optim/RL_CNN/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import argparse 5 | import os 6 | import sys 7 | sys.path.append('..') 8 | import time 9 | 10 | import torch 11 | import torch.nn.functional as F 12 | import torch.optim as optim 13 | from torch.autograd import Variable 14 | 15 | from environment import ActiveAgent 16 | from cnn import PointNetActorCritic 17 | from utils import setup_logger 18 | 19 | 20 | # Training settings 21 | parser = argparse.ArgumentParser(description='A2C') 22 | parser.add_argument('--lr', type=float, default=0.0001, 23 | help='learning rate (default: 0.0001)') 24 | parser.add_argument('--hidden-size', type=int, default=1024, 25 | help='Hidden size for LSTM') 26 | parser.add_argument('--gamma', type=float, default=0.99, 27 | help='discount factor for rewards (default: 0.99)') 28 | parser.add_argument('--tau', type=float, default=1.00, 29 | help='parameter for GAE (default: 1.00)') 30 | parser.add_argument('--entropy-coef', type=float, default=0.01, 31 | help='entropy term coefficient (default: 0.01)') 
32 | parser.add_argument('--value-loss-coef', type=float, default=0.5,
33 |                     help='value loss coefficient (default: 0.5)')
34 | parser.add_argument('--max-grad-norm', type=float, default=20,
35 |                     help='max norm for gradient clipping (default: 20)')
36 | parser.add_argument('--seed', type=int, default=456,
37 |                     help='random seed (default: 456)')
38 | parser.add_argument('--num-steps', type=int, default=20,
39 |                     help='number of forward steps in A2C (default: 20)')
40 | parser.add_argument('--max-episode-length', type=int, default=20,
41 |                     help='maximum length of an episode (default: 20)')
42 | parser.add_argument('--env-name', default='PointNetActorCritic',
43 |                     help='environment to train on')
44 | parser.add_argument('--no-shared', default=False,
45 |                     help='use an optimizer without shared momentum.')
46 | parser.add_argument('--n-points', type=int, default=3000,
47 |                     help='the number of points fed to PointNet')
48 | parser.add_argument('--log-dir', type=str, default='logs',
49 |                     help='Folder to save logs')
50 | parser.add_argument('--model-dir', type=str, default='trained_models',
51 |                     help='Folder to save models')
52 | parser.add_argument('--data-dir', type=str, default='data',
53 |                     help='Path to the IORD dataset')
54 | parser.add_argument('--resume', default=False,
55 |                     help='resume latest model or not')
56 | parser.add_argument('--num-actions', type=int, default=5,
57 |                     help='discrete action space')
58 | parser.add_argument('--num-test', type=int, default=20,
59 |                     help='number of test episodes')
60 | 
61 | # segmentation settings
62 | parser.add_argument("--depth-fusion", type=str, default='no-depth',
63 |                     choices=['no-depth', 'pixel-concat', 'feature-concat'])
64 | parser.add_argument("--vote-mode", metavar="NAME",
65 |                     type=str, choices=["plain", "mean", "voting", "max",
66 |                                        "mean+flip", "voting+flip", "max+flip"], default="mean")
67 | parser.add_argument("--vote-scales", type=list, default=[0.7, 1.2])
68 | parser.add_argument("--output-mode", metavar="NAME", type=str, choices=["palette", "raw", "prob"],
69 |                     default="palette",
70 |                     help="How the output files are formatted."
71 |                          " -- palette: color coded predictions"
72 |                          " -- raw: gray-scale predictions"
73 |                          " -- prob: gray-scale predictions plus probabilities")
74 | parser.add_argument("--snapshot", metavar="SNAPSHOT_FILE", type=str, default='wide_resnet38_deeplab_vistas.pth.tar', help="Snapshot file to load")
75 | parser.add_argument("--seg-model-dir", type=str, default="path of segmentation model")
76 | 
77 | 
78 | if __name__ == '__main__':
79 |     args = parser.parse_args()
80 |     if not os.path.isdir(args.log_dir):
81 |         os.makedirs(args.log_dir)
82 | 
83 |     torch.manual_seed(args.seed)
84 |     np.random.seed(args.seed)
85 | 
86 |     model = PointNetActorCritic(num_points=args.n_points, num_actions=args.num_actions)
87 |     model = model.cuda()
88 |     env = ActiveAgent(idx=0, n_points=args.n_points,
89 |                       seg_args=args, mode='sim', root_path=args.data_dir)
90 |     env.seed(args.seed)
91 | 
92 |     # resume latest model
93 |     if args.resume:
94 |         model_path = os.path.join(args.model_dir, 'latest.pth')
95 |         if not os.path.isdir(args.model_dir):
96 |             os.makedirs(args.model_dir)
97 |         elif os.path.exists(model_path):
98 |             print('Loading model from %s...'
% model_path) 99 | model.load_state_dict(torch.load(model_path)) 100 | 101 | itr = 0 102 | epoch = 0 103 | training_time = 50 104 | train_logger = setup_logger('trainer', os.path.join(args.log_dir, 'trainer_log.txt')) 105 | test_logger = setup_logger('test', os.path.join(args.log_dir, 'test_log.txt')) 106 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 107 | 108 | # test parameters 109 | all_success_time = 0 110 | all_time = 0 111 | ep_success_time = 0 112 | success_phase = 0.1 113 | check_flag = False 114 | 115 | while True: 116 | epoch += 1 117 | ################### training phase ################### 118 | model = model.train() 119 | for train_itr in range(training_time): 120 | training = True 121 | episode_length = 0 122 | 123 | state, _ = env.reset(min_vis=False) 124 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 125 | if torch.cuda.is_available(): 126 | state = state.cuda() 127 | done = True 128 | 129 | while training: 130 | if done: 131 | cx = Variable(torch.zeros(1, args.hidden_size)) 132 | hx = Variable(torch.zeros(1, args.hidden_size)) 133 | else: 134 | cx = Variable(cx.data) 135 | hx = Variable(hx.data) 136 | if torch.cuda.is_available(): 137 | hx = hx.cuda() 138 | cx = cx.cuda() 139 | 140 | values = [] 141 | log_probs = [] 142 | rewards = [] 143 | entropies = [] 144 | 145 | for step in range(args.num_steps): 146 | itr += 1 147 | episode_length += 1 148 | 149 | value, logit, (hx, cx) = model((state, (hx, cx))) 150 | prob = F.softmax(logit, dim=1) 151 | log_prob = F.log_softmax(logit, dim=1) 152 | entropy = -(log_prob * prob).sum(1, keepdim=True) 153 | entropies.append(entropy) 154 | 155 | action = prob.multinomial(num_samples=1).data.cpu() 156 | _action = Variable(action) 157 | if torch.cuda.is_available(): 158 | _action = _action.cuda() 159 | log_prob = log_prob.gather(1, _action) 160 | 161 | path_info = '%s %s %s %d' % (env.target_group, env.scene_idx, env.coord, action.numpy()) 162 | # train_logger.info(path_info) 163 | 164 | state, reward, done = env.step(action.numpy()) 165 | 166 | if done: 167 | training = False 168 | success = env.end_flag 169 | log_info = 'Training Step: [%d - %d], Episode length: %d, Reward: %0.2f, Success: %s' \ 170 | % (epoch, train_itr, episode_length, sum(rewards) + reward, str(success)) 171 | train_logger.info(log_info) 172 | print(log_info) 173 | print(prob.cpu().detach().numpy()[0]) 174 | episode_length = 0 175 | # state, _ = env.reset(up=min(max(itr // 2500, 3), 6)) 176 | state, _ = env.reset(min_vis=False) 177 | 178 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 179 | if torch.cuda.is_available(): 180 | state = state.cuda() 181 | values.append(value) 182 | log_probs.append(log_prob) 183 | rewards.append(reward) 184 | 185 | if done: 186 | break 187 | 188 | R = torch.zeros(1, 1) 189 | if not done: 190 | value, _, _ = model((state, (hx, cx))) 191 | R = value.data 192 | 193 | policy_loss = 0 194 | value_loss = 0 195 | R = Variable(R) 196 | gae = torch.zeros(1, 1) 197 | if torch.cuda.is_available(): 198 | R = R.cuda() 199 | gae = gae.cuda() 200 | values.append(R) 201 | for i in reversed(range(len(rewards))): 202 | R = args.gamma * R + rewards[i] 203 | advantage = R - values[i] 204 | value_loss = value_loss + 0.5 * advantage.pow(2) 205 | 206 | # Generalized Advantage Estimataion 207 | delta_t = rewards[i] + args.gamma * \ 208 | values[i + 1].data - values[i].data 209 | gae = gae * args.gamma * args.tau + delta_t 210 | 211 | policy_loss = policy_loss - \ 212 | log_probs[i] * Variable(gae) - args.entropy_coef * entropies[i] 
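                    # -- Editorial comments (added; not in the original source) --
                    # Generalized Advantage Estimation, as computed above:
                    #   delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
                    #   gae_t   = delta_t + gamma * tau * gae_{t+1}
                    # With tau = 1.0 (the default) this telescopes into the full
                    # discounted return minus the value baseline; smaller tau
                    # trades variance for bias. The entropy term discourages the
                    # policy from collapsing to a deterministic action too early.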
213 | 214 | optimizer.zero_grad() 215 | 216 | (policy_loss + args.value_loss_coef * value_loss).backward() 217 | torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) 218 | 219 | optimizer.step() 220 | 221 | 222 | ################### testing phase ################### 223 | model = model.eval() 224 | 225 | state, _ = env.reset(min_vis=False) 226 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 227 | if torch.cuda.is_available(): 228 | state = state.cuda() 229 | reward_sum = 0 230 | done = True 231 | 232 | episode_length = 0 233 | testing = True 234 | while testing: 235 | episode_length += 1 236 | # Sync with the shared model 237 | if done: 238 | with torch.no_grad(): 239 | cx = torch.zeros(1, args.hidden_size) 240 | hx = torch.zeros(1, args.hidden_size) 241 | else: 242 | with torch.no_grad(): 243 | cx = cx.data 244 | hx = hx.data 245 | if torch.cuda.is_available(): 246 | hx = hx.cuda() 247 | cx = cx.cuda() 248 | 249 | with torch.no_grad(): 250 | value, logit, (hx, cx) = model((state, (hx, cx))) 251 | prob = F.softmax(logit, dim=1) 252 | action = prob.max(1, keepdim=True)[1].data.cpu().numpy() 253 | 254 | path_info = '%s %s %s %d' % (env.target_group, env.scene_idx, env.coord, action[0, 0]) 255 | # test_logger.info(path_info) 256 | 257 | state, reward, done = env.step(action[0, 0]) 258 | reward_sum += reward 259 | 260 | if done: 261 | # print('testing: ', all_time) 262 | success = env.end_flag 263 | all_success_time += success 264 | ep_success_time += success 265 | all_time += 1 266 | if all_time % args.num_test == 0: 267 | check_flag = True 268 | 269 | state, _ = env.reset(min_vis=False) 270 | time.sleep(0.1) 271 | 272 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 273 | if torch.cuda.is_available(): 274 | state = state.cuda() 275 | 276 | if check_flag: 277 | all_success_rate = all_success_time / all_time 278 | log_info = 'Num steps: %d, Episode length: %d, Reward: %0.2f, EP Success: %0.2f, ALL Success: %0.3f' \ 279 | % (itr, episode_length, reward_sum, ep_success_time / args.num_test, all_success_rate) 280 | test_logger.info(log_info) 281 | print(log_info) 282 | torch.save(model.state_dict(), os.path.join(args.model_dir, 'latest.pth')) 283 | 284 | # save models in some important phases 285 | if all_success_rate > success_phase: 286 | torch.save(model.state_dict(), 287 | os.path.join(args.model_dir, 'success_rate_%0.2f.pth' % success_phase)) 288 | success_phase += 0.1 289 | 290 | # save models according to steps 291 | if epoch % 20 == 0: 292 | torch.save(model.state_dict(), 293 | os.path.join(args.model_dir, 'model_%d.pth' % epoch)) 294 | 295 | reward_sum = 0 296 | episode_length = 0 297 | ep_success_time = 0 298 | check_flag = False 299 | testing = False 300 | 301 | time.sleep(1) -------------------------------------------------------------------------------- /viewpoint_optim/RL_pointnet/environment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | np.set_printoptions(threshold=np.nan) 3 | import random 4 | import os 5 | import sys 6 | sys.path.append('..') 7 | import json 8 | from scipy.misc import imread 9 | from PIL import Image 10 | import argparse 11 | 12 | import torch 13 | import torch.nn as nn 14 | from torchvision import transforms 15 | 16 | from segmodel import SegmentationModule 17 | from utils import load_snapshot 18 | 19 | 20 | class MaskToTensor(object): 21 | def __call__(self, img): 22 | return torch.from_numpy(img.astype(np.int32)).long() 23 | 24 | 25 | object_map = { 26 | 
'cube': 1,
27 |     'stapler': 2,
28 |     'cup': 3,
29 |     'orange': 4,
30 |     'tape': 5,
31 |     'bowl': 6,
32 |     'box': 7,
33 |     'cola': 8,
34 |     'chip_jar': 9,
35 |     'juice': 10,
36 |     'sugar_jar': 11,
37 |     'spoon': 12,
38 |     'triangle': 13,
39 |     'knife': 14,
40 |     'notebook': 15,
41 |     'rubik_cube': 16,
42 |     'laundry_liquid': 17
43 | }  # the 17 IORD categories; class ids start at 1, mask label 0 is background
44 | inv_cls_map = {str(v): k for k, v in object_map.items()}
45 | 
46 | 
47 | class ActiveAgent:
48 |     def __init__(self, idx, n_points, seg_args=None,  # argparse namespace supplied by the caller
49 |                  mode='sim', root_path='the root path of IORD'):
50 |         self.root_path = root_path
51 |         self.scene_path = None
52 |         self.group_list = [4, 6, 9] + list(range(10, 15)) + list(range(20, 36))
53 |         self.idx = idx
54 |         self.logger = open('logs/env_%d.txt' % idx, 'w')
55 | 
56 |         self.mode = mode
57 |         self.n_points = n_points
58 |         self.n_actions = 5
59 | 
60 |         # moving information
61 |         self.target_object = 0
62 |         self.coord = [30, 0]  # [30-70 in steps of 10, 0-35 wrap-around]; see step() for the motion model
63 |         self.pre_vis = 0
64 |         self.end_thres = 0.85  # the stop action succeeds once target visibility exceeds this
65 |         self.end_flag = False
66 | 
67 |         # camera parameters
68 |         self.focalLength_x = 615.747
69 |         self.focalLength_y = 616.041
70 |         self.centerX = 317.017
71 |         self.centerY = 241.722
72 |         self.scalingFactor = 1000.0
73 | 
74 |         # load segment model
75 |         self.args = seg_args
76 |         print('using mode ', self.mode)
77 |         if self.mode == 'semantic':
78 |             model_dict = load_snapshot(self.args.snapshot, self.args.depth_fusion)
79 |             self.segmodel = SegmentationModule(
80 |                 model_dict, 256, 18, self.args.depth_fusion,
81 |                 self.args.vote_mode, self.args.vote_scales
82 |             )
83 |             self.segmodel = nn.DataParallel(self.segmodel)
84 |             self.segmodel.load_state_dict(torch.load(
85 |                 os.path.join(self.args.model_dir, self.args.depth_fusion, 'epoch_15.pth')  # note: reads from --model-dir, not --seg-model-dir
86 |             ))
87 |             self.segmodel = self.segmodel.cuda().eval()
88 | 
89 |         self.input_transform = transforms.Compose([
90 |             transforms.ToTensor(),
91 |             transforms.Normalize(
92 |                 [0.40384353, 0.45469216, 0.48145765],
93 |                 [0.20519882, 0.21251819, 0.22867874]
94 |             )
95 |         ])
96 |         self.target_transform = MaskToTensor()
97 | 
98 |     def reset(self, min_vis=True, up=3, verbose=False):
99 |         self.timestep = 0
100 |         self.path = []
101 | 
102 |         # choose starting point
103 |         self.target_group = 'Group_%d_a' % random.choice(self.group_list)
104 |         self.scene_idx = random.randint(3, 6)
105 |         self.target_scene = sorted(os.listdir(os.path.join(self.root_path, self.target_group)))[self.scene_idx]
106 |         self.scene_path = os.path.join(self.root_path, self.target_group, self.target_scene)
107 |         self.coord = [random.randint(0, 4) * 10 + 30, random.randint(0, 35)]
108 |         self.path.append(self.coord)
109 | 
110 |         # get objects
111 |         gt = self._get_gt()
112 |         objects = np.unique(gt)[1:]
113 | 
114 |         # choose target object according to visibility
115 |         done = False
116 |         self.end_flag = False
117 |         with open(os.path.join(self.scene_path, 'RGB', '%d_RGB_%d'
118 |                   % tuple(self.coord), 'vis_demo.json'), 'r') as f:
119 |             object_vis_dic = json.loads(f.read())
120 |         vis_order = sorted(object_vis_dic.items(), key=lambda x: x[1])
121 |         if min_vis:
122 |             target_object_name = vis_order[random.randint(0, min(len(vis_order), 1))][0]  # one of the two least-visible objects
123 |         else:
124 |             min_objects = [object_map[vis_order[0][0]]] * 2
125 |             target_object_name = inv_cls_map[str(random.choice(objects.tolist() + min_objects))]
126 |         self.target_object = object_map[target_object_name]
127 |         self.pre_vis = object_vis_dic.get(target_object_name, 0)
128 | 
129 |         if verbose:
130 |             self.logger.write('Agent %d starting at %s, scene %s, coord %s \n' % \
131 |                 (self.idx, self.target_group, self.target_scene,
str(self.coord))) 132 | self.logger.write('Agent %d target object is [%d : %s] \n' % (self.idx, self.target_object, target_object_name)) 133 | self.logger.write('Agent %d the initial visibility is %f \n' % (self.idx, self.pre_vis)) 134 | self.logger.flush() 135 | 136 | return self._get_pcd_from_gt(gt), done 137 | 138 | def step(self, action): 139 | self.timestep += 1 140 | 141 | # 1-up 2-down 3-left 4-right 0-finish 142 | assert action in list(range(self.n_actions)) 143 | invalid_ops = False 144 | if action == 1: 145 | if self.coord[0] < 70: 146 | self.coord[0] += 10 147 | else: 148 | # self.coord[1] = (self.coord[1] + 18) % 36 149 | invalid_ops = True 150 | elif action == 2: 151 | if self.coord[0] > 30: 152 | self.coord[0] -= 10 153 | else: 154 | invalid_ops = True 155 | elif action == 3: 156 | self.coord[1] = (self.coord[1] + 35) % 36 157 | elif action == 4: 158 | self.coord[1] = (self.coord[1] + 1) % 36 159 | 160 | # if self.coord in self.path: 161 | # invalid_ops = True 162 | # else: 163 | # self.path.append(self.coord) 164 | 165 | done = False 166 | with open(os.path.join(self.scene_path, 'RGB', '%d_RGB_%d' % tuple(self.coord), 'vis_demo.json'), 'r') as f: 167 | vis_dic = json.loads(f.read()) 168 | vis = vis_dic.get(inv_cls_map[str(self.target_object)], 0) 169 | 170 | reward = vis - self.pre_vis - int(invalid_ops) 171 | # reward = 0 172 | self.pre_vis = vis 173 | if action == 0: 174 | done = vis > self.end_thres 175 | self.end_flag = done 176 | if done: 177 | reward = vis * 0.25 178 | else: 179 | reward = -0.5 - (1 - vis) - 0.05 * (20 - self.timestep) 180 | # reward = 0 181 | done = True 182 | 183 | gt = self._get_gt() 184 | 185 | if self.timestep >= 20: 186 | done = True 187 | # reward -= self.timestep * 0.1 188 | reward = -1 189 | 190 | return self._get_pcd_from_gt(gt), reward, done 191 | 192 | def _get_gt(self): 193 | if self.mode == 'sim': 194 | gt = imread(os.path.join(self.scene_path, 'RGB', '%d_RGB_%d' % tuple(self.coord), 'direct_mask.png')).astype(np.int8) 195 | elif self.mode == 'semantic': 196 | # get img 197 | img = np.array(Image.open(os.path.join( 198 | self.scene_path, 'RGB', '%d_RGB_%d.jpg' % tuple(self.coord) 199 | )).convert('RGB')).astype(np.float32) / 255.0 200 | depth = np.load(os.path.join( 201 | self.scene_path, 'depth', '%d_depth_%d.npy' % tuple(self.coord) 202 | )) 203 | img = self.input_transform(img) 204 | if self.args.depth_fusion != 'no-depth': 205 | depth_trans = transforms.ToTensor()(np.expand_dims(depth.astype(np.float32) / 1000.0, axis=2)) 206 | depth_trans = depth_trans.unsqueeze(0).cuda() 207 | else: 208 | depth_trans = None 209 | 210 | # forward segment model 211 | with torch.no_grad(): 212 | probs = self.segmodel(img.unsqueeze(0).cuda(), depth_trans) 213 | preds = torch.argmax(probs, dim=1).data.cpu().numpy() 214 | gt = preds.astype(np.int8).squeeze(0) 215 | elif self.mode == 'instance': 216 | pass 217 | 218 | return gt 219 | 220 | def _get_pcd_from_gt(self, gt, step=3): 221 | tgt_mask = np.ones(gt.shape).astype(np.int8) 222 | tgt_mask[gt != self.target_object] = -1 223 | 224 | d_im = np.load(os.path.join(self.scene_path, 'depth', '%d_depth_%d.npy' % tuple(self.coord))) 225 | points = [] 226 | 227 | width = d_im.shape[1] 228 | height = d_im.shape[0] 229 | X = np.tile(np.arange(width), (height, 1)) 230 | Y = np.tile(np.arange(height), (width, 1)).T 231 | Z = d_im / self.scalingFactor 232 | x = (X - self.centerX) * Z / self.focalLength_x 233 | y = (Y - self.centerY) * Z / self.focalLength_y 234 | 235 | for v in range(0, d_im.shape[1], step): 236 | 
for u in range(0, d_im.shape[0], step): 237 | if Z[u][v] < 1e-4: 238 | continue 239 | points.append([x[u][v], y[u][v], Z[u][v], tgt_mask[u][v]]) 240 | points = np.array(points, dtype=np.float32) 241 | # points[:, 0] = (points[:, 0] - points[:, 0].mean()) / points[:, 0].std() 242 | # points[:, 1] = (points[:, 1] - points[:, 1].mean()) / points[:, 1].std() 243 | # points[:, 2] = (points[:, 2] - points[:, 2].mean()) / points[:, 2].std() 244 | 245 | points = points[np.random.randint(0, points.shape[0], size=self.n_points), :] 246 | 247 | return points.transpose(1, 0) 248 | 249 | def seed(self, seed): 250 | random.seed(seed) 251 | np.random.seed(seed) 252 | 253 | def __del__(self): 254 | self.logger.close() 255 | 256 | 257 | if __name__ == '__main__': 258 | pass -------------------------------------------------------------------------------- /viewpoint_optim/RL_pointnet/evaluate.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import argparse 5 | import os 6 | import sys 7 | sys.path.append('..') 8 | import time 9 | 10 | import torch 11 | import torch.nn.functional as F 12 | import torch.optim as optim 13 | from torch.autograd import Variable 14 | from torch.utils.cpp_extension import load 15 | 16 | from environment import ActiveAgent 17 | from pointnet import PointNetActorCritic 18 | from utils import setup_logger 19 | 20 | 21 | # Training settings 22 | parser = argparse.ArgumentParser(description='A2C') 23 | parser.add_argument('--lr', type=float, default=0.0001, 24 | help='learning rate (default: 0.0001)') 25 | parser.add_argument('--hidden-size', type=int, default=1024, 26 | help='Hidden size for LSTM') 27 | parser.add_argument('--gamma', type=float, default=0.99, 28 | help='discount factor for rewards (default: 0.99)') 29 | parser.add_argument('--tau', type=float, default=1.00, 30 | help='parameter for GAE (default: 1.00)') 31 | parser.add_argument('--entropy-coef', type=float, default=0.01, 32 | help='entropy term coefficient (default: 0.01)') 33 | parser.add_argument('--value-loss-coef', type=float, default=0.5, 34 | help='value loss coefficient (default: 0.5)') 35 | parser.add_argument('--max-grad-norm', type=float, default=20, 36 | help='value loss coefficient (default: 50)') 37 | parser.add_argument('--seed', type=int, default=456, 38 | help='random seed (default: 1)') 39 | parser.add_argument('--num-steps', type=int, default=20, 40 | help='number of forward steps in A2C (default: 20)') 41 | parser.add_argument('--max-episode-length', type=int, default=50, 42 | help='maximum length of an episode (default: 1000000)') 43 | parser.add_argument('--env-name', default='PointNetActorCritic', 44 | help='environment to train on') 45 | parser.add_argument('--no-shared', default=False, 46 | help='use an optimizer without shared momentum.') 47 | parser.add_argument('--n-points', type=int, default=3000, 48 | help='the number of points feed to pointnet') 49 | parser.add_argument('--log-dir', type=str, default='logs', 50 | help='Folder to save logs') 51 | parser.add_argument('--model-dir', type=str, default='trained_models', 52 | help='Folder to save models') 53 | parser.add_argument('--data-dir', type=str, default='data', 54 | help='Folder to IORD') 55 | parser.add_argument('--resume', default=True, 56 | help='resume latest model or not') 57 | parser.add_argument('--num-actions', type=int, default=5, 58 | help='discrete action space') 59 | parser.add_argument('--num-test', type=int, 
default=50, 60 | help='test time') 61 | parser.add_argument('--min', type=bool, default=True, 62 | help='use min-vis or not') 63 | parser.add_argument('--mode', type=str, default='semantic', 64 | help='vision mode') 65 | 66 | # segmentation settings 67 | parser.add_argument("--depth-fusion", type=str, default='no-depth', 68 | choices=['no-depth', 'pixel-concat', 'feature-concat']) 69 | parser.add_argument("--vote-mode", metavar="NAME", 70 | type=str, choices=["plain", "mean", "voting", "max", 71 | "mean+flip", "voting+flip", "max+flip"], default="mean") 72 | parser.add_argument("--vote-scales", type=list, default=[0.7, 1.2]) 73 | parser.add_argument("--output-mode", metavar="NAME", type=str, choices=["palette", "raw", "prob"], 74 | default="class", 75 | help="How the output files are formatted." 76 | " -- palette: color coded predictions" 77 | " -- raw: gray-scale predictions" 78 | " -- prob: gray-scale predictions plus probabilities") 79 | parser.add_argument("--snapshot", metavar="SNAPSHOT_FILE", type=str, default='wide_resnet38_deeplab_vistas.pth.tar', help="Snapshot file to load") 80 | parser.add_argument("--seg-model-dir", type=str, default="path of segmentation model") 81 | 82 | 83 | if __name__ == '__main__': 84 | args = parser.parse_args() 85 | if not os.path.isdir(args.log_dir): 86 | os.makedirs(args.log_dir) 87 | 88 | torch.manual_seed(args.seed) 89 | np.random.seed(args.seed) 90 | 91 | model = PointNetActorCritic(num_points=args.n_points, num_actions=args.num_actions) 92 | model = model.cuda() 93 | env = ActiveAgent(idx=0, n_points=args.n_points, 94 | seg_args=args, mode='semantic', root_path=args.data_dir) 95 | env.seed(args.seed) 96 | 97 | # resume latest model 98 | if args.resume: 99 | model_path = os.path.join(args.model_dir, 'latest.pth') 100 | if not os.path.isdir(args.model_dir): 101 | os.makedirs(args.model_dir) 102 | elif os.path.exists(model_path): 103 | print('Loading model from %s...' 
% model_path) 104 | model.load_state_dict(torch.load(model_path)) 105 | 106 | itr = 0 107 | epoch = 0 108 | training_time = 50 109 | # train_logger = setup_logger('trainer', os.path.join(args.log_dir, 'trainer_log.txt')) 110 | # test_logger = setup_logger('test', os.path.join(args.log_dir, 'test_log.txt')) 111 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 112 | 113 | # test parameters 114 | all_success_time = 0 115 | all_time = 0 116 | ep_success_time = 0 117 | success_phase = 0.1 118 | check_flag = False 119 | 120 | for _ in range(5): 121 | epoch += 1 122 | ################### testing phase ################### 123 | model = model.eval() 124 | 125 | state, _ = env.reset(min_vis=args.min) 126 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 127 | if torch.cuda.is_available(): 128 | state = state.cuda() 129 | reward_sum = 0 130 | done = True 131 | 132 | episode_length = 0 133 | testing = True 134 | while testing: 135 | episode_length += 1 136 | # Sync with the shared model 137 | if done: 138 | with torch.no_grad(): 139 | cx = torch.zeros(1, args.hidden_size) 140 | hx = torch.zeros(1, args.hidden_size) 141 | else: 142 | with torch.no_grad(): 143 | cx = cx.data 144 | hx = hx.data 145 | if torch.cuda.is_available(): 146 | hx = hx.cuda() 147 | cx = cx.cuda() 148 | 149 | with torch.no_grad(): 150 | value, logit, (hx, cx) = model((state, (hx, cx))) 151 | prob = F.softmax(logit, dim=1) 152 | action = prob.max(1, keepdim=True)[1].data.cpu().numpy() 153 | 154 | # path_info = '%s %s %s %d' % (env.target_group, env.scene_idx, env.coord, action[0, 0]) 155 | # test_logger.info(path_info) 156 | 157 | state, reward, done = env.step(action[0, 0]) 158 | reward_sum += reward 159 | 160 | if done: 161 | # print('testing: ', all_time) 162 | success = env.end_flag 163 | all_success_time += success 164 | ep_success_time += success 165 | all_time += 1 166 | if all_time % args.num_test == 0: 167 | check_flag = True 168 | 169 | state, _ = env.reset(min_vis=args.min) 170 | time.sleep(0.1) 171 | 172 | print('testing: ', all_time) 173 | 174 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 175 | if torch.cuda.is_available(): 176 | state = state.cuda() 177 | 178 | if check_flag: 179 | all_success_rate = all_success_time / all_time 180 | log_info = 'Num steps: %d, Episode length: %d, Reward: %0.2f, EP Success: %0.4f, ALL Success: %0.4f' \ 181 | % (itr, episode_length, reward_sum, ep_success_time / args.num_test, all_success_rate) 182 | # test_logger.info(log_info) 183 | print(log_info) 184 | 185 | reward_sum = 0 186 | episode_length = 0 187 | ep_success_time = 0 188 | check_flag = False 189 | testing = False 190 | 191 | time.sleep(1) -------------------------------------------------------------------------------- /viewpoint_optim/RL_pointnet/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import argparse 5 | import os 6 | import sys 7 | sys.path.append('..') 8 | import time 9 | 10 | import torch 11 | import torch.nn.functional as F 12 | import torch.optim as optim 13 | from torch.autograd import Variable 14 | 15 | from environment import ActiveAgent 16 | from pointnet import PointNetActorCritic 17 | from utils import setup_logger 18 | 19 | 20 | # Training settings 21 | parser = argparse.ArgumentParser(description='A2C') 22 | parser.add_argument('--lr', type=float, default=0.0001, 23 | help='learning rate (default: 0.0001)') 24 | parser.add_argument('--hidden-size', type=int, 
default=1024, 25 | help='Hidden size for LSTM') 26 | parser.add_argument('--gamma', type=float, default=0.99, 27 | help='discount factor for rewards (default: 0.99)') 28 | parser.add_argument('--tau', type=float, default=1.00, 29 | help='parameter for GAE (default: 1.00)') 30 | parser.add_argument('--entropy-coef', type=float, default=0.01, 31 | help='entropy term coefficient (default: 0.01)') 32 | parser.add_argument('--value-loss-coef', type=float, default=0.5, 33 | help='value loss coefficient (default: 0.5)') 34 | parser.add_argument('--max-grad-norm', type=float, default=20, 35 | help='value loss coefficient (default: 50)') 36 | parser.add_argument('--seed', type=int, default=456, 37 | help='random seed (default: 1)') 38 | parser.add_argument('--num-steps', type=int, default=20, 39 | help='number of forward steps in A2C (default: 20)') 40 | parser.add_argument('--max-episode-length', type=int, default=50, 41 | help='maximum length of an episode (default: 1000000)') 42 | parser.add_argument('--env-name', default='PointNetActorCritic', 43 | help='environment to train on') 44 | parser.add_argument('--no-shared', default=False, 45 | help='use an optimizer without shared momentum.') 46 | parser.add_argument('--n-points', type=int, default=3000, 47 | help='the number of points feed to pointnet') 48 | parser.add_argument('--log-dir', type=str, default='logs', 49 | help='Folder to save logs') 50 | parser.add_argument('--model-dir', type=str, default='trained_models', 51 | help='Folder to save models') 52 | parser.add_argument('--data-dir', type=str, default='data', 53 | help='Folder to IORD') 54 | parser.add_argument('--resume', default=False, 55 | help='resume latest model or not') 56 | parser.add_argument('--num-actions', type=int, default=5, 57 | help='discrete action space') 58 | parser.add_argument('--num-test', type=int, default=20, 59 | help='test time') 60 | 61 | # segmentation settings 62 | parser.add_argument("--depth-fusion", type=str, default='no-depth', 63 | choices=['no-depth', 'pixel-concat', 'feature-concat']) 64 | parser.add_argument("--vote-mode", metavar="NAME", 65 | type=str, choices=["plain", "mean", "voting", "max", 66 | "mean+flip", "voting+flip", "max+flip"], default="mean") 67 | parser.add_argument("--vote-scales", type=list, default=[0.7, 1.2]) 68 | parser.add_argument("--output-mode", metavar="NAME", type=str, choices=["palette", "raw", "prob"], 69 | default="class", 70 | help="How the output files are formatted." 
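                    # NOTE: default="class" above is not among the declared choices;
                    # argparse only validates values given on the command line, so the
                    # default slips through, but an explicit --output-mode must be one
                    # of palette / raw / prob.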
71 | " -- palette: color coded predictions" 72 | " -- raw: gray-scale predictions" 73 | " -- prob: gray-scale predictions plus probabilities") 74 | parser.add_argument("--snapshot", metavar="SNAPSHOT_FILE", type=str, default='wide_resnet38_deeplab_vistas.pth.tar', help="Snapshot file to load") 75 | parser.add_argument("--seg-model-dir", type=str, default="path of segmentation model") 76 | 77 | 78 | if __name__ == '__main__': 79 | args = parser.parse_args() 80 | if not os.path.isdir(args.log_dir): 81 | os.makedirs(args.log_dir) 82 | 83 | torch.manual_seed(args.seed) 84 | np.random.seed(args.seed) 85 | 86 | model = PointNetActorCritic(num_points=args.n_points, num_actions=args.num_actions) 87 | model = model.cuda() 88 | env = ActiveAgent(idx=0, n_points=args.n_points, 89 | seg_args=args, mode='sim', root_path=args.data_dir) 90 | env.seed(args.seed) 91 | 92 | # resume latest model 93 | if args.resume: 94 | model_path = os.path.join(args.model_dir, 'latest.pth') 95 | if not os.path.isdir(args.model_dir): 96 | os.makedirs(args.model_dir) 97 | elif os.path.exists(model_path): 98 | print('Loading model from %s...' % model_path) 99 | model.load_state_dict(torch.load(model_path)) 100 | 101 | itr = 0 102 | epoch = 0 103 | training_time = 50 104 | train_logger = setup_logger('trainer', os.path.join(args.log_dir, 'trainer_log.txt')) 105 | test_logger = setup_logger('test', os.path.join(args.log_dir, 'test_log.txt')) 106 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 107 | 108 | # test parameters 109 | all_success_time = 0 110 | all_time = 0 111 | ep_success_time = 0 112 | success_phase = 0.1 113 | check_flag = False 114 | 115 | while True: 116 | epoch += 1 117 | ################### training phase ################### 118 | model = model.train() 119 | for train_itr in range(training_time): 120 | training = True 121 | episode_length = 0 122 | 123 | state, _ = env.reset(min_vis=True) 124 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 125 | if torch.cuda.is_available(): 126 | state = state.cuda() 127 | done = True 128 | 129 | while training: 130 | if done: 131 | cx = Variable(torch.zeros(1, args.hidden_size)) 132 | hx = Variable(torch.zeros(1, args.hidden_size)) 133 | else: 134 | cx = Variable(cx.data) 135 | hx = Variable(hx.data) 136 | if torch.cuda.is_available(): 137 | hx = hx.cuda() 138 | cx = cx.cuda() 139 | 140 | values = [] 141 | log_probs = [] 142 | rewards = [] 143 | entropies = [] 144 | 145 | for step in range(args.num_steps): 146 | itr += 1 147 | episode_length += 1 148 | 149 | value, logit, (hx, cx) = model((state, (hx, cx))) 150 | prob = F.softmax(logit, dim=1) 151 | log_prob = F.log_softmax(logit, dim=1) 152 | entropy = -(log_prob * prob).sum(1, keepdim=True) 153 | entropies.append(entropy) 154 | 155 | action = prob.multinomial(num_samples=1).data.cpu() 156 | _action = Variable(action) 157 | if torch.cuda.is_available(): 158 | _action = _action.cuda() 159 | log_prob = log_prob.gather(1, _action) 160 | 161 | path_info = '%s %s %s %d' % (env.target_group, env.scene_idx, env.coord, action.numpy()) 162 | train_logger.info(path_info) 163 | 164 | state, reward, done = env.step(action.numpy()) 165 | 166 | if done: 167 | training = False 168 | success = env.end_flag 169 | log_info = 'Training Step: [%d - %d], Episode length: %d, Reward: %0.2f, Success: %s' \ 170 | % (epoch, train_itr, episode_length, sum(rewards) + reward, str(success)) 171 | train_logger.info(log_info) 172 | print(log_info) 173 | episode_length = 0 174 | # state, _ = env.reset(up=min(max(itr // 2500, 3), 6)) 175 
| state, _ = env.reset(min_vis=True) 176 | 177 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 178 | if torch.cuda.is_available(): 179 | state = state.cuda() 180 | values.append(value) 181 | log_probs.append(log_prob) 182 | rewards.append(reward) 183 | 184 | if done: 185 | break 186 | 187 | R = torch.zeros(1, 1) 188 | if not done: 189 | value, _, _ = model((state, (hx, cx))) 190 | R = value.data 191 | 192 | policy_loss = 0 193 | value_loss = 0 194 | R = Variable(R) 195 | gae = torch.zeros(1, 1) 196 | if torch.cuda.is_available(): 197 | R = R.cuda() 198 | gae = gae.cuda() 199 | values.append(R) 200 | for i in reversed(range(len(rewards))): 201 | R = args.gamma * R + rewards[i] 202 | advantage = R - values[i] 203 | value_loss = value_loss + 0.5 * advantage.pow(2) 204 | 205 | # Generalized Advantage Estimataion 206 | delta_t = rewards[i] + args.gamma * \ 207 | values[i + 1].data - values[i].data 208 | gae = gae * args.gamma * args.tau + delta_t 209 | 210 | policy_loss = policy_loss - \ 211 | log_probs[i] * Variable(gae) - args.entropy_coef * entropies[i] 212 | 213 | optimizer.zero_grad() 214 | 215 | (policy_loss + args.value_loss_coef * value_loss).backward() 216 | torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) 217 | 218 | optimizer.step() 219 | 220 | 221 | ################### testing phase ################### 222 | model = model.eval() 223 | 224 | state, _ = env.reset() 225 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 226 | if torch.cuda.is_available(): 227 | state = state.cuda() 228 | reward_sum = 0 229 | done = True 230 | 231 | episode_length = 0 232 | testing = True 233 | while testing: 234 | episode_length += 1 235 | # Sync with the shared model 236 | if done: 237 | with torch.no_grad(): 238 | cx = torch.zeros(1, args.hidden_size) 239 | hx = torch.zeros(1, args.hidden_size) 240 | else: 241 | with torch.no_grad(): 242 | cx = cx.data 243 | hx = hx.data 244 | if torch.cuda.is_available(): 245 | hx = hx.cuda() 246 | cx = cx.cuda() 247 | 248 | with torch.no_grad(): 249 | value, logit, (hx, cx) = model((state, (hx, cx))) 250 | prob = F.softmax(logit, dim=1) 251 | action = prob.max(1, keepdim=True)[1].data.cpu().numpy() 252 | 253 | path_info = '%s %s %s %d' % (env.target_group, env.scene_idx, env.coord, action[0, 0]) 254 | test_logger.info(path_info) 255 | 256 | state, reward, done = env.step(action[0, 0]) 257 | reward_sum += reward 258 | 259 | if done: 260 | # print('testing: ', all_time) 261 | success = env.end_flag 262 | all_success_time += success 263 | ep_success_time += success 264 | all_time += 1 265 | if all_time % args.num_test == 0: 266 | check_flag = True 267 | 268 | state, _ = env.reset() 269 | time.sleep(0.1) 270 | 271 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 272 | if torch.cuda.is_available(): 273 | state = state.cuda() 274 | 275 | if check_flag: 276 | all_success_rate = all_success_time / all_time 277 | log_info = 'Num steps: %d, Episode length: %d, Reward: %0.2f, EP Success: %0.2f, ALL Success: %0.3f' \ 278 | % (itr, episode_length, reward_sum, ep_success_time / args.num_test, all_success_rate) 279 | test_logger.info(log_info) 280 | print(log_info) 281 | torch.save(model.state_dict(), os.path.join(args.model_dir, 'latest.pth')) 282 | 283 | # save models in some important phases 284 | if all_success_rate > success_phase: 285 | torch.save(model.state_dict(), 286 | os.path.join(args.model_dir, 'success_rate_%0.2f.pth' % success_phase)) 287 | success_phase += 0.1 288 | 289 | # save models according to steps 290 | if epoch 
% 20 == 0: 291 | torch.save(model.state_dict(), 292 | os.path.join(args.model_dir, 'model_%d.pth' % epoch)) 293 | 294 | reward_sum = 0 295 | episode_length = 0 296 | ep_success_time = 0 297 | check_flag = False 298 | testing = False 299 | 300 | time.sleep(1) -------------------------------------------------------------------------------- /viewpoint_optim/RL_pointnet/pointnet.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import os 4 | import random 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.parallel 8 | import torch.backends.cudnn as cudnn 9 | import torch.optim as optim 10 | import torch.utils.data 11 | import torchvision.transforms as transforms 12 | import torchvision.utils as vutils 13 | from torch.autograd import Variable 14 | from PIL import Image 15 | import numpy as np 16 | import matplotlib.pyplot as plt 17 | import torch.nn.functional as F 18 | 19 | 20 | def weights_init(m): 21 | classname = m.__class__.__name__ 22 | if classname.find('Conv') != -1: 23 | weight_shape = list(m.weight.data.size()) 24 | fan_in = np.prod(weight_shape[1: 4]) 25 | fan_out = np.prod(weight_shape[2: 4]) * weight_shape[0] 26 | w_bound = np.sqrt(6. / (fan_in + fan_out)) 27 | m.weight.data.uniform_(-w_bound, w_bound) 28 | m.bias.data.fill_(0.0) 29 | elif classname.find('Linear') != -1: 30 | weight_shape = list(m.weight.data.size()) 31 | fan_in = weight_shape[1] 32 | fan_out = weight_shape[0] 33 | w_bound = np.sqrt(6.0 / (fan_in + fan_out)) 34 | m.weight.data.uniform_(-w_bound, w_bound) 35 | m.bias.data.fill_(0.0) 36 | elif classname.find('BatchNorm') != -1: 37 | m.weight.data.fill_(1.0) 38 | m.bias.data.fill_(0.0) 39 | elif classname.find('LSTMCell') != -1: 40 | m.bias_ih.data.fill_(0.0) 41 | m.bias_hh.data.fill_(0.0) 42 | 43 | 44 | class STN3d(nn.Module): 45 | def __init__(self, num_points = 2500): 46 | super(STN3d, self).__init__() 47 | self.num_points = num_points 48 | self.conv1 = torch.nn.Conv1d(4, 64, 1) 49 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 50 | self.conv3 = torch.nn.Conv1d(128, 1024, 1) 51 | self.mp1 = torch.nn.MaxPool1d(num_points) 52 | self.fc1 = nn.Linear(1024, 512) 53 | self.fc2 = nn.Linear(512, 256) 54 | self.fc3 = nn.Linear(256, 9) 55 | self.relu = nn.ReLU() 56 | 57 | self.bn1 = nn.BatchNorm1d(64) 58 | self.bn2 = nn.BatchNorm1d(128) 59 | self.bn3 = nn.BatchNorm1d(1024) 60 | self.bn4 = nn.BatchNorm1d(512) 61 | self.bn5 = nn.BatchNorm1d(256) 62 | 63 | def forward(self, x): 64 | # x --> 3 * 3 65 | batchsize = x.shape[0] 66 | if batchsize > 1: 67 | x = F.relu(self.bn1(self.conv1(x))) 68 | x = F.relu(self.bn2(self.conv2(x))) 69 | x = F.relu(self.bn3(self.conv3(x))) 70 | x = self.mp1(x) 71 | x = x.view(-1, 1024) 72 | 73 | x = F.relu(self.bn4(self.fc1(x))) 74 | x = F.relu(self.bn5(self.fc2(x))) 75 | else: 76 | x = F.relu(self.conv1(x)) 77 | x = F.relu(self.conv2(x)) 78 | x = F.relu(self.conv3(x)) 79 | x = self.mp1(x) 80 | x = x.view(-1, 1024) 81 | 82 | x = F.relu(self.fc1(x)) 83 | x = F.relu(self.fc2(x)) 84 | 85 | x = self.fc3(x) 86 | 87 | iden = Variable(torch.eye(3)).view(1, -1).repeat(batchsize, 1) 88 | if x.is_cuda: 89 | device = torch.device('cuda:%d' % x.get_device()) 90 | iden = iden.to(device=device) 91 | x = x + iden 92 | x = x.view(-1, 3, 3) 93 | 94 | return x 95 | 96 | 97 | class PointNetfeat(nn.Module): 98 | def __init__(self, num_points=2500, global_feat=True): 99 | super(PointNetfeat, self).__init__() 100 | self.stn = STN3d(num_points=num_points) 101 | 
self.conv1 = torch.nn.Conv1d(4, 64, 1) 102 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 103 | self.conv3 = torch.nn.Conv1d(128, 1024, 1) 104 | self.bn1 = nn.BatchNorm1d(64) 105 | self.bn2 = nn.BatchNorm1d(128) 106 | self.bn3 = nn.BatchNorm1d(1024) 107 | self.mp1 = torch.nn.MaxPool1d(num_points) 108 | self.num_points = num_points 109 | self.global_feat = global_feat 110 | 111 | def forward(self, x): 112 | trans = self.stn(x) 113 | x = torch.cat([torch.bmm(trans, x[:, :3, :]), x[:, 3, :].unsqueeze(1)], dim=1) 114 | 115 | if x.shape[0] > 1: 116 | x = F.relu(self.bn1(self.conv1(x))) 117 | pointfeat = x 118 | x = F.relu(self.bn2(self.conv2(x))) 119 | x = self.bn3(self.conv3(x)) 120 | else: 121 | x = F.relu(self.conv1(x)) 122 | pointfeat = x 123 | x = F.relu(self.conv2(x)) 124 | x = self.conv3(x) 125 | 126 | x = self.mp1(x) 127 | x = x.view(-1, 1024) 128 | 129 | if self.global_feat: 130 | return x, trans 131 | else: 132 | x = x.view(-1, 1024, 1).repeat(1, 1, self.num_points) 133 | return torch.cat([x, pointfeat], 1), trans 134 | 135 | 136 | class end_layer(nn.Module): 137 | def __init__(self, in_channels=1024, out_channels=1): 138 | super(end_layer, self).__init__() 139 | self.fc1 = nn.Linear(in_channels, 512) 140 | self.fc2 = nn.Linear(512, 256) 141 | self.fc3 = nn.Linear(256, out_channels) 142 | self.bn1 = nn.BatchNorm1d(512) 143 | self.bn2 = nn.BatchNorm1d(256) 144 | 145 | self.apply(weights_init) 146 | 147 | def forward(self, x): 148 | if x.size()[0] == 1: 149 | x = F.relu(self.fc1(x)) 150 | x = F.relu(self.fc2(x)) 151 | else: 152 | x = F.relu(self.bn1(self.fc1(x))) 153 | x = F.relu(self.bn2(self.fc2(x))) 154 | return self.fc3(x) 155 | 156 | 157 | class PointNetActorCritic(nn.Module): 158 | def __init__(self, num_points=2500, num_actions=4): 159 | super(PointNetActorCritic, self).__init__() 160 | self.num_points = num_points 161 | self.feat = PointNetfeat(num_points, global_feat=True) 162 | 163 | self.lstm = nn.LSTMCell(1024, 1024) 164 | 165 | self.critic_linear = end_layer(in_channels=1024, out_channels=1) 166 | self.actor_linear = end_layer(in_channels=1024, out_channels=num_actions) 167 | 168 | self.apply(weights_init) 169 | self.train() 170 | 171 | def forward(self, inputs): 172 | x, (hx, cx) = inputs 173 | x, _ = self.feat(x) 174 | hx, cx = self.lstm(x, (hx, cx)) 175 | x = hx 176 | 177 | return self.critic_linear(x), self.actor_linear(x), (hx, cx) 178 | 179 | 180 | if __name__ == '__main__': 181 | sim_data = Variable(torch.rand(10, 4, 2500)) 182 | 183 | # trans = STN3d() 184 | # out = trans(sim_data) 185 | # print('stn', out.size()) 186 | 187 | # pointfeat = PointNetfeat(global_feat=True) 188 | # out, _ = pointfeat(sim_data) 189 | # print('global feat', out.size()) 190 | 191 | # pointfeat = PointNetfeat(global_feat=False) 192 | # out, _ = pointfeat(sim_data) 193 | # print('point feat', out.size()) 194 | 195 | cls = PointNetActorCritic(num_actions=4) 196 | hx, cx = Variable(torch.zeros(10, 1024)), Variable(torch.zeros(10, 1024)) 197 | if torch.cuda.is_available(): 198 | sim_data = sim_data.cuda() 199 | cls = cls.cuda() 200 | hx, cx = hx.cuda(), cx.cuda() 201 | v, q, (hx ,cx) = cls((sim_data, (hx, cx))) 202 | print(v.shape, q.shape, hx.shape, cx.shape) 203 | print(v) 204 | print(q) 205 | -------------------------------------------------------------------------------- /viewpoint_optim/__init__.py: -------------------------------------------------------------------------------- 
--------------------------------------------------------------------------------
/viewpoint_optim/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .bn import ABN, InPlaceABN, InPlaceABNSync
2 | from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE
3 | from .misc import GlobalAvgPool2d
4 | from .residual import IdentityResidualBlock
5 | from .dense import DenseModule
6 | 
--------------------------------------------------------------------------------
/viewpoint_optim/modules/bn.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as functional
4 | 
5 | try:
6 |     from queue import Queue
7 | except ImportError:
8 |     from Queue import Queue
9 | 
10 | from .functions import *
11 | 
12 | 
13 | class ABN(nn.Module):
14 |     """Activated Batch Normalization
15 | 
16 |     This gathers a `BatchNorm2d` and an activation function in a single module
17 |     """
18 | 
19 |     def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
20 |         """Creates an Activated Batch Normalization module
21 | 
22 |         Parameters
23 |         ----------
24 |         num_features : int
25 |             Number of feature channels in the input and output.
26 |         eps : float
27 |             Small constant to prevent numerical issues.
28 |         momentum : float
29 |             Momentum factor applied when updating the running statistics.
30 |         affine : bool
31 |             If `True` apply learned scale and shift transformation after normalization.
32 |         activation : str
33 |             Name of the activation functions, one of: `leaky_relu`, `elu` or `none`.
34 |         slope : float
35 |             Negative slope for the `leaky_relu` activation.
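        Example
        -------
        A drop-in replacement for `nn.BatchNorm2d` followed by a leaky ReLU
        (hypothetical usage, not taken from this repository):

        >>> abn = ABN(64, activation="leaky_relu", slope=0.01)
        >>> y = abn(torch.randn(8, 64, 32, 32))   # same shape as the input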
36 | """ 37 | super(ABN, self).__init__() 38 | self.num_features = num_features 39 | self.affine = affine 40 | self.eps = eps 41 | self.momentum = momentum 42 | self.activation = activation 43 | self.slope = slope 44 | if self.affine: 45 | self.weight = nn.Parameter(torch.ones(num_features)) 46 | self.bias = nn.Parameter(torch.zeros(num_features)) 47 | else: 48 | self.register_parameter('weight', None) 49 | self.register_parameter('bias', None) 50 | self.register_buffer('running_mean', torch.zeros(num_features)) 51 | self.register_buffer('running_var', torch.ones(num_features)) 52 | self.reset_parameters() 53 | 54 | def reset_parameters(self): 55 | nn.init.constant_(self.running_mean, 0) 56 | nn.init.constant_(self.running_var, 1) 57 | if self.affine: 58 | nn.init.constant_(self.weight, 1) 59 | nn.init.constant_(self.bias, 0) 60 | 61 | def forward(self, x): 62 | x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias, 63 | self.training, self.momentum, self.eps) 64 | 65 | if self.activation == ACT_RELU: 66 | return functional.relu(x, inplace=True) 67 | elif self.activation == ACT_LEAKY_RELU: 68 | return functional.leaky_relu(x, negative_slope=self.slope, inplace=True) 69 | elif self.activation == ACT_ELU: 70 | return functional.elu(x, inplace=True) 71 | else: 72 | return x 73 | 74 | def __repr__(self): 75 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ 76 | ' affine={affine}, activation={activation}' 77 | if self.activation == "leaky_relu": 78 | rep += ', slope={slope})' 79 | else: 80 | rep += ')' 81 | return rep.format(name=self.__class__.__name__, **self.__dict__) 82 | 83 | 84 | class InPlaceABN(ABN): 85 | """InPlace Activated Batch Normalization""" 86 | 87 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): 88 | """Creates an InPlace Activated Batch Normalization module 89 | 90 | Parameters 91 | ---------- 92 | num_features : int 93 | Number of feature channels in the input and output. 94 | eps : float 95 | Small constant to prevent numerical issues. 96 | momentum : float 97 | Momentum factor applied to compute running statistics as. 98 | affine : bool 99 | If `True` apply learned scale and shift transformation after normalization. 100 | activation : str 101 | Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. 102 | slope : float 103 | Negative slope for the `leaky_relu` activation. 104 | """ 105 | super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope) 106 | 107 | def forward(self, x): 108 | return inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var, 109 | self.training, self.momentum, self.eps, self.activation, self.slope) 110 | 111 | 112 | class InPlaceABNSync(ABN): 113 | """InPlace Activated Batch Normalization with cross-GPU synchronization 114 | 115 | This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DataParallel`. 116 | """ 117 | 118 | def __init__(self, num_features, devices=None, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", 119 | slope=0.01): 120 | """Creates a synchronized, InPlace Activated Batch Normalization module 121 | 122 | Parameters 123 | ---------- 124 | num_features : int 125 | Number of feature channels in the input and output. 126 | devices : list of int or None 127 | IDs of the GPUs that will run the replicas of this module. 128 | eps : float 129 | Small constant to prevent numerical issues. 
130 | momentum : float 131 | Momentum factor applied to compute running statistics as. 132 | affine : bool 133 | If `True` apply learned scale and shift transformation after normalization. 134 | activation : str 135 | Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. 136 | slope : float 137 | Negative slope for the `leaky_relu` activation. 138 | """ 139 | super(InPlaceABNSync, self).__init__(num_features, eps, momentum, affine, activation, slope) 140 | self.devices = devices if devices else list(range(torch.cuda.device_count())) 141 | 142 | # Initialize queues 143 | self.worker_ids = self.devices[1:] 144 | self.master_queue = Queue(len(self.worker_ids)) 145 | self.worker_queues = [Queue(1) for _ in self.worker_ids] 146 | 147 | def forward(self, x): 148 | if x.get_device() == self.devices[0]: 149 | # Master mode 150 | extra = { 151 | "is_master": True, 152 | "master_queue": self.master_queue, 153 | "worker_queues": self.worker_queues, 154 | "worker_ids": self.worker_ids 155 | } 156 | else: 157 | # Worker mode 158 | extra = { 159 | "is_master": False, 160 | "master_queue": self.master_queue, 161 | "worker_queue": self.worker_queues[self.worker_ids.index(x.get_device())] 162 | } 163 | 164 | return inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var, 165 | extra, self.training, self.momentum, self.eps, self.activation, self.slope) 166 | 167 | def __repr__(self): 168 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ 169 | ' affine={affine}, devices={devices}, activation={activation}' 170 | if self.activation == "leaky_relu": 171 | rep += ', slope={slope})' 172 | else: 173 | rep += ')' 174 | return rep.format(name=self.__class__.__name__, **self.__dict__) 175 | -------------------------------------------------------------------------------- /viewpoint_optim/modules/deeplab.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as functional 4 | 5 | from models._util import try_index 6 | from .bn import ABN 7 | 8 | 9 | class DeeplabV3(nn.Module): 10 | def __init__(self, 11 | in_channels, 12 | out_channels, 13 | hidden_channels=256, 14 | dilations=(12, 24, 36), 15 | norm_act=ABN, 16 | pooling_size=None): 17 | super(DeeplabV3, self).__init__() 18 | self.pooling_size = pooling_size 19 | 20 | self.map_convs = nn.ModuleList([ 21 | nn.Conv2d(in_channels, hidden_channels, 1, bias=False), 22 | nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[0], padding=dilations[0]), 23 | nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[1], padding=dilations[1]), 24 | nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[2], padding=dilations[2]) 25 | ]) 26 | self.map_bn = norm_act(hidden_channels * 4) 27 | 28 | self.global_pooling_conv = nn.Conv2d(in_channels, hidden_channels, 1, bias=False) 29 | self.global_pooling_bn = norm_act(hidden_channels) 30 | 31 | self.red_conv = nn.Conv2d(hidden_channels * 4, out_channels, 1, bias=False) 32 | self.pool_red_conv = nn.Conv2d(hidden_channels, out_channels, 1, bias=False) 33 | self.red_bn = norm_act(out_channels) 34 | 35 | self.reset_parameters(self.map_bn.activation, self.map_bn.slope) 36 | 37 | def reset_parameters(self, activation, slope): 38 | gain = nn.init.calculate_gain(activation, slope) 39 | for m in self.modules(): 40 | if isinstance(m, nn.Conv2d): 41 | nn.init.xavier_normal_(m.weight.data, gain) 42 | if hasattr(m, "bias") and 
m.bias is not None: 43 | nn.init.constant_(m.bias, 0) 44 | elif isinstance(m, ABN): 45 | if hasattr(m, "weight") and m.weight is not None: 46 | nn.init.constant_(m.weight, 1) 47 | if hasattr(m, "bias") and m.bias is not None: 48 | nn.init.constant_(m.bias, 0) 49 | 50 | def forward(self, x): 51 | # Map convolutions 52 | out = torch.cat([m(x) for m in self.map_convs], dim=1) 53 | out = self.map_bn(out) 54 | out = self.red_conv(out) 55 | 56 | # Global pooling 57 | pool = self._global_pooling(x) 58 | pool = self.global_pooling_conv(pool) 59 | pool = self.global_pooling_bn(pool) 60 | pool = self.pool_red_conv(pool) 61 | if self.training or self.pooling_size is None: 62 | pool = pool.repeat(1, 1, x.size(2), x.size(3)) 63 | 64 | out += pool 65 | out = self.red_bn(out) 66 | return out 67 | 68 | def _global_pooling(self, x): 69 | if self.training or self.pooling_size is None: 70 | pool = x.view(x.size(0), x.size(1), -1).mean(dim=-1) 71 | pool = pool.view(x.size(0), x.size(1), 1, 1) 72 | else: 73 | pooling_size = (min(try_index(self.pooling_size, 0), x.shape[2]), 74 | min(try_index(self.pooling_size, 1), x.shape[3])) 75 | padding = ( 76 | (pooling_size[1] - 1) // 2, 77 | (pooling_size[1] - 1) // 2 if pooling_size[1] % 2 == 1 else (pooling_size[1] - 1) // 2 + 1, 78 | (pooling_size[0] - 1) // 2, 79 | (pooling_size[0] - 1) // 2 if pooling_size[0] % 2 == 1 else (pooling_size[0] - 1) // 2 + 1 80 | ) 81 | 82 | pool = functional.avg_pool2d(x, pooling_size, stride=1) 83 | pool = functional.pad(pool, pad=padding, mode="replicate") 84 | return pool 85 | -------------------------------------------------------------------------------- /viewpoint_optim/modules/dense.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .bn import ABN 7 | 8 | 9 | class DenseModule(nn.Module): 10 | def __init__(self, in_channels, growth, layers, bottleneck_factor=4, norm_act=ABN, dilation=1): 11 | super(DenseModule, self).__init__() 12 | self.in_channels = in_channels 13 | self.growth = growth 14 | self.layers = layers 15 | 16 | self.convs1 = nn.ModuleList() 17 | self.convs3 = nn.ModuleList() 18 | for i in range(self.layers): 19 | self.convs1.append(nn.Sequential(OrderedDict([ 20 | ("bn", norm_act(in_channels)), 21 | ("conv", nn.Conv2d(in_channels, self.growth * bottleneck_factor, 1, bias=False)) 22 | ]))) 23 | self.convs3.append(nn.Sequential(OrderedDict([ 24 | ("bn", norm_act(self.growth * bottleneck_factor)), 25 | ("conv", nn.Conv2d(self.growth * bottleneck_factor, self.growth, 3, padding=dilation, bias=False, 26 | dilation=dilation)) 27 | ]))) 28 | in_channels += self.growth 29 | 30 | @property 31 | def out_channels(self): 32 | return self.in_channels + self.growth * self.layers 33 | 34 | def forward(self, x): 35 | inputs = [x] 36 | for i in range(self.layers): 37 | x = torch.cat(inputs, dim=1) 38 | x = self.convs1[i](x) 39 | x = self.convs3[i](x) 40 | inputs += [x] 41 | 42 | return torch.cat(inputs, dim=1) 43 | -------------------------------------------------------------------------------- /viewpoint_optim/modules/functions.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | 3 | import torch.autograd as autograd 4 | import torch.cuda.comm as comm 5 | from torch.autograd.function import once_differentiable 6 | from torch.utils.cpp_extension import load 7 | 8 | _src_path = path.join(path.dirname(path.abspath(__file__)), "src") 9 | 
_backend = load(name="inplace_abn", 10 | extra_cflags=["-O3"], 11 | sources=[path.join(_src_path, f) for f in [ 12 | "inplace_abn.cpp", 13 | "inplace_abn_cpu.cpp", 14 | "inplace_abn_cuda.cu" 15 | ]], 16 | extra_cuda_cflags=["--expt-extended-lambda"]) 17 | 18 | # Activation names 19 | ACT_RELU = "relu" 20 | ACT_LEAKY_RELU = "leaky_relu" 21 | ACT_ELU = "elu" 22 | ACT_NONE = "none" 23 | 24 | 25 | def _check(fn, *args, **kwargs): 26 | success = fn(*args, **kwargs) 27 | if not success: 28 | raise RuntimeError("CUDA Error encountered in {}".format(fn)) 29 | 30 | 31 | def _broadcast_shape(x): 32 | out_size = [] 33 | for i, s in enumerate(x.size()): 34 | if i != 1: 35 | out_size.append(1) 36 | else: 37 | out_size.append(s) 38 | return out_size 39 | 40 | 41 | def _reduce(x): 42 | if len(x.size()) == 2: 43 | return x.sum(dim=0) 44 | else: 45 | n, c = x.size()[0:2] 46 | return x.contiguous().view((n, c, -1)).sum(2).sum(0) 47 | 48 | 49 | def _count_samples(x): 50 | count = 1 51 | for i, s in enumerate(x.size()): 52 | if i != 1: 53 | count *= s 54 | return count 55 | 56 | 57 | def _act_forward(ctx, x): 58 | if ctx.activation == ACT_LEAKY_RELU: 59 | _backend.leaky_relu_forward(x, ctx.slope) 60 | elif ctx.activation == ACT_ELU: 61 | _backend.elu_forward(x) 62 | elif ctx.activation == ACT_NONE: 63 | pass 64 | 65 | 66 | def _act_backward(ctx, x, dx): 67 | if ctx.activation == ACT_LEAKY_RELU: 68 | _backend.leaky_relu_backward(x, dx, ctx.slope) 69 | elif ctx.activation == ACT_ELU: 70 | _backend.elu_backward(x, dx) 71 | elif ctx.activation == ACT_NONE: 72 | pass 73 | 74 | 75 | class InPlaceABN(autograd.Function): 76 | @staticmethod 77 | def forward(ctx, x, weight, bias, running_mean, running_var, 78 | training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01): 79 | # Save context 80 | ctx.training = training 81 | ctx.momentum = momentum 82 | ctx.eps = eps 83 | ctx.activation = activation 84 | ctx.slope = slope 85 | ctx.affine = weight is not None and bias is not None 86 | 87 | # Prepare inputs 88 | count = _count_samples(x) 89 | x = x.contiguous() 90 | weight = weight.contiguous() if ctx.affine else x.new_empty(0) 91 | bias = bias.contiguous() if ctx.affine else x.new_empty(0) 92 | 93 | if ctx.training: 94 | mean, var = _backend.mean_var(x) 95 | 96 | # Update running stats 97 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) 98 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1)) 99 | 100 | # Mark in-place modified tensors 101 | ctx.mark_dirty(x, running_mean, running_var) 102 | else: 103 | mean, var = running_mean.contiguous(), running_var.contiguous() 104 | ctx.mark_dirty(x) 105 | 106 | # BN forward + activation 107 | _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) 108 | _act_forward(ctx, x) 109 | 110 | # Output 111 | ctx.var = var 112 | ctx.save_for_backward(x, var, weight, bias) 113 | return x 114 | 115 | @staticmethod 116 | @once_differentiable 117 | def backward(ctx, dz): 118 | z, var, weight, bias = ctx.saved_tensors 119 | dz = dz.contiguous() 120 | 121 | # Undo activation 122 | _act_backward(ctx, z, dz) 123 | 124 | if ctx.training: 125 | edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) 126 | else: 127 | # TODO: implement simplified CUDA backward for inference mode 128 | edz = dz.new_zeros(dz.size(1)) 129 | eydz = dz.new_zeros(dz.size(1)) 130 | 131 | dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) 132 | dweight = dweight if ctx.affine else None 
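        # autograd expects one gradient per argument of forward(): when
        # affine=False the backend's weight/bias grads are placeholders, so they
        # are masked to None here, and the trailing Nones returned below cover
        # the seven non-differentiable arguments (running stats, flags and
        # hyper-parameters).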
133 | dbias = dbias if ctx.affine else None 134 | 135 | return dx, dweight, dbias, None, None, None, None, None, None, None 136 | 137 | 138 | class InPlaceABNSync(autograd.Function): 139 | @classmethod 140 | def forward(cls, ctx, x, weight, bias, running_mean, running_var, 141 | extra, training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01): 142 | # Save context 143 | cls._parse_extra(ctx, extra) 144 | ctx.training = training 145 | ctx.momentum = momentum 146 | ctx.eps = eps 147 | ctx.activation = activation 148 | ctx.slope = slope 149 | ctx.affine = weight is not None and bias is not None 150 | 151 | # Prepare inputs 152 | count = _count_samples(x) * (ctx.master_queue.maxsize + 1) 153 | x = x.contiguous() 154 | weight = weight.contiguous() if ctx.affine else x.new_empty(0) 155 | bias = bias.contiguous() if ctx.affine else x.new_empty(0) 156 | 157 | if ctx.training: 158 | mean, var = _backend.mean_var(x) 159 | 160 | if ctx.is_master: 161 | means, vars = [mean.unsqueeze(0)], [var.unsqueeze(0)] 162 | for _ in range(ctx.master_queue.maxsize): 163 | mean_w, var_w = ctx.master_queue.get() 164 | ctx.master_queue.task_done() 165 | means.append(mean_w.unsqueeze(0)) 166 | vars.append(var_w.unsqueeze(0)) 167 | 168 | means = comm.gather(means) 169 | vars = comm.gather(vars) 170 | 171 | mean = means.mean(0) 172 | var = (vars + (mean - means) ** 2).mean(0) 173 | 174 | tensors = comm.broadcast_coalesced((mean, var), [mean.get_device()] + ctx.worker_ids) 175 | for ts, queue in zip(tensors[1:], ctx.worker_queues): 176 | queue.put(ts) 177 | else: 178 | ctx.master_queue.put((mean, var)) 179 | mean, var = ctx.worker_queue.get() 180 | ctx.worker_queue.task_done() 181 | 182 | # Update running stats 183 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) 184 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1)) 185 | 186 | # Mark in-place modified tensors 187 | ctx.mark_dirty(x, running_mean, running_var) 188 | else: 189 | mean, var = running_mean.contiguous(), running_var.contiguous() 190 | ctx.mark_dirty(x) 191 | 192 | # BN forward + activation 193 | _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) 194 | _act_forward(ctx, x) 195 | 196 | # Output 197 | ctx.var = var 198 | ctx.save_for_backward(x, var, weight, bias) 199 | return x 200 | 201 | @staticmethod 202 | @once_differentiable 203 | def backward(ctx, dz): 204 | z, var, weight, bias = ctx.saved_tensors 205 | dz = dz.contiguous() 206 | 207 | # Undo activation 208 | _act_backward(ctx, z, dz) 209 | 210 | if ctx.training: 211 | edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) 212 | 213 | if ctx.is_master: 214 | edzs, eydzs = [edz], [eydz] 215 | for _ in range(len(ctx.worker_queues)): 216 | edz_w, eydz_w = ctx.master_queue.get() 217 | ctx.master_queue.task_done() 218 | edzs.append(edz_w) 219 | eydzs.append(eydz_w) 220 | 221 | edz = comm.reduce_add(edzs) / (ctx.master_queue.maxsize + 1) 222 | eydz = comm.reduce_add(eydzs) / (ctx.master_queue.maxsize + 1) 223 | 224 | tensors = comm.broadcast_coalesced((edz, eydz), [edz.get_device()] + ctx.worker_ids) 225 | for ts, queue in zip(tensors[1:], ctx.worker_queues): 226 | queue.put(ts) 227 | else: 228 | ctx.master_queue.put((edz, eydz)) 229 | edz, eydz = ctx.worker_queue.get() 230 | ctx.worker_queue.task_done() 231 | else: 232 | edz = dz.new_zeros(dz.size(1)) 233 | eydz = dz.new_zeros(dz.size(1)) 234 | 235 | dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) 236 | 
dweight = dweight if ctx.affine else None 237 | dbias = dbias if ctx.affine else None 238 | 239 | return dx, dweight, dbias, None, None, None, None, None, None, None, None 240 | 241 | @staticmethod 242 | def _parse_extra(ctx, extra): 243 | ctx.is_master = extra["is_master"] 244 | if ctx.is_master: 245 | ctx.master_queue = extra["master_queue"] 246 | ctx.worker_queues = extra["worker_queues"] 247 | ctx.worker_ids = extra["worker_ids"] 248 | else: 249 | ctx.master_queue = extra["master_queue"] 250 | ctx.worker_queue = extra["worker_queue"] 251 | 252 | 253 | inplace_abn = InPlaceABN.apply 254 | inplace_abn_sync = InPlaceABNSync.apply 255 | 256 | __all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"] 257 | -------------------------------------------------------------------------------- /viewpoint_optim/modules/misc.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class GlobalAvgPool2d(nn.Module): 5 | def __init__(self): 6 | """Global average pooling over the input's spatial dimensions""" 7 | super(GlobalAvgPool2d, self).__init__() 8 | 9 | def forward(self, inputs): 10 | in_size = inputs.size() 11 | return inputs.view((in_size[0], in_size[1], -1)).mean(dim=2) 12 | -------------------------------------------------------------------------------- /viewpoint_optim/modules/residual.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.nn as nn 4 | 5 | from .bn import ABN 6 | 7 | 8 | class IdentityResidualBlock(nn.Module): 9 | def __init__(self, 10 | in_channels, 11 | channels, 12 | stride=1, 13 | dilation=1, 14 | groups=1, 15 | norm_act=ABN, 16 | dropout=None): 17 | """Configurable identity-mapping residual block 18 | 19 | Parameters 20 | ---------- 21 | in_channels : int 22 | Number of input channels. 23 | channels : list of int 24 | Number of channels in the internal feature maps. Can either have two or three elements: if three construct 25 | a residual block with two `3 x 3` convolutions, otherwise construct a bottleneck block with `1 x 1`, then 26 | `3 x 3` then `1 x 1` convolutions. 27 | stride : int 28 | Stride of the first `3 x 3` convolution 29 | dilation : int 30 | Dilation to apply to the `3 x 3` convolutions. 31 | groups : int 32 | Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with 33 | bottleneck blocks. 34 | norm_act : callable 35 | Function to create normalization / activation Module. 36 | dropout: callable 37 | Function to create Dropout Module. 
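        Example
        -------
        A hypothetical bottleneck block with a strided projection shortcut
        (illustrative only, not taken from this repository's configs):

        >>> block = IdentityResidualBlock(256, [64, 64, 256], stride=2, norm_act=ABN)
        >>> y = block(torch.randn(2, 256, 56, 56))   # -> (2, 256, 28, 28)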
38 | """ 39 | super(IdentityResidualBlock, self).__init__() 40 | 41 | # Check parameters for inconsistencies 42 | if len(channels) != 2 and len(channels) != 3: 43 | raise ValueError("channels must contain either two or three values") 44 | if len(channels) == 2 and groups != 1: 45 | raise ValueError("groups > 1 are only valid if len(channels) == 3") 46 | 47 | is_bottleneck = len(channels) == 3 48 | need_proj_conv = stride != 1 or in_channels != channels[-1] 49 | 50 | self.bn1 = norm_act(in_channels) 51 | if not is_bottleneck: 52 | layers = [ 53 | ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False, 54 | dilation=dilation)), 55 | ("bn2", norm_act(channels[0])), 56 | ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, 57 | dilation=dilation)) 58 | ] 59 | if dropout is not None: 60 | layers = layers[0:2] + [("dropout", dropout())] + layers[2:] 61 | else: 62 | layers = [ 63 | ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=stride, padding=0, bias=False)), 64 | ("bn2", norm_act(channels[0])), 65 | ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, 66 | groups=groups, dilation=dilation)), 67 | ("bn3", norm_act(channels[1])), 68 | ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False)) 69 | ] 70 | if dropout is not None: 71 | layers = layers[0:4] + [("dropout", dropout())] + layers[4:] 72 | self.convs = nn.Sequential(OrderedDict(layers)) 73 | 74 | if need_proj_conv: 75 | self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False) 76 | 77 | def forward(self, x): 78 | if hasattr(self, "proj_conv"): 79 | bn1 = self.bn1(x) 80 | shortcut = self.proj_conv(bn1) 81 | else: 82 | shortcut = x.clone() 83 | bn1 = self.bn1(x) 84 | 85 | out = self.convs(bn1) 86 | out.add_(shortcut) 87 | 88 | return out 89 | -------------------------------------------------------------------------------- /viewpoint_optim/modules/src/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | /* 6 | * General settings 7 | */ 8 | const int WARP_SIZE = 32; 9 | const int MAX_BLOCK_SIZE = 512; 10 | 11 | template 12 | struct Pair { 13 | T v1, v2; 14 | __device__ Pair() {} 15 | __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {} 16 | __device__ Pair(T v) : v1(v), v2(v) {} 17 | __device__ Pair(int v) : v1(v), v2(v) {} 18 | __device__ Pair &operator+=(const Pair &a) { 19 | v1 += a.v1; 20 | v2 += a.v2; 21 | return *this; 22 | } 23 | }; 24 | 25 | /* 26 | * Utility functions 27 | */ 28 | template 29 | __device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, 30 | unsigned int mask = 0xffffffff) { 31 | #if CUDART_VERSION >= 9000 32 | return __shfl_xor_sync(mask, value, laneMask, width); 33 | #else 34 | return __shfl_xor(value, laneMask, width); 35 | #endif 36 | } 37 | 38 | __device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); } 39 | 40 | static int getNumThreads(int nElem) { 41 | int threadSizes[5] = {32, 64, 128, 256, MAX_BLOCK_SIZE}; 42 | for (int i = 0; i != 5; ++i) { 43 | if (nElem <= threadSizes[i]) { 44 | return threadSizes[i]; 45 | } 46 | } 47 | return MAX_BLOCK_SIZE; 48 | } 49 | 50 | template 51 | static __device__ __forceinline__ T warpSum(T val) { 52 | #if __CUDA_ARCH__ >= 300 53 | for (int i = 0; i < getMSB(WARP_SIZE); ++i) { 54 | val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE); 55 | } 56 | #else 57 | __shared__ T 
50 | template <typename T>
51 | static __device__ __forceinline__ T warpSum(T val) {
52 | #if __CUDA_ARCH__ >= 300
53 |   for (int i = 0; i < getMSB(WARP_SIZE); ++i) {
54 |     val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE);
55 |   }
56 | #else
57 |   __shared__ T values[MAX_BLOCK_SIZE];
58 |   values[threadIdx.x] = val;
59 |   __threadfence_block();
60 |   const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE;
61 |   for (int i = 1; i < WARP_SIZE; i++) {
62 |     val += values[base + ((i + threadIdx.x) % WARP_SIZE)];
63 |   }
64 | #endif
65 |   return val;
66 | }
67 | 
68 | template <typename T>
69 | static __device__ __forceinline__ Pair<T> warpSum(Pair<T> value) {
70 |   value.v1 = warpSum(value.v1);
71 |   value.v2 = warpSum(value.v2);
72 |   return value;
73 | }
74 | 
75 | template <typename T, typename Op>
76 | __device__ T reduce(Op op, int plane, int N, int C, int S) {
77 |   T sum = (T)0;
78 |   for (int batch = 0; batch < N; ++batch) {
79 |     for (int x = threadIdx.x; x < S; x += blockDim.x) {
80 |       sum += op(batch, plane, x);
81 |     }
82 |   }
83 | 
84 |   // sum over NumThreads within a warp
85 |   sum = warpSum(sum);
86 | 
87 |   // 'transpose', and reduce within warp again
88 |   __shared__ T shared[32];
89 |   __syncthreads();
90 |   if (threadIdx.x % WARP_SIZE == 0) {
91 |     shared[threadIdx.x / WARP_SIZE] = sum;
92 |   }
93 |   if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) {
94 |     // zero out the other entries in shared
95 |     shared[threadIdx.x] = (T)0;
96 |   }
97 |   __syncthreads();
98 |   if (threadIdx.x / WARP_SIZE == 0) {
99 |     sum = warpSum(shared[threadIdx.x]);
100 |     if (threadIdx.x == 0) {
101 |       shared[0] = sum;
102 |     }
103 |   }
104 |   __syncthreads();
105 | 
106 |   // Everyone picks it up, should be broadcast into the whole gradInput
107 |   return shared[0];
108 | }
--------------------------------------------------------------------------------
/viewpoint_optim/modules/src/inplace_abn.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 | 
3 | #include <vector>
4 | 
5 | #include "inplace_abn.h"
6 | 
7 | std::vector<at::Tensor> mean_var(at::Tensor x) {
8 |   if (x.is_cuda()) {
9 |     return mean_var_cuda(x);
10 |   } else {
11 |     return mean_var_cpu(x);
12 |   }
13 | }
14 | 
15 | at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
16 |                    bool affine, float eps) {
17 |   if (x.is_cuda()) {
18 |     return forward_cuda(x, mean, var, weight, bias, affine, eps);
19 |   } else {
20 |     return forward_cpu(x, mean, var, weight, bias, affine, eps);
21 |   }
22 | }
23 | 
24 | std::vector<at::Tensor> edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
25 |                                  bool affine, float eps) {
26 |   if (z.is_cuda()) {
27 |     return edz_eydz_cuda(z, dz, weight, bias, affine, eps);
28 |   } else {
29 |     return edz_eydz_cpu(z, dz, weight, bias, affine, eps);
30 |   }
31 | }
32 | 
33 | std::vector<at::Tensor> backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
34 |                                  at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
35 |   if (z.is_cuda()) {
36 |     return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps);
37 |   } else {
38 |     return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps);
39 |   }
40 | }
41 | 
42 | void leaky_relu_forward(at::Tensor z, float slope) {
43 |   at::leaky_relu_(z, slope);
44 | }
45 | 
46 | void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) {
47 |   if (z.is_cuda()) {
48 |     return leaky_relu_backward_cuda(z, dz, slope);
49 |   } else {
50 |     return leaky_relu_backward_cpu(z, dz, slope);
51 |   }
52 | }
53 | 
54 | void elu_forward(at::Tensor z) {
55 |   at::elu_(z);
56 | }
57 | 
58 | void elu_backward(at::Tensor z, at::Tensor dz) {
59 |   if (z.is_cuda()) {
60 |     return elu_backward_cuda(z, dz);
61 |   } else {
62 |     return elu_backward_cpu(z, dz);
63 |   }
64 | }
65 | 
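// Python bindings. Each exported op dispatches to the CUDA or CPU variant
// based on the input tensor's device (the is_cuda() branches above); the
// exported names match the `_backend.*` calls made from modules/functions.py.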
m.def("mean_var", &mean_var, "Mean and variance computation"); 68 | m.def("forward", &forward, "In-place forward computation"); 69 | m.def("edz_eydz", &edz_eydz, "First part of backward computation"); 70 | m.def("backward", &backward, "Second part of backward computation"); 71 | m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation"); 72 | m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion"); 73 | m.def("elu_forward", &elu_forward, "Elu forward computation"); 74 | m.def("elu_backward", &elu_backward, "Elu backward computation and inversion"); 75 | } -------------------------------------------------------------------------------- /viewpoint_optim/modules/src/inplace_abn.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | std::vector mean_var_cpu(at::Tensor x); 8 | std::vector mean_var_cuda(at::Tensor x); 9 | 10 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 11 | bool affine, float eps); 12 | at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 13 | bool affine, float eps); 14 | 15 | std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 16 | bool affine, float eps); 17 | std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 18 | bool affine, float eps); 19 | 20 | std::vector backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 21 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 22 | std::vector backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 23 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 24 | 25 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope); 26 | void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope); 27 | 28 | void elu_backward_cpu(at::Tensor z, at::Tensor dz); 29 | void elu_backward_cuda(at::Tensor z, at::Tensor dz); -------------------------------------------------------------------------------- /viewpoint_optim/modules/src/inplace_abn_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "inplace_abn.h" 6 | 7 | at::Tensor reduce_sum(at::Tensor x) { 8 | if (x.ndimension() == 2) { 9 | return x.sum(0); 10 | } else { 11 | auto x_view = x.view({x.size(0), x.size(1), -1}); 12 | return x_view.sum(-1).sum(0); 13 | } 14 | } 15 | 16 | at::Tensor broadcast_to(at::Tensor v, at::Tensor x) { 17 | if (x.ndimension() == 2) { 18 | return v; 19 | } else { 20 | std::vector broadcast_size = {1, -1}; 21 | for (int64_t i = 2; i < x.ndimension(); ++i) 22 | broadcast_size.push_back(1); 23 | 24 | return v.view(broadcast_size); 25 | } 26 | } 27 | 28 | int64_t count(at::Tensor x) { 29 | int64_t count = x.size(0); 30 | for (int64_t i = 2; i < x.ndimension(); ++i) 31 | count *= x.size(i); 32 | 33 | return count; 34 | } 35 | 36 | at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) { 37 | if (affine) { 38 | return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z); 39 | } else { 40 | return z; 41 | } 42 | } 43 | 44 | std::vector mean_var_cpu(at::Tensor x) { 45 | auto num = count(x); 46 | auto mean = reduce_sum(x) / num; 47 | auto diff = x - broadcast_to(mean, x); 48 | 
44 | std::vector<at::Tensor> mean_var_cpu(at::Tensor x) {
45 |   auto num = count(x);
46 |   auto mean = reduce_sum(x) / num;
47 |   auto diff = x - broadcast_to(mean, x);
48 |   auto var = reduce_sum(diff.pow(2)) / num;
49 | 
50 |   return {mean, var};
51 | }
52 | 
53 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
54 |                        bool affine, float eps) {
55 |   auto gamma = affine ? at::abs(weight) + eps : at::ones_like(var);
56 |   auto mul = at::rsqrt(var + eps) * gamma;
57 | 
58 |   x.sub_(broadcast_to(mean, x));
59 |   x.mul_(broadcast_to(mul, x));
60 |   if (affine) x.add_(broadcast_to(bias, x));
61 | 
62 |   return x;
63 | }
64 | 
65 | std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
66 |                                      bool affine, float eps) {
67 |   auto edz = reduce_sum(dz);
68 |   auto y = invert_affine(z, weight, bias, affine, eps);
69 |   auto eydz = reduce_sum(y * dz);
70 | 
71 |   return {edz, eydz};
72 | }
73 | 
74 | std::vector<at::Tensor> backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
75 |                                      at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
76 |   auto y = invert_affine(z, weight, bias, affine, eps);
77 |   auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps);
78 | 
79 |   auto num = count(z);
80 |   auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz);
81 | 
82 |   auto dweight = at::empty(z.type(), {0});
83 |   auto dbias = at::empty(z.type(), {0});
84 |   if (affine) {
85 |     dweight = eydz * at::sign(weight);
86 |     dbias = edz;
87 |   }
88 | 
89 |   return {dx, dweight, dbias};
90 | }
91 | 
92 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) {
93 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] {
94 |     int64_t count = z.numel();
95 |     auto *_z = z.data<scalar_t>();
96 |     auto *_dz = dz.data<scalar_t>();
97 | 
98 |     for (int64_t i = 0; i < count; ++i) {
99 |       if (_z[i] < 0) {
100 |         _z[i] *= 1 / slope;
101 |         _dz[i] *= slope;
102 |       }
103 |     }
104 |   }));
105 | }
106 | 
107 | void elu_backward_cpu(at::Tensor z, at::Tensor dz) {
108 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] {
109 |     int64_t count = z.numel();
110 |     auto *_z = z.data<scalar_t>();
111 |     auto *_dz = dz.data<scalar_t>();
112 | 
113 |     for (int64_t i = 0; i < count; ++i) {
114 |       if (_z[i] < 0) {
115 |         _dz[i] *= (_z[i] + 1.f);  // use the saved output before inversion: d(elu)/dy = z + 1
116 |         _z[i] = log1p(_z[i]);     // then invert to recover the pre-activation y
117 |       }
118 |     }
119 |   }));
120 | }
--------------------------------------------------------------------------------
/viewpoint_optim/modules/src/inplace_abn_cuda.cu:
--------------------------------------------------------------------------------
1 | #include <ATen/ATen.h>
2 | 
3 | #include <thrust/device_ptr.h>
4 | #include <thrust/transform.h>
5 | 
6 | #include <vector>
7 | 
8 | #include "common.h"
9 | #include "inplace_abn.h"
10 | 
11 | // Checks
12 | #ifndef AT_CHECK
13 | #define AT_CHECK AT_ASSERT
14 | #endif
15 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
16 | #define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x " must be contiguous")
17 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
18 | 
19 | // Utilities
20 | void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) {
21 |   num = x.size(0);
22 |   chn = x.size(1);
23 |   sp = 1;
24 |   for (int64_t i = 2; i < x.ndimension(); ++i)
25 |     sp *= x.size(i);
26 | }
27 | 
28 | // Operations for reduce
29 | template <typename T>
30 | struct SumOp {
31 |   __device__ SumOp(const T *t, int c, int s)
32 |       : tensor(t), chn(c), sp(s) {}
33 |   __device__ __forceinline__ T operator()(int batch, int plane, int n) {
34 |     return tensor[(batch * chn + plane) * sp + n];
35 |   }
36 |   const T *tensor;
37 |   const int chn;
38 |   const int sp;
39 | };
40 | 
41 | template <typename T>
42 | struct VarOp {
43 |   __device__ VarOp(T m, const T *t, int c, int s)
44 |       : mean(m), tensor(t), chn(c), sp(s) {}
45 |   __device__ __forceinline__ T operator()(int batch, int plane, int n) {
46 |     T val = tensor[(batch * chn + plane) * sp + n];
47 |     return (val - mean) * (val - mean);
48 |   }
49 |   const T mean;
50 |   const T *tensor;
51 |   const int chn;
52 |   const int sp;
53 | };
54 | 
55 | template <typename T>
56 | struct GradOp {
57 |   __device__ GradOp(T _weight, T _bias, const T *_z, const T *_dz, int c, int s)
58 |       : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {}
59 |   __device__ __forceinline__ Pair<T> operator()(int batch, int plane, int n) {
60 |     T _y = (z[(batch * chn + plane) * sp + n] - bias) / weight;
61 |     T _dz = dz[(batch * chn + plane) * sp + n];
62 |     return Pair<T>(_dz, _y * _dz);
63 |   }
64 |   const T weight;
65 |   const T bias;
66 |   const T *z;
67 |   const T *dz;
68 |   const int chn;
69 |   const int sp;
70 | };
71 | 
72 | /***********
73 |  * mean_var
74 |  ***********/
75 | 
76 | template <typename T>
77 | __global__ void mean_var_kernel(const T *x, T *mean, T *var, int num, int chn, int sp) {
78 |   int plane = blockIdx.x;
79 |   T norm = T(1) / T(num * sp);
80 | 
81 |   T _mean = reduce<T, SumOp<T>>(SumOp<T>(x, chn, sp), plane, num, chn, sp) * norm;
82 |   __syncthreads();
83 |   T _var = reduce<T, VarOp<T>>(VarOp<T>(_mean, x, chn, sp), plane, num, chn, sp) * norm;
84 | 
85 |   if (threadIdx.x == 0) {
86 |     mean[plane] = _mean;
87 |     var[plane] = _var;
88 |   }
89 | }
90 | 
91 | std::vector<at::Tensor> mean_var_cuda(at::Tensor x) {
92 |   CHECK_INPUT(x);
93 | 
94 |   // Extract dimensions
95 |   int64_t num, chn, sp;
96 |   get_dims(x, num, chn, sp);
97 | 
98 |   // Prepare output tensors
99 |   auto mean = at::empty({chn}, x.options());
100 |   auto var = at::empty({chn}, x.options());
101 | 
102 |   // Run kernel
103 |   dim3 blocks(chn);
104 |   dim3 threads(getNumThreads(sp));
105 |   AT_DISPATCH_FLOATING_TYPES(x.type(), "mean_var_cuda", ([&] {
106 |     mean_var_kernel<scalar_t><<<blocks, threads>>>(
107 |         x.data<scalar_t>(),
108 |         mean.data<scalar_t>(),
109 |         var.data<scalar_t>(),
110 |         num, chn, sp);
111 |   }));
112 | 
113 |   return {mean, var};
114 | }
115 | 
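// Launch configuration used by every kernel in this file: one block per
// channel (blocks = chn), with up to MAX_BLOCK_SIZE threads striding over
// the num * sp elements of that channel's plane.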
116 | /**********
117 |  * forward
118 |  **********/
119 | 
120 | template <typename T>
121 | __global__ void forward_kernel(T *x, const T *mean, const T *var, const T *weight, const T *bias,
122 |                                bool affine, float eps, int num, int chn, int sp) {
123 |   int plane = blockIdx.x;
124 | 
125 |   T _mean = mean[plane];
126 |   T _var = var[plane];
127 |   T _weight = affine ? abs(weight[plane]) + eps : T(1);
128 |   T _bias = affine ? bias[plane] : T(0);
129 | 
130 |   T mul = rsqrt(_var + eps) * _weight;
131 | 
132 |   for (int batch = 0; batch < num; ++batch) {
133 |     for (int n = threadIdx.x; n < sp; n += blockDim.x) {
134 |       T _x = x[(batch * chn + plane) * sp + n];
135 |       T _y = (_x - _mean) * mul + _bias;
136 | 
137 |       x[(batch * chn + plane) * sp + n] = _y;
138 |     }
139 |   }
140 | }
141 | 
142 | at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
143 |                         bool affine, float eps) {
144 |   CHECK_INPUT(x);
145 |   CHECK_INPUT(mean);
146 |   CHECK_INPUT(var);
147 |   CHECK_INPUT(weight);
148 |   CHECK_INPUT(bias);
149 | 
150 |   // Extract dimensions
151 |   int64_t num, chn, sp;
152 |   get_dims(x, num, chn, sp);
153 | 
154 |   // Run kernel
155 |   dim3 blocks(chn);
156 |   dim3 threads(getNumThreads(sp));
157 |   AT_DISPATCH_FLOATING_TYPES(x.type(), "forward_cuda", ([&] {
158 |     forward_kernel<scalar_t><<<blocks, threads>>>(
159 |         x.data<scalar_t>(),
160 |         mean.data<scalar_t>(),
161 |         var.data<scalar_t>(),
162 |         weight.data<scalar_t>(),
163 |         bias.data<scalar_t>(),
164 |         affine, eps, num, chn, sp);
165 |   }));
166 | 
167 |   return x;
168 | }
169 | 
170 | /***********
171 |  * edz_eydz
172 |  ***********/
173 | 
174 | template <typename T>
175 | __global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias,
176 |                                 T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) {
177 |   int plane = blockIdx.x;
178 | 
179 |   T _weight = affine ? abs(weight[plane]) + eps : 1.f;
180 |   T _bias = affine ? bias[plane] : 0.f;
181 | 
182 |   Pair<T> res = reduce<Pair<T>, GradOp<T>>(GradOp<T>(_weight, _bias, z, dz, chn, sp), plane, num, chn, sp);
183 |   __syncthreads();
184 | 
185 |   if (threadIdx.x == 0) {
186 |     edz[plane] = res.v1;
187 |     eydz[plane] = res.v2;
188 |   }
189 | }
190 | 
191 | std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
192 |                                       bool affine, float eps) {
193 |   CHECK_INPUT(z);
194 |   CHECK_INPUT(dz);
195 |   CHECK_INPUT(weight);
196 |   CHECK_INPUT(bias);
197 | 
198 |   // Extract dimensions
199 |   int64_t num, chn, sp;
200 |   get_dims(z, num, chn, sp);
201 | 
202 |   auto edz = at::empty({chn}, z.options());
203 |   auto eydz = at::empty({chn}, z.options());
204 | 
205 |   // Run kernel
206 |   dim3 blocks(chn);
207 |   dim3 threads(getNumThreads(sp));
208 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "edz_eydz_cuda", ([&] {
209 |     edz_eydz_kernel<scalar_t><<<blocks, threads>>>(
210 |         z.data<scalar_t>(),
211 |         dz.data<scalar_t>(),
212 |         weight.data<scalar_t>(),
213 |         bias.data<scalar_t>(),
214 |         edz.data<scalar_t>(),
215 |         eydz.data<scalar_t>(),
216 |         affine, eps, num, chn, sp);
217 |   }));
218 | 
219 |   return {edz, eydz};
220 | }
221 | 
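// The backward pass runs in two stages: edz_eydz_cuda above reduces sum(dz)
// and sum(y * dz) per channel, and backward_cuda below applies the
// element-wise batch-norm gradient using those per-channel statistics.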
222 | /***********
223 |  * backward
224 |  ***********/
225 | 
226 | template <typename T>
227 | __global__ void backward_kernel(const T *z, const T *dz, const T *var, const T *weight, const T *bias, const T *edz,
228 |                                 const T *eydz, T *dx, T *dweight, T *dbias,
229 |                                 bool affine, float eps, int num, int chn, int sp) {
230 |   int plane = blockIdx.x;
231 | 
232 |   T _weight = affine ? abs(weight[plane]) + eps : 1.f;
233 |   T _bias = affine ? bias[plane] : 0.f;
234 |   T _var = var[plane];
235 |   T _edz = edz[plane];
236 |   T _eydz = eydz[plane];
237 | 
238 |   T _mul = _weight * rsqrt(_var + eps);
239 |   T count = T(num * sp);
240 | 
241 |   for (int batch = 0; batch < num; ++batch) {
242 |     for (int n = threadIdx.x; n < sp; n += blockDim.x) {
243 |       T _dz = dz[(batch * chn + plane) * sp + n];
244 |       T _y = (z[(batch * chn + plane) * sp + n] - _bias) / _weight;
245 | 
246 |       dx[(batch * chn + plane) * sp + n] = (_dz - _edz / count - _y * _eydz / count) * _mul;
247 |     }
248 |   }
249 | 
250 |   if (threadIdx.x == 0) {
251 |     if (affine) {
252 |       dweight[plane] = weight[plane] > 0 ? _eydz : -_eydz;
253 |       dbias[plane] = _edz;
254 |     }
255 |   }
256 | }
257 | 
258 | std::vector<at::Tensor> backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
259 |                                       at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
260 |   CHECK_INPUT(z);
261 |   CHECK_INPUT(dz);
262 |   CHECK_INPUT(var);
263 |   CHECK_INPUT(weight);
264 |   CHECK_INPUT(bias);
265 |   CHECK_INPUT(edz);
266 |   CHECK_INPUT(eydz);
267 | 
268 |   // Extract dimensions
269 |   int64_t num, chn, sp;
270 |   get_dims(z, num, chn, sp);
271 | 
272 |   auto dx = at::zeros_like(z);
273 |   auto dweight = at::zeros_like(weight);
274 |   auto dbias = at::zeros_like(bias);
275 | 
276 |   // Run kernel
277 |   dim3 blocks(chn);
278 |   dim3 threads(getNumThreads(sp));
279 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "backward_cuda", ([&] {
280 |     backward_kernel<scalar_t><<<blocks, threads>>>(
281 |         z.data<scalar_t>(),
282 |         dz.data<scalar_t>(),
283 |         var.data<scalar_t>(),
284 |         weight.data<scalar_t>(),
285 |         bias.data<scalar_t>(),
286 |         edz.data<scalar_t>(),
287 |         eydz.data<scalar_t>(),
288 |         dx.data<scalar_t>(),
289 |         dweight.data<scalar_t>(),
290 |         dbias.data<scalar_t>(),
291 |         affine, eps, num, chn, sp);
292 |   }));
293 | 
294 |   return {dx, dweight, dbias};
295 | }
296 | 
297 | /**************
298 |  * activations
299 |  **************/
300 | 
301 | template <typename T>
302 | inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) {
303 |   // Create thrust pointers
304 |   thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
305 |   thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);
306 | 
307 |   thrust::transform_if(th_dz, th_dz + count, th_z, th_dz,
308 |                        [slope] __device__ (const T& dz) { return dz * slope; },
309 |                        [] __device__ (const T& z) { return z < 0; });
310 |   thrust::transform_if(th_z, th_z + count, th_z,
311 |                        [slope] __device__ (const T& z) { return z / slope; },
312 |                        [] __device__ (const T& z) { return z < 0; });
313 | }
314 | 
315 | void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) {
316 |   CHECK_INPUT(z);
317 |   CHECK_INPUT(dz);
318 | 
319 |   int64_t count = z.numel();
320 | 
321 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] {
322 |     leaky_relu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), slope, count);
323 |   }));
324 | }
325 | 
326 | template <typename T>
327 | inline void elu_backward_impl(T *z, T *dz, int64_t count) {
328 |   // Create thrust pointers
329 |   thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
330 |   thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);
331 | 
332 |   thrust::transform_if(th_dz, th_dz + count, th_z, th_z, th_dz,
333 |                        [] __device__ (const T& dz, const T& z) { return dz * (z + 1.); },
334 |                        [] __device__ (const T& z) { return z < 0; });
335 |   thrust::transform_if(th_z, th_z + count, th_z,
336 |                        [] __device__ (const T& z) { return log1p(z); },
337 |                        [] __device__ (const T& z) { return z < 0; });
338 | }
339 | 
340 | void elu_backward_cuda(at::Tensor z, at::Tensor dz) {
341 |   CHECK_INPUT(z);
342 |   CHECK_INPUT(dz);
343 | 
344 |   int64_t count = z.numel();
345 | 
346 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cuda", ([&] {
347 |     elu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), count);
348 |   }));
349 | }
--------------------------------------------------------------------------------
/viewpoint_optim/segmodel.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | 
5 | 
6 | def flip(x, dim):
7 |     indices = [slice(None)] * x.dim()
8 |     indices[dim] = torch.arange(x.size(dim) - 1, -1, -1,
9 |                                 dtype=torch.long, device=x.device)
10 |     return x[tuple(indices)]
11 | 
12 | 
13 | class SegmentationModule(nn.Module):
14 |     _IGNORE_INDEX = 255
15 | 
16 |     class _MeanFusion:
17 |         def __init__(self, x, classes):
18 |             self.buffer = x.new_zeros(x.size(0), classes, x.size(2), x.size(3))
19 |             self.counter = 0
20 | 
21 |         def update(self, sem_logits):
22 |             probs = F.softmax(sem_logits, dim=1)
23 |             self.counter += 1
24 |             self.buffer.add_((probs - self.buffer) / self.counter)  # incremental mean
25 | 
26 |         def output(self):
27 |             probs, cls = self.buffer.max(1)
28 |             return probs, cls
29 | 
30 |     class _VotingFusion:
31 |         def __init__(self, x, classes):
32 |             self.votes = x.new_zeros(x.size(0), classes, x.size(2), x.size(3))
33 |             self.probs = x.new_zeros(x.size(0), classes, x.size(2), x.size(3))
34 | 
35 |         def update(self, sem_logits):
36 |             probs = F.softmax(sem_logits, dim=1)
37 |             probs, cls = probs.max(1, keepdim=True)
38 | 
39 |             self.votes.scatter_add_(1, cls, self.votes.new_ones(cls.size()))
40 |             self.probs.scatter_add_(1, cls, probs)
41 | 
42 |         def output(self):
43 |             votes, cls = self.votes.max(1, keepdim=True)  # (max vote count, winning class)
44 |             probs = self.probs / self.votes.clamp(min=1)
45 |             probs = probs.gather(1, cls)
46 |             return probs.squeeze(1), cls.squeeze(1)
47 | 
48 |     class _MaxFusion:
49 |         def __init__(self, x, _):
50 |             self.buffer_cls = x.new_zeros(x.size(0), x.size(2), x.size(3), dtype=torch.long)
51 |             self.buffer_prob = x.new_zeros(x.size(0), x.size(2), x.size(3))
52 | 
53 |         def update(self, sem_logits):
54 |             probs = F.softmax(sem_logits, dim=1)
55 |             max_prob, max_cls = probs.max(1)
56 | 
57 |             replace_idx = max_prob > self.buffer_prob
58 |             self.buffer_cls[replace_idx] = max_cls[replace_idx]
59 |             self.buffer_prob[replace_idx] = max_prob[replace_idx]
60 | 
61 |         def output(self):
62 |             return self.buffer_prob, self.buffer_cls
63 | 
64 |     def __init__(self, model_dict, head_channels, classes, depth_fusion, vote_mode="plain", vote_scales=[0.7, 1.2]):
65 |         super(SegmentationModule, self).__init__()
66 |         self.depth_fusion = depth_fusion
67 |         self.vote_mode = vote_mode
68 |         self.vote_scales = vote_scales
69 |         self.body = model_dict['body']
70 |         if depth_fusion == 'feature-concat':
71 |             self.depth_body = model_dict['depth_body']
72 |         self.head = model_dict['head']
73 |         self.cls = nn.Conv2d(head_channels, classes, 1)
74 | 
75 |         self.classes = classes
76 |         # Select the multi-scale fusion strategy up front; it is only used in eval mode
77 |         if "mean" in self.vote_mode:
78 |             self.fusion_cls = SegmentationModule._MeanFusion
79 |         elif "voting" in self.vote_mode:
80 |             self.fusion_cls = SegmentationModule._VotingFusion
81 |         elif "max" in self.vote_mode:
82 |             self.fusion_cls = SegmentationModule._MaxFusion
83 | 
84 |     def _forward(self, x, depth):
85 |         img_shape = x.shape[-2:]
86 |         if self.depth_fusion == 'pixel-concat':
87 |             x = torch.cat([x, depth], dim=1)
88 |         x = self.body(x)
89 |         if self.depth_fusion == 'feature-concat':
90 |             depth = self.depth_body(depth)
91 |             x = torch.cat([x, depth], dim=1)
92 |         x = self.head(x)
93 |         x = self.cls(x)
94 |         x = F.interpolate(x, size=img_shape, mode='bilinear', align_corners=True)
95 |         return x
96 | 
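    # Multi-scale / flip voting (illustrative numbers): with vote_mode="mean-flip"
    # and the default vote_scales=[0.7, 1.2], forward() below evaluates six
    # variants -- scales {1.0, 0.7, 1.2} plus their horizontal flips -- and fuses
    # the per-pixel softmax outputs with _MeanFusion into (probs, classes).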
97 |     def forward(self, x, depth):
98 |         if self.training or self.vote_mode == 'plain':
99 |             return self._forward(x, depth)
100 |         else:
101 |             # Prepare data_dict
102 |             feed_dict = [{"x": x, "depth": depth}]
103 |             feed_scales = [1]
104 |             for scale in self.vote_scales:
105 |                 scaled_size = [round(s * scale) for s in x.shape[-2:]]
106 |                 feed_dict.append(
107 |                     {
108 |                         "x": F.interpolate(x, size=scaled_size, mode="bilinear"),
109 |                         "depth": F.interpolate(depth, size=scaled_size, mode="bilinear")
110 |                     })
111 |                 feed_scales.append(scale)
112 |             if "flip" in self.vote_mode:
113 |                 for i in range(len(feed_scales)):
114 |                     feed_dict.append(
115 |                         {
116 |                             "x": flip(feed_dict[i]["x"], -1),
117 |                             "depth": flip(feed_dict[i]["depth"], -1)
118 |                         })
119 |                     feed_scales.append(-feed_scales[i])
120 | 
121 |             fusion = self.fusion_cls(x, self.classes)
122 |             for i in range(len(feed_scales)):
123 |                 sem_logits = self._forward(**feed_dict[i])
124 |                 if feed_scales[i] < 0:
125 |                     sem_logits = flip(sem_logits, -1)
126 |                 if abs(feed_scales[i]) != 1:
127 |                     sem_logits = F.interpolate(sem_logits, size=x.shape[-2:], mode="bilinear")
128 |                 fusion.update(sem_logits)
129 | 
130 |             return fusion.output()
--------------------------------------------------------------------------------
/viewpoint_optim/utils.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import matplotlib.pyplot as plt
3 | import numpy as np
4 | from functools import partial
5 | 
6 | import torch
7 | 
8 | # Project-local imports; the exact import paths are assumed from this repo's
9 | # layout, and `models` (WiderResNet definitions such as net_wider_resnet38_a2)
10 | # is expected to be available alongside this package.
11 | import models
12 | from modules.bn import InPlaceABN
13 | from modules.deeplab import DeeplabV3
14 | 
15 | 
16 | def load_snapshot(snapshot_file, depth_fusion):
17 |     """Load a training snapshot"""
18 |     print("--- Loading model from snapshot")
19 | 
20 |     # Create network
21 |     norm_act = partial(InPlaceABN, activation="leaky_relu", slope=.01)
22 |     model_dict = dict()
23 |     if depth_fusion == 'no-depth':
24 |         model_dict['body'] = models.__dict__["net_wider_resnet38_a2"](
25 |             norm_act=norm_act,
26 |             dilation=(1, 2, 4, 4)
27 |         )
28 |     elif depth_fusion == 'pixel-concat':
29 |         model_dict['body'] = models.__dict__["net_wider_resnet38_a2"](
30 |             norm_act=norm_act,
31 |             dilation=(1, 2, 4, 4),
32 |             channels_in=4
33 |         )
34 |     elif depth_fusion == 'feature-concat':
35 |         model_dict['body'] = models.__dict__["net_wider_resnet38_a2"](
36 |             norm_act=norm_act,
37 |             dilation=(1, 2, 4, 4)
38 |         )
39 |         model_dict['depth_body'] = models.__dict__["net_wider_resnet38_a2"](
40 |             norm_act=norm_act,
41 |             dilation=(1, 2, 4, 4),
42 |             channels_in=1
43 |         )
44 |     if depth_fusion == 'feature-concat':
45 |         model_dict['head'] = DeeplabV3(8192, 256, 256, norm_act=norm_act, pooling_size=(84, 84))
46 |     else:
47 |         model_dict['head'] = DeeplabV3(4096, 256, 256, norm_act=norm_act, pooling_size=(84, 84))
48 | 
49 |     # Load snapshot and recover network state, presumably only for sub-modules
50 |     # whose shapes match the snapshot: the 'pixel-concat' body takes 4 input
51 |     # channels and the 'feature-concat' head is wider, so those start fresh.
52 |     data = torch.load(snapshot_file)
53 |     if depth_fusion == 'feature-concat' or depth_fusion == 'no-depth':
54 |         model_dict['body'].load_state_dict(data["state_dict"]["body"])
55 | 
56 |     if depth_fusion == 'pixel-concat' or depth_fusion == 'no-depth':
57 |         model_dict['head'].load_state_dict(data["state_dict"]["head"])
58 | 
59 |     return model_dict
60 | 
61 | 
62 | def setup_logger(logger_name, log_file, level=logging.INFO, verbose=False):
63 |     l = logging.getLogger(logger_name)
64 |     formatter = logging.Formatter('%(asctime)s : %(message)s')
65 |     fileHandler = logging.FileHandler(log_file, mode='w')
66 |     fileHandler.setFormatter(formatter)
67 | 
68 |     l.setLevel(level)
69 |     l.addHandler(fileHandler)
70 | 
71 |     if verbose:
72 |         streamHandler = logging.StreamHandler()
73 |         streamHandler.setFormatter(formatter)
74 |         l.addHandler(streamHandler)
75 |     return l
76 | 
77 | 
78 | if __name__ == '__main__':
79 |     pass
--------------------------------------------------------------------------------
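To close the loop, here is a minimal usage sketch tying `load_snapshot`, `SegmentationModule`, and `setup_logger` together. It is illustrative only: the snapshot path, class count, and input resolution are hypothetical, and it assumes the script is run from `viewpoint_optim/` with the C++/CUDA extension under `modules/src` already built.

```python
import torch

from segmodel import SegmentationModule
from utils import load_snapshot, setup_logger

log = setup_logger('demo', 'logs/demo.log', verbose=True)

# Build the network from a pretrained snapshot (path and class count are hypothetical)
model_dict = load_snapshot('trained_models/seg_snapshot.pth.tar', depth_fusion='no-depth')
net = SegmentationModule(model_dict, head_channels=256, classes=65,
                         depth_fusion='no-depth', vote_mode='mean-flip').cuda().eval()

rgb = torch.randn(1, 3, 336, 336).cuda()
depth = torch.randn(1, 1, 336, 336).cuda()
with torch.no_grad():
    probs, classes = net(rgb, depth)  # per-pixel confidence and class id
log.info('output shapes: %s %s', probs.shape, classes.shape)
```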