├── .gitignore
├── LICENSE
├── README.md
└── viewpoint_optim
    ├── IL
    │   ├── evaluate.py
    │   ├── expert_traj.py
    │   ├── main.py
    │   └── pointnet.py
    ├── LICENSE
    ├── README.md
    ├── RL_CNN
    │   ├── cnn.py
    │   ├── environment.py
    │   └── main.py
    ├── RL_pointnet
    │   ├── environment.py
    │   ├── evaluate.py
    │   ├── main.py
    │   └── pointnet.py
    ├── __init__.py
    ├── modules
    │   ├── __init__.py
    │   ├── bn.py
    │   ├── deeplab.py
    │   ├── dense.py
    │   ├── functions.py
    │   ├── misc.py
    │   ├── residual.py
    │   └── src
    │       ├── common.h
    │       ├── inplace_abn.cpp
    │       ├── inplace_abn.h
    │       ├── inplace_abn_cpu.cpp
    │       └── inplace_abn_cuda.cu
    ├── segmodel.py
    └── utils.py

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | .idea/
6 | *.zip
7 | shapenetcore_partanno_segmentation_benchmark_v0/*
8 | logs/
9 | trained_models/
10 | a2c/models
11 | *.png
12 | *.jpg
13 | 
14 | # C extensions
15 | *.so
16 | 
17 | # Distribution / packaging
18 | .Python
19 | build/
20 | develop-eggs/
21 | dist/
22 | downloads/
23 | eggs/
24 | .eggs/
25 | lib/
26 | lib64/
27 | parts/
28 | sdist/
29 | var/
30 | wheels/
31 | *.egg-info/
32 | .installed.cfg
33 | *.egg
34 | MANIFEST
35 | 
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 | 
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 | 
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .coverage
50 | .coverage.*
51 | .cache
52 | nosetests.xml
53 | coverage.xml
54 | *.cover
55 | .hypothesis/
56 | .pytest_cache/
57 | 
58 | # Translations
59 | *.mo
60 | *.pot
61 | 
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | 
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 | 
71 | # Scrapy stuff:
72 | .scrapy
73 | 
74 | # Sphinx documentation
75 | docs/_build/
76 | 
77 | # PyBuilder
78 | target/
79 | 
80 | # Jupyter Notebook
81 | .ipynb_checkpoints
82 | 
83 | # pyenv
84 | .python-version
85 | 
86 | # celery beat schedule file
87 | celerybeat-schedule
88 | 
89 | # SageMath parsed files
90 | *.sage.py
91 | 
92 | # Environments
93 | .env
94 | .venv
95 | env/
96 | venv/
97 | ENV/
98 | env.bak/
99 | venv.bak/
100 | 
101 | # Spyder project settings
102 | .spyderproject
103 | .spyproject
104 | 
105 | # Rope project settings
106 | .ropeproject
107 | 
108 | # mkdocs documentation
109 | /site
110 | 
111 | # mypy
112 | .mypy_cache/
113 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2020 Xiangyu Chen
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Transferable-Active-Grasping
2 | Transferable Active Grasping and Real Embodied Dataset (ICRA 2020)
3 | 
4 | We will release the code and dataset soon.
--------------------------------------------------------------------------------
/viewpoint_optim/IL/evaluate.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | 
3 | import numpy as np
4 | import argparse
5 | import os
6 | import sys
7 | import time
8 | 
9 | import torch
10 | import torch.nn.functional as F
11 | import torch.optim as optim
12 | from torch.autograd import Variable
13 | 
14 | from environment import ActiveAgent
15 | from pointnet import *
16 | from utils import setup_logger
17 | 
18 | 
19 | class CB(nn.Module):
20 |     def __init__(self, num_points=3000, output=5):
21 |         super(CB, self).__init__()
22 |         self.num_points = num_points
23 |         self.feat = PointNetfeat(num_points, global_feat=True)
24 |         self.fc = end_layer(in_channels=1024, out_channels=128)
25 | 
26 |         self.fc11 = nn.Linear(128, 64)
27 |         self.fc12 = nn.Linear(64, 32)
28 |         self.fc13 = nn.Linear(32, 16)
29 |         self.fc1 = nn.Linear(16, 8)
30 |         self.fc2 = nn.Linear(8, output)
31 | 
32 |         self.apply(weights_init)
33 |         self.train()
34 | 
35 |     def forward(self, x):
36 |         x, _ = self.feat(x)
37 |         x = F.relu(self.fc(x))
38 |         x = F.relu(self.fc11(x))
39 |         x = F.relu(self.fc12(x))
40 |         x = F.relu(self.fc13(x))
41 |         x = F.relu(self.fc1(x))
42 |         x = self.fc2(x)
43 | 
44 |         return x
45 | 
46 | 
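# -- Editorial example (added; not in the original source) --
# A minimal shape sketch for CB, the behaviour-cloning policy evaluated in
# this script, assuming the defaults above (4-channel x/y/z/target-mask
# clouds of 3000 points):
#
#   model = CB(num_points=3000, output=5).cuda().eval()
#   logits = model(torch.rand(1, 4, 3000).cuda())  # -> (1, 5) action logits
#   action = logits.argmax(dim=1)                  # greedy action in {0, ..., 4}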
47 | parser = argparse.ArgumentParser(description='A2C')
48 | parser.add_argument('--lr', type=float, default=0.0001,
49 |                     help='learning rate (default: 0.0001)')
50 | parser.add_argument('--hidden-size', type=int, default=1024,
51 |                     help='Hidden size for LSTM')
52 | parser.add_argument('--gamma', type=float, default=0.99,
53 |                     help='discount factor for rewards (default: 0.99)')
54 | parser.add_argument('--tau', type=float, default=1.00,
55 |                     help='parameter for GAE (default: 1.00)')
56 | parser.add_argument('--entropy-coef', type=float, default=0.01,
57 |                     help='entropy term coefficient (default: 0.01)')
58 | parser.add_argument('--value-loss-coef', type=float, default=0.5,
59 |                     help='value loss coefficient (default: 0.5)')
60 | parser.add_argument('--max-grad-norm', type=float, default=20,
61 |                     help='max norm for gradient clipping (default: 20)')
62 | parser.add_argument('--seed', type=int, default=456,
63 |                     help='random seed (default: 456)')
64 | parser.add_argument('--num-steps', type=int, default=20,
65 |                     help='number of forward steps in A2C (default: 20)')
66 | parser.add_argument('--max-episode-length', type=int, default=50,
67 |                     help='maximum length of an episode (default: 50)')
68 | parser.add_argument('--env-name', default='PointNetActorCritic',
69 |                     help='environment to train on')
70 | parser.add_argument('--no-shared', default=False,
71 |                     help='use an optimizer without shared momentum.')
72 | parser.add_argument('--n-points', type=int, default=3000,
73 |                     help='the number of points fed to PointNet')
74 | parser.add_argument('--log-dir', type=str, default='logs',
75 |                     help='Folder to save logs')
76 | parser.add_argument('--model-dir', type=str, default='trained_models',
77 |                     help='Folder to save models')
78 | parser.add_argument('--data-dir', type=str, default='data',
79 |                     help='Path to the IORD dataset')
80 | parser.add_argument('--resume', default=True,
81 |                     help='resume latest model or not')
82 | parser.add_argument('--num-actions', type=int, default=5,
83 |                     help='discrete action space')
84 | parser.add_argument('--num-test', type=int, default=50,
85 |                     help='number of test episodes')
86 | parser.add_argument('--min', type=bool, default=True,
87 |                     help='use min-vis or not')
88 | 
89 | # segmentation settings
90 | parser.add_argument("--depth-fusion", type=str, default='no-depth',
91 |                     choices=['no-depth', 'pixel-concat', 'feature-concat'])
92 | parser.add_argument("--vote-mode", metavar="NAME",
93 |                     type=str, choices=["plain", "mean", "voting", "max",
94 |                                        "mean+flip", "voting+flip", "max+flip"], default="mean")
95 | parser.add_argument("--vote-scales", type=list, default=[0.7, 1.2])
96 | parser.add_argument("--output-mode", metavar="NAME", type=str, choices=["palette", "raw", "prob"],
97 |                     default="palette",
98 |                     help="How the output files are formatted."
99 |                          " -- palette: color coded predictions"
100 |                          " -- raw: gray-scale predictions"
101 |                          " -- prob: gray-scale predictions plus probabilities")
102 | parser.add_argument("--snapshot", metavar="SNAPSHOT_FILE", type=str, default='wide_resnet38_deeplab_vistas.pth.tar', help="Snapshot file to load")
103 | parser.add_argument("--seg-model-dir", type=str, default="path of segmentation model")
104 | 
105 | 
106 | if __name__ == '__main__':
107 |     args = parser.parse_args()
108 |     if not os.path.isdir(args.log_dir):
109 |         os.makedirs(args.log_dir)
110 | 
111 |     torch.manual_seed(args.seed)
112 |     np.random.seed(args.seed)
113 | 
114 |     model = CB()
115 |     model = model.cuda()
116 |     env = ActiveAgent(idx=0, n_points=args.n_points,
117 |                       seg_args=args, mode='semantic', root_path=args.data_dir)
118 |     env.seed(args.seed)
119 | 
120 |     # resume latest model
121 |     if args.resume:
122 |         model_path = os.path.join(args.model_dir, 'latest.pth')
123 |         if not os.path.isdir(args.model_dir):
124 |             os.makedirs(args.model_dir)
125 |         elif os.path.exists(model_path):
126 |             print('Loading model from %s...'
 % model_path)
127 |             model.load_state_dict(torch.load(model_path))
128 | 
129 |     itr = 0
130 |     epoch = 0
131 |     training_time = 50
132 |     # train_logger = setup_logger('trainer', os.path.join(args.log_dir, 'trainer_log.txt'))
133 |     # test_logger = setup_logger('test', os.path.join(args.log_dir, 'test_log.txt'))
134 |     optimizer = optim.Adam(model.parameters(), lr=args.lr)
135 | 
136 |     # test parameters
137 |     all_success_time = 0
138 |     all_time = 0
139 |     ep_success_time = 0
140 |     success_phase = 0.1
141 |     check_flag = False
142 | 
143 |     for _ in range(5):
144 |         epoch += 1
145 |         ################### testing phase ###################
146 |         model = model.eval()
147 | 
148 |         state, _ = env.reset(min_vis=args.min)
149 |         state = Variable(torch.from_numpy(state).unsqueeze(0))
150 |         if torch.cuda.is_available():
151 |             state = state.cuda()
152 |         reward_sum = 0
153 |         done = True
154 | 
155 |         episode_length = 0
156 |         testing = True
157 |         while testing:
158 |             episode_length += 1
159 | 
160 |             with torch.no_grad():
161 |                 logit = model(state)
162 |                 prob = F.softmax(logit, dim=1)
163 |                 action = prob.max(1, keepdim=True)[1].data.cpu().numpy()
164 | 
165 |             # path_info = '%s %s %s %d' % (env.target_group, env.scene_idx, env.coord, action[0, 0])
166 |             # test_logger.info(path_info)
167 | 
168 |             state, reward, done = env.step(action[0, 0])
169 |             reward_sum += reward
170 | 
171 |             if done:
172 |                 # print('testing: ', all_time)
173 |                 success = env.end_flag
174 |                 all_success_time += success
175 |                 ep_success_time += success
176 |                 all_time += 1
177 |                 if all_time % args.num_test == 0:
178 |                     check_flag = True
179 | 
180 |                 state, _ = env.reset(min_vis=args.min)
181 | 
182 |             state = Variable(torch.from_numpy(state).unsqueeze(0))
183 |             if torch.cuda.is_available():
184 |                 state = state.cuda()
185 | 
186 |             if check_flag:
187 |                 all_success_rate = all_success_time / all_time
188 |                 log_info = 'Num steps: %d, Episode length: %d, Reward: %0.2f, EP Success: %0.4f, ALL Success: %0.4f' \
189 |                            % (itr, episode_length, reward_sum, ep_success_time / args.num_test, all_success_rate)
190 |                 # test_logger.info(log_info)
191 |                 print(log_info)
192 | 
193 |                 reward_sum = 0
194 |                 episode_length = 0
195 |                 ep_success_time = 0
196 |                 check_flag = False
197 |                 testing = False
198 | 
199 |         time.sleep(1)
--------------------------------------------------------------------------------
/viewpoint_optim/IL/expert_traj.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import argparse
3 | 
4 | import torch
5 | import torch.nn.functional as F
6 | from torch.autograd import Variable
7 | 
8 | import os
9 | import sys
10 | 
11 | # for PCD
12 | # sys.path.append('..')
13 | # from environment import ActiveAgent
14 | 
15 | # for CNN
16 | sys.path.append('../RL_CNN')
17 | from environment import ActiveAgent
18 | 
19 | from pointnet import PointNetActorCritic
20 | from utils import setup_logger
21 | 
22 | 
23 | def init_parser():
24 |     parser = argparse.ArgumentParser(description='expert_traj')
25 | 
26 |     parser.add_argument('--model-dir', type=str, default='trained_models',
27 |                         help='Folder to expert models')
28 |     parser.add_argument('--mode', type=str, default='pointnet',
29 |                         help='Feature extraction mode')
30 | 
31 |     args = parser.parse_args()
32 |     return args
33 | 
34 | 
35 | def collect_expert_traj(model_path='latest.pth', mode='pointnet'):
36 |     hidden_size = 1024
37 |     n_traj = 100
38 | 
39 |     env = ActiveAgent(idx=666, n_points=3000, seg_args=None)  # seg_args is only read in 'semantic' mode
40 |     env.seed(456)
41 |     logger = setup_logger('test', 'logs/expert_traj.txt')
42 |     expert_traj = []
43 |
traj = [] 44 | 45 | model = PointNetActorCritic(num_points=env.n_points, num_actions=env.n_actions) 46 | model.load_state_dict(torch.load(model_path)) 47 | model.eval() 48 | if torch.cuda.is_available(): 49 | model = model.cuda() 50 | 51 | state, _ = env.reset(verbose=True) 52 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 53 | if torch.cuda.is_available(): 54 | state = state.cuda() 55 | reward_sum = 0 56 | done = True 57 | episode_length = 0 58 | 59 | while True: 60 | episode_length += 1 61 | # Sync with the shared model 62 | if done: 63 | with torch.no_grad(): 64 | cx = torch.zeros(1, hidden_size) 65 | hx = torch.zeros(1, hidden_size) 66 | else: 67 | with torch.no_grad(): 68 | cx = cx.data 69 | hx = hx.data 70 | if torch.cuda.is_available(): 71 | hx = hx.cuda() 72 | cx = cx.cuda() 73 | 74 | with torch.no_grad(): 75 | value, logit, (hx, cx) = model((state, (hx, cx))) 76 | prob = F.softmax(logit, dim=1) 77 | action = prob.max(1, keepdim=True)[1].data.cpu().numpy() 78 | 79 | path_info = '%s %s %s %d' % (env.target_group, env.target_scene, env.coord, action[0, 0]) 80 | logger.info(path_info) 81 | print(path_info) 82 | traj.append((state.data.cpu().numpy(), action)) 83 | 84 | state, reward, done = env.step(action[0, 0]) 85 | reward_sum += reward 86 | 87 | if done: 88 | success = env.end_flag 89 | state, _ = env.reset() 90 | 91 | # collect an expert trajectory 92 | if success and episode_length <= 20: 93 | log_info = 'Traj %d, episode_length %d, reward %0.2f' \ 94 | % (n_traj, episode_length, reward_sum) 95 | logger.info(log_info) 96 | print(log_info) 97 | expert_traj.append(traj) 98 | n_traj -= 1 99 | 100 | traj = [] 101 | episode_length = 0 102 | reward_sum = 0 103 | 104 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 105 | if torch.cuda.is_available(): 106 | state = state.cuda() 107 | 108 | if not n_traj: 109 | break 110 | 111 | # save expert trajectory 112 | with open('expert_traj_%s.pkl' % mode, 'wb') as f: 113 | pickle.dump(expert_traj, f) 114 | 115 | 116 | if __name__ == '__main__': 117 | args = init_parser() 118 | collect_expert_traj(model_path=args.model_dir, mode=args.mode) -------------------------------------------------------------------------------- /viewpoint_optim/IL/main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | from torch.autograd import Variable 7 | 8 | import argparse 9 | import os 10 | import pickle 11 | import random 12 | import sys 13 | sys.path.append('/media/sdc/seg3d/T-Pointnet') 14 | sys.path.append('/media/sdc/seg3d/T-Pointnet/a2c') 15 | 16 | from pointnet import * 17 | from environment import ActiveAgent 18 | from utils import setup_logger 19 | 20 | 21 | class CB(nn.Module): 22 | def __init__(self, num_points=3000, output=5): 23 | super(CB, self).__init__() 24 | self.num_points = num_points 25 | self.feat = PointNetfeat(num_points, global_feat=True) 26 | self.fc = end_layer(in_channels=1024, out_channels=128) 27 | 28 | self.fc11 = nn.Linear(128, 64) 29 | self.fc12 = nn.Linear(64, 32) 30 | self.fc13 = nn.Linear(32, 16) 31 | self.fc1 = nn.Linear(16, 8) 32 | self.fc2 = nn.Linear(8, output) 33 | 34 | self.apply(weights_init) 35 | self.train() 36 | 37 | def forward(self, x): 38 | x, _ = self.feat(x) 39 | x = F.relu(self.fc(x)) 40 | x = F.relu(self.fc11(x)) 41 | x = F.relu(self.fc12(x)) 42 | x = F.relu(self.fc13(x)) 43 | x = F.relu(self.fc1(x)) 44 | x = self.fc2(x) 45 | 46 | 
        return x
47 | 
48 | 
49 | class CNNCB(nn.Module):
50 |     def __init__(self, output=5):
51 |         super(CNNCB, self).__init__()
52 |         self.feat = CNNfeat()
53 |         self.fc = end_layer(in_channels=1024, out_channels=128)
54 | 
55 |         self.fc11 = nn.Linear(128, 64)
56 |         self.fc12 = nn.Linear(64, 32)
57 |         self.fc13 = nn.Linear(32, 16)
58 |         self.fc1 = nn.Linear(16, 8)
59 |         self.fc2 = nn.Linear(8, output)
60 | 
61 |         self.apply(weights_init)
62 |         self.train()
63 | 
64 |     def forward(self, x):
65 |         x = self.feat(x)
66 |         x = F.relu(self.fc(x))
67 |         x = F.relu(self.fc11(x))
68 |         x = F.relu(self.fc12(x))
69 |         x = F.relu(self.fc13(x))
70 |         x = F.relu(self.fc1(x))
71 |         x = self.fc2(x)
72 | 
73 |         return x
74 | 
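# -- Editorial note (added; not in the original source) --
# The two behaviour-cloning heads above only differ in their feature
# extractor; a shape sketch under the defaults:
#
#   pcd_policy = CB(num_points=3000, output=5)   # expects (B, 4, 3000) point clouds
#   img_policy = CNNCB(output=5)                 # expects (B, 3, 960, 640) stacked RGB+mask images
#   logits = img_policy(torch.rand(2, 3, 960, 640))  # -> (2, 5) action logits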
help="How the output files are formatted." 130 | " -- palette: color coded predictions" 131 | " -- raw: gray-scale predictions" 132 | " -- prob: gray-scale predictions plus probabilities") 133 | parser.add_argument("--snapshot", metavar="SNAPSHOT_FILE", type=str, default='wide_resnet38_deeplab_vistas.pth.tar', help="Snapshot file to load") 134 | parser.add_argument("--seg-model-dir", type=str, default="path of segmentation model") 135 | 136 | return parser 137 | 138 | 139 | if __name__ == '__main__': 140 | parser = para_setting() 141 | seg_args = init_seg_parser() 142 | if not os.path.isdir(args.log_dir): 143 | os.makedirs(args.log_dir) 144 | 145 | torch.manual_seed(args.seed) 146 | np.random.seed(args.seed) 147 | 148 | if args.feat_archi == 'pointnet': 149 | model = CB() 150 | elif args.feat_archi == 'cnn': 151 | model = CNNCB() 152 | criterion = nn.CrossEntropyLoss() 153 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 154 | 155 | model = model.cuda() 156 | criterion = criterion.cuda() 157 | 158 | env = ActiveAgent(idx=0, n_points=args.n_points, 159 | seg_args=args, mode='sim', root_path=args.data_dir) 160 | env.seed(args.seed) 161 | 162 | # resume latest model 163 | if args.resume: 164 | model_path = os.path.join(args.model_dir, 'latest.pth') 165 | if not os.path.isdir(args.model_dir): 166 | os.makedirs(args.model_dir) 167 | elif os.path.exists(model_path): 168 | print('Loading model from %s...' % model_path) 169 | model.load_state_dict(torch.load(model_path)) 170 | 171 | # expert 172 | with open('expert_traj.pkl', 'rb') as f: 173 | expert_traj_all = pickle.load(f) 174 | 175 | itr = 0 176 | epoch = 0 177 | training_time = 50 178 | train_logger = setup_logger('trainer', os.path.join(args.log_dir, 'trainer_log.txt')) 179 | test_logger = setup_logger('test', os.path.join(args.log_dir, 'test_log.txt')) 180 | 181 | # test parameters 182 | all_success_time = 0 183 | all_time = 0 184 | ep_success_time = 0 185 | success_phase = 0.1 186 | check_flag = False 187 | 188 | while True: 189 | epoch += 1 190 | ################### training phase ################### 191 | model = model.train() 192 | for train_itr in range(training_time): 193 | expert_traj = random.choice(expert_traj_all) 194 | 195 | grasp_expert = random.random() 196 | if grasp_expert < 0.3: 197 | expert_s, expert_a = expert_traj[-1] 198 | else: 199 | expert_s, expert_a = random.choice(expert_traj) 200 | expert_a = expert_a.squeeze(1) 201 | 202 | # expert_s = np.array([x[0] for x in expert_traj]).squeeze(1) 203 | # expert_a = np.array([x[1] for x in expert_traj]).squeeze(1).squeeze(1) 204 | 205 | logit = model(torch.from_numpy(expert_s).cuda()) 206 | itr += 1 207 | 208 | loss = criterion(logit, torch.from_numpy(expert_a).cuda()) 209 | print('behaviour cloning loss: ', loss.data.cpu().numpy()) 210 | train_logger.info('behaviour cloning loss: ' + str(loss.data.cpu().numpy())) 211 | optimizer.zero_grad() 212 | loss.backward() 213 | optimizer.step() 214 | 215 | ################### testing phase ################### 216 | model = model.eval() 217 | 218 | state, _ = env.reset() 219 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 220 | if torch.cuda.is_available(): 221 | state = state.cuda() 222 | reward_sum = 0 223 | done = True 224 | 225 | episode_length = 0 226 | testing = True 227 | while testing: 228 | episode_length += 1 229 | 230 | with torch.no_grad(): 231 | logit = model(state) 232 | prob = F.softmax(logit, dim=1) 233 | action = prob.max(1, keepdim=True)[1].data.cpu().numpy() 234 | 235 | # path_info = '%s %s %s %d' % 
(env.target_group, env.scene_idx, env.coord, action[0, 0]) 236 | # test_logger.info(path_info) 237 | 238 | state, reward, done = env.step(action[0, 0]) 239 | reward_sum += reward 240 | 241 | if done: 242 | success = env.end_flag 243 | all_success_time += success 244 | ep_success_time += success 245 | all_time += 1 246 | if all_time % args.num_test == 0: 247 | check_flag = True 248 | 249 | state, _ = env.reset() 250 | 251 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 252 | if torch.cuda.is_available(): 253 | state = state.cuda() 254 | 255 | if check_flag: 256 | all_success_rate = all_success_time / all_time 257 | log_info = 'Num steps: %d, Episode length: %d, Reward: %0.2f, EP Success: %0.2f, ALL Success: %0.3f' \ 258 | % (itr, episode_length, reward_sum, ep_success_time / args.num_test, all_success_rate) 259 | test_logger.info(log_info) 260 | print(log_info) 261 | torch.save(model.state_dict(), os.path.join(args.model_dir, 'latest.pth')) 262 | 263 | # save models in some important phases 264 | if all_success_rate > success_phase: 265 | torch.save(model.state_dict(), 266 | os.path.join(args.model_dir, 'success_rate_%0.2f.pth' % success_phase)) 267 | success_phase += 0.1 268 | 269 | # save models according to steps 270 | if epoch % 20 == 0: 271 | torch.save(model.state_dict(), 272 | os.path.join(args.model_dir, 'model_%d.pth' % epoch)) 273 | 274 | reward_sum = 0 275 | episode_length = 0 276 | ep_success_time = 0 277 | check_flag = False 278 | testing = False -------------------------------------------------------------------------------- /viewpoint_optim/IL/pointnet.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.parallel 7 | import torch.backends.cudnn as cudnn 8 | import torch.optim as optim 9 | import torch.utils.data 10 | import torchvision.transforms as transforms 11 | import torchvision.utils as vutils 12 | from torch.autograd import Variable 13 | from PIL import Image 14 | import numpy as np 15 | import matplotlib.pyplot as plt 16 | import torch.nn.functional as F 17 | 18 | 19 | def weights_init(m): 20 | classname = m.__class__.__name__ 21 | if classname.find('Conv') != -1: 22 | weight_shape = list(m.weight.data.size()) 23 | fan_in = np.prod(weight_shape[1: 4]) 24 | fan_out = np.prod(weight_shape[2: 4]) * weight_shape[0] 25 | w_bound = np.sqrt(6. 
/ (fan_in + fan_out)) 26 | m.weight.data.uniform_(-w_bound, w_bound) 27 | m.bias.data.fill_(0.0) 28 | elif classname.find('Linear') != -1: 29 | weight_shape = list(m.weight.data.size()) 30 | fan_in = weight_shape[1] 31 | fan_out = weight_shape[0] 32 | w_bound = np.sqrt(6.0 / (fan_in + fan_out)) 33 | m.weight.data.uniform_(-w_bound, w_bound) 34 | m.bias.data.fill_(0.0) 35 | elif classname.find('BatchNorm') != -1: 36 | m.weight.data.fill_(1.0) 37 | m.bias.data.fill_(0.0) 38 | elif classname.find('LSTMCell') != -1: 39 | m.bias_ih.data.fill_(0.0) 40 | m.bias_hh.data.fill_(0.0) 41 | 42 | 43 | class STN3d(nn.Module): 44 | def __init__(self, num_points = 2500): 45 | super(STN3d, self).__init__() 46 | self.num_points = num_points 47 | self.conv1 = torch.nn.Conv1d(4, 64, 1) 48 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 49 | self.conv3 = torch.nn.Conv1d(128, 1024, 1) 50 | self.mp1 = torch.nn.MaxPool1d(num_points) 51 | self.fc1 = nn.Linear(1024, 512) 52 | self.fc2 = nn.Linear(512, 256) 53 | self.fc3 = nn.Linear(256, 9) 54 | self.relu = nn.ReLU() 55 | 56 | self.bn1 = nn.BatchNorm1d(64) 57 | self.bn2 = nn.BatchNorm1d(128) 58 | self.bn3 = nn.BatchNorm1d(1024) 59 | self.bn4 = nn.BatchNorm1d(512) 60 | self.bn5 = nn.BatchNorm1d(256) 61 | 62 | def forward(self, x): 63 | # x --> 3 * 3 64 | batchsize = x.shape[0] 65 | if batchsize > 1: 66 | x = F.relu(self.bn1(self.conv1(x))) 67 | x = F.relu(self.bn2(self.conv2(x))) 68 | x = F.relu(self.bn3(self.conv3(x))) 69 | x = self.mp1(x) 70 | x = x.view(-1, 1024) 71 | 72 | x = F.relu(self.bn4(self.fc1(x))) 73 | x = F.relu(self.bn5(self.fc2(x))) 74 | else: 75 | x = F.relu(self.conv1(x)) 76 | x = F.relu(self.conv2(x)) 77 | x = F.relu(self.conv3(x)) 78 | x = self.mp1(x) 79 | x = x.view(-1, 1024) 80 | 81 | x = F.relu(self.fc1(x)) 82 | x = F.relu(self.fc2(x)) 83 | 84 | x = self.fc3(x) 85 | 86 | iden = Variable(torch.eye(3)).view(1, -1).repeat(batchsize, 1) 87 | if x.is_cuda: 88 | device = torch.device('cuda:%d' % x.get_device()) 89 | iden = iden.to(device=device) 90 | x = x + iden 91 | x = x.view(-1, 3, 3) 92 | 93 | return x 94 | 95 | 96 | class PointNetfeat(nn.Module): 97 | def __init__(self, num_points=2500, global_feat=True): 98 | super(PointNetfeat, self).__init__() 99 | self.stn = STN3d(num_points=num_points) 100 | self.conv1 = torch.nn.Conv1d(4, 64, 1) 101 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 102 | self.conv3 = torch.nn.Conv1d(128, 1024, 1) 103 | self.bn1 = nn.BatchNorm1d(64) 104 | self.bn2 = nn.BatchNorm1d(128) 105 | self.bn3 = nn.BatchNorm1d(1024) 106 | self.mp1 = torch.nn.MaxPool1d(num_points) 107 | self.num_points = num_points 108 | self.global_feat = global_feat 109 | 110 | def forward(self, x): 111 | trans = self.stn(x) 112 | x = torch.cat([torch.bmm(trans, x[:, :3, :]), x[:, 3, :].unsqueeze(1)], dim=1) 113 | 114 | if x.shape[0] > 1: 115 | x = F.relu(self.bn1(self.conv1(x))) 116 | pointfeat = x 117 | x = F.relu(self.bn2(self.conv2(x))) 118 | x = self.bn3(self.conv3(x)) 119 | else: 120 | x = F.relu(self.conv1(x)) 121 | pointfeat = x 122 | x = F.relu(self.conv2(x)) 123 | x = self.conv3(x) 124 | 125 | x = self.mp1(x) 126 | x = x.view(-1, 1024) 127 | 128 | if self.global_feat: 129 | return x, trans 130 | else: 131 | x = x.view(-1, 1024, 1).repeat(1, 1, self.num_points) 132 | return torch.cat([x, pointfeat], 1), trans 133 | 134 | 135 | class CNNfeat(nn.Module): 136 | def __init__(self): 137 | super(CNNfeat, self).__init__() 138 | self.conv1 = nn.Sequential( # 960 * 640 139 | nn.Conv2d(in_channels=3, out_channels=64, kernel_size=10, 140 | stride=3, padding=0), # 
317 * 211 141 | nn.ReLU(), 142 | ) 143 | self.conv2 = nn.Sequential( 144 | nn.Conv2d(64, 64, 5, 2, 0), # 157 * 104 145 | nn.ReLU(), 146 | ) 147 | self.conv3 = nn.Sequential( 148 | nn.Conv2d(64, 32, 3, 2, 0), # 78 * 51 149 | nn.ReLU(), 150 | # nn.MaxPool2d(kernel_size=2) # 19 * 25 151 | ) 152 | self.conv4 = nn.Sequential( 153 | nn.Conv2d(32, 16, 3, 2, 0), # 38 * 25 154 | nn.ReLU(), 155 | ) 156 | self.out = nn.Sequential( 157 | nn.Linear(16 * 38 * 25, 4096), 158 | nn.ReLU(), 159 | nn.Linear(4096, 2048), 160 | nn.ReLU(), 161 | nn.Linear(2048, 1024) 162 | ) 163 | # self.out = nn.MaxPool2d(kernel_size=(18, 25)) 164 | 165 | self.apply(weights_init) 166 | 167 | def forward(self, x): 168 | x = self.conv1(x) 169 | x = self.conv2(x) 170 | x = self.conv3(x) 171 | x = self.conv4(x) 172 | 173 | x = x.view(x.size(0), -1) 174 | x = self.out(x) 175 | x = x.view(-1, 1024) 176 | 177 | return x 178 | 179 | 180 | class end_layer(nn.Module): 181 | def __init__(self, in_channels=1024, out_channels=1): 182 | super(end_layer, self).__init__() 183 | self.fc1 = nn.Linear(in_channels, 512) 184 | self.fc2 = nn.Linear(512, 256) 185 | self.fc3 = nn.Linear(256, out_channels) 186 | self.bn1 = nn.BatchNorm1d(512) 187 | self.bn2 = nn.BatchNorm1d(256) 188 | 189 | self.apply(weights_init) 190 | 191 | def forward(self, x): 192 | if x.size()[0] == 1: 193 | x = F.relu(self.fc1(x)) 194 | x = F.relu(self.fc2(x)) 195 | else: 196 | x = F.relu(self.bn1(self.fc1(x))) 197 | x = F.relu(self.bn2(self.fc2(x))) 198 | return self.fc3(x) 199 | 200 | 201 | class PointNetActorCritic(nn.Module): 202 | def __init__(self, num_points=2500, num_actions=5): 203 | super(PointNetActorCritic, self).__init__() 204 | self.num_points = num_points 205 | self.feat = PointNetfeat(num_points, global_feat=True) 206 | 207 | self.lstm = nn.LSTMCell(1024, 1024) 208 | 209 | self.critic_linear = end_layer(in_channels=1024, out_channels=1) 210 | self.actor_linear = end_layer(in_channels=1024, out_channels=num_actions) 211 | 212 | self.apply(weights_init) 213 | self.train() 214 | 215 | def forward(self, inputs): 216 | x, (hx, cx) = inputs 217 | x, _ = self.feat(x) 218 | hx, cx = self.lstm(x, (hx, cx)) 219 | x = hx 220 | 221 | return self.critic_linear(x), self.actor_linear(x), (hx, cx) 222 | 223 | 224 | class PointNetGail(nn.Module): 225 | def __init__(self, num_points=2500, num_actions=5): 226 | super(PointNetGail, self).__init__() 227 | self.num_points = num_points 228 | self.feat = PointNetfeat(num_points, global_feat=True) 229 | 230 | self.critic_linear = end_layer(in_channels=1024, out_channels=1) 231 | self.actor_linear = end_layer(in_channels=1024, out_channels=num_actions) 232 | 233 | self.apply(weights_init) 234 | self.train() 235 | 236 | def forward(self, inputs): 237 | x = inputs 238 | x, _ = self.feat(x) 239 | 240 | return self.critic_linear(x), self.actor_linear(x) 241 | 242 | 243 | if __name__ == '__main__': 244 | sim_data = Variable(torch.rand(10, 4, 2500)) 245 | 246 | # trans = STN3d() 247 | # out = trans(sim_data) 248 | # print('stn', out.size()) 249 | 250 | # pointfeat = PointNetfeat(global_feat=True) 251 | # out, _ = pointfeat(sim_data) 252 | # print('global feat', out.size()) 253 | 254 | # pointfeat = PointNetfeat(global_feat=False) 255 | # out, _ = pointfeat(sim_data) 256 | # print('point feat', out.size()) 257 | 258 | cls = PointNetGail(num_actions=5) 259 | v, q= cls(sim_data) 260 | print(v.shape, q.shape) 261 | print(v) 262 | print(q) 263 | -------------------------------------------------------------------------------- 
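(Editorial example — not part of the original repository.) The recurrent actor-critic above keeps an LSTM state across steps; a minimal greedy-rollout sketch of how `PointNetActorCritic` is driven, with random tensors standing in for the environment (the shapes follow the `__main__` smoke test above, and the import assumes this file is reachable as `pointnet`):

```python
import torch
import torch.nn.functional as F
from pointnet import PointNetActorCritic  # assumed import path

model = PointNetActorCritic(num_points=3000, num_actions=5).eval()
hx = torch.zeros(1, 1024)  # LSTM hidden state, zeroed at episode start
cx = torch.zeros(1, 1024)  # LSTM cell state
state = torch.rand(1, 4, 3000)  # (batch, x/y/z/mask channels, points)

with torch.no_grad():
    for _ in range(5):
        value, logit, (hx, cx) = model((state, (hx, cx)))
        action = F.softmax(logit, dim=1).argmax(dim=1).item()  # greedy action
        state = torch.rand(1, 4, 3000)  # stand-in for env.step(action)
```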
/viewpoint_optim/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2018 Shin
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/viewpoint_optim/README.md:
--------------------------------------------------------------------------------
1 | # Viewpoint Optimization
2 | 
3 | Here is the code for training an agent with RL (reinforcement learning) or IL (imitation learning) to optimize the viewpoint.
4 | 
5 | ## Requirements
6 | 
7 | * Python 3.x
8 | 
9 | * pytorch 0.4.1
10 | 
11 | ## To run
12 | 
13 | ### Training & Evaluation with RL
14 | 
15 | Use PointNet to extract features:
16 | 
17 | 1. `cd RL_pointnet`
18 | 2. `python main.py --data_dir [folder to IORD] --model_dir [folder to save models] --lr [learning rate] --n_points [number of points fed to PointNet]`
19 | 
20 | For evaluation:
21 | `python evaluate.py --data_dir [folder to IORD] --model_dir [folder to save models]`
22 | 
23 | Use CNN to extract features:
24 | 
25 | 1. `cd RL_CNN`
26 | 2. `python main.py --data_dir [folder to IORD] --model_dir [folder to save models] --lr [learning rate]`
27 | 
28 | ### Training & Evaluation with IL
29 | 
30 | Use PointNet to extract features:
31 | 
32 | 1. `cd IL`
33 | 2. `python expert_traj.py --model_dir [expert model] --mode pointnet`
34 | 3. `python main.py --data_dir [folder to IORD] --model_dir [folder to save models] --lr [learning rate] --n_points [number of points fed to PointNet]`
35 | 
36 | For evaluation:
37 | `python evaluate.py --data_dir [folder to IORD] --model_dir [folder to save models]`
38 | 
39 | 
40 | Use CNN to extract features:
41 | 
42 | 1. `cd IL`
43 | 2. `python expert_traj.py --model_dir [expert model] --mode cnn`
44 | 3.
`python main.py --data_dir [folder to IORD] --model_dir [folder to save models] --lr [learning rate]` -------------------------------------------------------------------------------- /viewpoint_optim/RL_CNN/cnn.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import os 4 | import random 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.parallel 8 | import torch.backends.cudnn as cudnn 9 | import torch.optim as optim 10 | import torch.utils.data 11 | import torchvision.transforms as transforms 12 | import torchvision.utils as vutils 13 | from torch.autograd import Variable 14 | from PIL import Image 15 | import numpy as np 16 | import matplotlib.pyplot as plt 17 | import torch.nn.functional as F 18 | 19 | 20 | def weights_init(m): 21 | classname = m.__class__.__name__ 22 | if classname.find('Conv') != -1: 23 | weight_shape = list(m.weight.data.size()) 24 | fan_in = np.prod(weight_shape[1: 4]) 25 | fan_out = np.prod(weight_shape[2: 4]) * weight_shape[0] 26 | w_bound = np.sqrt(6. / (fan_in + fan_out)) 27 | m.weight.data.uniform_(-w_bound, w_bound) 28 | m.bias.data.fill_(0.0) 29 | elif classname.find('Linear') != -1: 30 | weight_shape = list(m.weight.data.size()) 31 | fan_in = weight_shape[1] 32 | fan_out = weight_shape[0] 33 | w_bound = np.sqrt(6.0 / (fan_in + fan_out)) 34 | m.weight.data.uniform_(-w_bound, w_bound) 35 | m.bias.data.fill_(0.0) 36 | elif classname.find('BatchNorm') != -1: 37 | m.weight.data.fill_(1.0) 38 | m.bias.data.fill_(0.0) 39 | elif classname.find('LSTMCell') != -1: 40 | m.bias_ih.data.fill_(0.0) 41 | m.bias_hh.data.fill_(0.0) 42 | 43 | 44 | class CNNfeat(nn.Module): 45 | def __init__(self): 46 | super(CNNfeat, self).__init__() 47 | self.conv1 = nn.Sequential( # 960 * 640 48 | nn.Conv2d(in_channels=3, out_channels=64, kernel_size=10, 49 | stride=3, padding=0), # 317 * 211 50 | nn.ReLU(), 51 | ) 52 | self.conv2 = nn.Sequential( 53 | nn.Conv2d(64, 64, 5, 2, 0), # 157 * 104 54 | nn.ReLU(), 55 | ) 56 | self.conv3 = nn.Sequential( 57 | nn.Conv2d(64, 32, 3, 2, 0), # 78 * 51 58 | nn.ReLU(), 59 | # nn.MaxPool2d(kernel_size=2) # 19 * 25 60 | ) 61 | self.conv4 = nn.Sequential( 62 | nn.Conv2d(32, 16, 3, 2, 0), # 38 * 25 63 | nn.ReLU(), 64 | ) 65 | self.out = nn.Sequential( 66 | nn.Linear(16 * 38 * 25, 4096), 67 | nn.ReLU(), 68 | nn.Linear(4096, 2048), 69 | nn.ReLU(), 70 | nn.Linear(2048, 1024) 71 | ) 72 | # self.out = nn.MaxPool2d(kernel_size=(18, 25)) 73 | 74 | self.apply(weights_init) 75 | 76 | def forward(self, x): 77 | x = self.conv1(x) 78 | x = self.conv2(x) 79 | x = self.conv3(x) 80 | x = self.conv4(x) 81 | 82 | x = x.view(x.size(0), -1) 83 | x = self.out(x) 84 | x = x.view(-1, 1024) 85 | 86 | return x 87 | 88 | 89 | class end_layer(nn.Module): 90 | def __init__(self, in_channels=1024, out_channels=1): 91 | super(end_layer, self).__init__() 92 | self.fc1 = nn.Linear(in_channels, 512) 93 | self.fc2 = nn.Linear(512, 256) 94 | self.fc3 = nn.Linear(256, out_channels) 95 | self.bn1 = nn.BatchNorm1d(512) 96 | self.bn2 = nn.BatchNorm1d(256) 97 | 98 | self.apply(weights_init) 99 | 100 | def forward(self, x): 101 | if x.size()[0] == 1: 102 | x = F.relu(self.fc1(x)) 103 | x = F.relu(self.fc2(x)) 104 | else: 105 | x = F.relu(self.bn1(self.fc1(x))) 106 | x = F.relu(self.bn2(self.fc2(x))) 107 | 108 | return self.fc3(x) 109 | 110 | 111 | class PointNetActorCritic(nn.Module): 112 | def __init__(self, num_points=2500, num_actions=5): 113 | super(PointNetActorCritic, 
self).__init__()
114 |         self.num_points = num_points
115 |         self.feat = CNNfeat()
116 | 
117 |         self.lstm = nn.LSTMCell(1024, 1024)
118 | 
119 |         self.critic_linear = end_layer(in_channels=1024, out_channels=1)
120 |         self.actor_linear = end_layer(in_channels=1024, out_channels=num_actions)
121 | 
122 |         self.apply(weights_init)
123 |         self.train()
124 | 
125 |     def forward(self, inputs):
126 |         x, (hx, cx) = inputs
127 |         x = self.feat(x)
128 |         hx, cx = self.lstm(x, (hx, cx))
129 |         x = hx
130 | 
131 |         return self.critic_linear(x), self.actor_linear(x), (hx, cx)
132 | 
133 | 
134 | if __name__ == '__main__':
135 |     sim_data = Variable(torch.rand(10, 3, 960, 640))  # CNNfeat expects stacked RGB+mask images, not point clouds
136 | 
137 |     # trans = STN3d()
138 |     # out = trans(sim_data)
139 |     # print('stn', out.size())
140 | 
141 |     # pointfeat = PointNetfeat(global_feat=True)
142 |     # out, _ = pointfeat(sim_data)
143 |     # print('global feat', out.size())
144 | 
145 |     # pointfeat = PointNetfeat(global_feat=False)
146 |     # out, _ = pointfeat(sim_data)
147 |     # print('point feat', out.size())
148 | 
149 |     cls = PointNetActorCritic(num_actions=4)
150 |     hx, cx = Variable(torch.zeros(10, 1024)), Variable(torch.zeros(10, 1024))
151 |     if torch.cuda.is_available():
152 |         sim_data = sim_data.cuda()
153 |         cls = cls.cuda()
154 |         hx, cx = hx.cuda(), cx.cuda()
155 |     v, q, (hx, cx) = cls((sim_data, (hx, cx)))
156 |     print(v.shape, q.shape, hx.shape, cx.shape)
157 |     print(v)
158 |     print(q)
--------------------------------------------------------------------------------
/viewpoint_optim/RL_CNN/environment.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | np.set_printoptions(threshold=np.nan)
3 | import random
4 | import os
5 | import sys
6 | import json
7 | from scipy.misc import imread
8 | from PIL import Image
9 | import argparse
10 | 
11 | import torch
12 | import torch.nn as nn
13 | from torchvision import transforms
14 | 
15 | from segmodel import SegmentationModule
16 | from utils import load_snapshot
17 | 
18 | 
19 | class MaskToTensor(object):
20 |     def __call__(self, img):
21 |         return torch.from_numpy(img.astype(np.int32)).long()
22 | 
23 | 
24 | object_map = {
25 |     'cube': 1,
26 |     'stapler': 2,
27 |     'cup': 3,
28 |     'orange': 4,
29 |     'tape': 5,
30 |     'bowl': 6,
31 |     'box': 7,
32 |     'cola': 8,
33 |     'chip_jar': 9,
34 |     'juice': 10,
35 |     'sugar_jar': 11,
36 |     'spoon': 12,
37 |     'triangle': 13,
38 |     'knife': 14,
39 |     'notebook': 15,
40 |     'rubik_cube': 16,
41 |     'laundry_liquid': 17
42 | }
43 | inv_cls_map = {str(v): k for k, v in object_map.items()}
44 | 
45 | 
46 | class ActiveAgent():
47 |     def __init__(self, idx, n_points, seg_args,
48 |                  mode='sim', root_path='the root path of IORD'):
49 |         self.root_path = root_path
50 |         self.scene_path = None
51 |         self.group_list = [4, 6, 9] + list(range(10, 15)) + list(range(20, 36))
52 |         self.idx = idx
53 |         self.logger = open('logs/env_%d.txt' % idx, 'w')
54 | 
55 |         self.mode = mode
56 |         self.n_points = n_points
57 |         self.n_actions = 5
58 | 
59 |         # moving information
60 |         self.target_object = 0
61 |         self.coord = [30, 0]
62 |         self.pre_vis = 0
63 |         self.end_thres = 0.85
64 |         self.end_flag = False
65 | 
66 |         # camera parameters
67 |         self.focalLength_x = 615.747
68 |         self.focalLength_y = 616.041
69 |         self.centerX = 317.017
70 |         self.centerY = 241.722
71 |         self.scalingFactor = 1000.0
72 | 
73 |         # load segment model
74 |         self.args = seg_args
75 |         print('using mode ', self.mode)
76 |         if self.mode == 'semantic':
77 |             model_dict = load_snapshot(self.args.snapshot, self.args.depth_fusion)
78 |             self.segmodel =
SegmentationModule( 79 | model_dict, 256, 18, self.args.depth_fusion, 80 | self.args.vote_mode, self.args.vote_scales 81 | ) 82 | self.segmodel = nn.DataParallel(self.segmodel) 83 | self.segmodel.load_state_dict(torch.load( 84 | os.path.join(self.args.model_dir, self.args.depth_fusion, 'epoch_2.pth') 85 | )) 86 | self.segmodel = self.segmodel.cuda().eval() 87 | 88 | self.input_transform = transforms.Compose([ 89 | transforms.ToTensor(), 90 | transforms.Normalize( 91 | [0.40384353, 0.45469216, 0.48145765], 92 | [0.20519882, 0.21251819, 0.22867874] 93 | ) 94 | ]) 95 | self.target_transform = MaskToTensor() 96 | 97 | def reset(self, min_vis=True, up=3, verbose=False): 98 | self.timestep = 0 99 | self.path = [] 100 | 101 | # choose starting point 102 | self.target_group = 'Group_%d_a' % random.choice(self.group_list) 103 | self.scene_idx = random.randint(3, 6) 104 | self.target_scene = sorted(os.listdir(os.path.join(self.root_path, self.target_group)))[self.scene_idx] 105 | self.scene_path = os.path.join(self.root_path, self.target_group, self.target_scene) 106 | self.coord = [random.randint(0, 4) * 10 + 30, random.randint(0, 35)] 107 | self.path.append(self.coord) 108 | 109 | # get objects 110 | gt = self._get_gt() 111 | objects = np.unique(gt)[1:] 112 | 113 | # choose target object according to visibility 114 | done = False 115 | self.end_flag = False 116 | with open(os.path.join(self.scene_path, 'RGB', '%d_RGB_%d' 117 | % tuple(self.coord), 'vis_demo.json'), 'r') as f: 118 | object_vis_dic = json.loads(f.read()) 119 | vis_order = sorted(object_vis_dic.items(), key=lambda x: x[1]) 120 | if min_vis: 121 | target_object_name = vis_order[random.randint(0, min(len(vis_order), 1))][0] 122 | else: 123 | min_objects = [object_map[vis_order[0][0]]] * 2 124 | target_object_name = inv_cls_map[str(random.choice(objects.tolist() + min_objects))] 125 | self.target_object = object_map[target_object_name] 126 | self.pre_vis = object_vis_dic.get(target_object_name, 0) 127 | 128 | if verbose: 129 | self.logger.write('Agent %d starting at %s, scene %s, coord %s \n' % \ 130 | (self.idx, self.target_group, self.target_scene, str(self.coord))) 131 | self.logger.write('Agent %d target object is [%d : %s] \n' % (self.idx, self.target_object, target_object_name)) 132 | self.logger.write('Agent %d the initial visibility is %f \n' % (self.idx, self.pre_vis)) 133 | self.logger.flush() 134 | 135 | return self._get_state_from_gt(gt), done 136 | 137 | def step(self, action): 138 | self.timestep += 1 139 | 140 | # 1-up 2-down 3-left 4-right 0-finish 141 | assert action in list(range(self.n_actions)) 142 | invalid_ops = False 143 | if action == 1: 144 | if self.coord[0] < 70: 145 | self.coord[0] += 10 146 | else: 147 | # self.coord[1] = (self.coord[1] + 18) % 36 148 | invalid_ops = True 149 | elif action == 2: 150 | if self.coord[0] > 30: 151 | self.coord[0] -= 10 152 | else: 153 | invalid_ops = True 154 | elif action == 3: 155 | self.coord[1] = (self.coord[1] + 35) % 36 156 | elif action == 4: 157 | self.coord[1] = (self.coord[1] + 1) % 36 158 | 159 | # if self.coord in self.path: 160 | # invalid_ops = True 161 | # else: 162 | # self.path.append(self.coord) 163 | 164 | done = False 165 | with open(os.path.join(self.scene_path, 'RGB', '%d_RGB_%d' % tuple(self.coord), 'vis_demo.json'), 'r') as f: 166 | vis_dic = json.loads(f.read()) 167 | vis = vis_dic.get(inv_cls_map[str(self.target_object)], 0) 168 | 169 | reward = vis - self.pre_vis - int(invalid_ops) 170 | # reward = 0 171 | self.pre_vis = vis 172 | if action == 0: 173 
| done = vis > self.end_thres 174 | self.end_flag = done 175 | if done: 176 | reward = vis * 0.25 177 | else: 178 | reward = -0.5 - (1 - vis) - 0.05 * (20 - self.timestep) 179 | # reward = -1 180 | done = True 181 | 182 | gt = self._get_gt() 183 | 184 | if self.timestep >= 20: 185 | done = True 186 | # reward -= self.timestep * 0.1 187 | reward = -1 188 | 189 | return self._get_state_from_gt(gt), reward, done 190 | 191 | def _get_gt(self): 192 | if self.mode == 'sim': 193 | gt = imread(os.path.join(self.scene_path, 'RGB', '%d_RGB_%d' % tuple(self.coord), 'direct_mask.png')).astype(np.int8) 194 | elif self.mode == 'semantic': 195 | # get img 196 | img = np.array(Image.open(os.path.join( 197 | self.scene_path, 'RGB', '%d_RGB_%d.jpg' % tuple(self.coord) 198 | )).convert('RGB')).astype(np.float32) / 255.0 199 | depth = np.load(os.path.join( 200 | self.scene_path, 'depth', '%d_depth_%d.npy' % tuple(self.coord) 201 | )) 202 | img = self.input_transform(img) 203 | if self.args.depth_fusion != 'no-depth': 204 | depth_trans = transforms.ToTensor()(np.expand_dims(depth.astype(np.float32) / 1000.0, axis=2)) 205 | depth_trans = depth_trans.cuda() 206 | else: 207 | depth_trans = None 208 | 209 | # forward segment model 210 | with torch.no_grad(): 211 | probs = self.segmodel(img.unsqueeze(0).cuda(), depth_trans) 212 | preds = torch.argmax(probs, dim=1).data.cpu().numpy() 213 | gt = preds.astype(np.int8).squeeze(0) 214 | elif self.mode == 'instance': 215 | pass 216 | 217 | return gt 218 | 219 | def _get_state_from_gt(self, gt, step=3): 220 | tgt_mask = np.zeros(gt.shape).astype(np.int8) 221 | tgt_mask[gt == self.target_object] = 1 222 | tgt_mask = tgt_mask.reshape(gt.shape + (1,)) 223 | 224 | im_arr = imread(os.path.join(self.scene_path, 'RGB', '%d_RGB_%d.jpg' % tuple(self.coord))).astype(np.float32) / 255.0 225 | # rgbt = np.concatenate((im_arr, tgt_mask), axis=2).astype(np.float32) 226 | 227 | tgt_mask = np.concatenate((tgt_mask, tgt_mask, tgt_mask), axis=2).astype(np.float32) 228 | rgbt = np.concatenate((im_arr, tgt_mask), axis=0) 229 | 230 | return rgbt.transpose(2, 0, 1) 231 | 232 | def seed(self, seed): 233 | random.seed(seed) 234 | np.random.seed(seed) 235 | 236 | def __del__(self): 237 | self.logger.close() 238 | 239 | 240 | if __name__ == '__main__': 241 | pass -------------------------------------------------------------------------------- /viewpoint_optim/RL_CNN/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import argparse 5 | import os 6 | import sys 7 | sys.path.append('..') 8 | import time 9 | 10 | import torch 11 | import torch.nn.functional as F 12 | import torch.optim as optim 13 | from torch.autograd import Variable 14 | 15 | from environment import ActiveAgent 16 | from cnn import PointNetActorCritic 17 | from utils import setup_logger 18 | 19 | 20 | # Training settings 21 | parser = argparse.ArgumentParser(description='A2C') 22 | parser.add_argument('--lr', type=float, default=0.0001, 23 | help='learning rate (default: 0.0001)') 24 | parser.add_argument('--hidden-size', type=int, default=1024, 25 | help='Hidden size for LSTM') 26 | parser.add_argument('--gamma', type=float, default=0.99, 27 | help='discount factor for rewards (default: 0.99)') 28 | parser.add_argument('--tau', type=float, default=1.00, 29 | help='parameter for GAE (default: 1.00)') 30 | parser.add_argument('--entropy-coef', type=float, default=0.01, 31 | help='entropy term coefficient (default: 0.01)') 
32 | parser.add_argument('--value-loss-coef', type=float, default=0.5,
33 |                     help='value loss coefficient (default: 0.5)')
34 | parser.add_argument('--max-grad-norm', type=float, default=20,
35 |                     help='max norm for gradient clipping (default: 20)')
36 | parser.add_argument('--seed', type=int, default=456,
37 |                     help='random seed (default: 456)')
38 | parser.add_argument('--num-steps', type=int, default=20,
39 |                     help='number of forward steps in A2C (default: 20)')
40 | parser.add_argument('--max-episode-length', type=int, default=20,
41 |                     help='maximum length of an episode (default: 20)')
42 | parser.add_argument('--env-name', default='PointNetActorCritic',
43 |                     help='environment to train on')
44 | parser.add_argument('--no-shared', default=False,
45 |                     help='use an optimizer without shared momentum.')
46 | parser.add_argument('--n-points', type=int, default=3000,
47 |                     help='the number of points fed to PointNet')
48 | parser.add_argument('--log-dir', type=str, default='logs',
49 |                     help='Folder to save logs')
50 | parser.add_argument('--model-dir', type=str, default='trained_models',
51 |                     help='Folder to save models')
52 | parser.add_argument('--data-dir', type=str, default='data',
53 |                     help='Path to the IORD dataset')
54 | parser.add_argument('--resume', default=False,
55 |                     help='resume latest model or not')
56 | parser.add_argument('--num-actions', type=int, default=5,
57 |                     help='discrete action space')
58 | parser.add_argument('--num-test', type=int, default=20,
59 |                     help='number of test episodes')
60 | 
61 | # segmentation settings
62 | parser.add_argument("--depth-fusion", type=str, default='no-depth',
63 |                     choices=['no-depth', 'pixel-concat', 'feature-concat'])
64 | parser.add_argument("--vote-mode", metavar="NAME",
65 |                     type=str, choices=["plain", "mean", "voting", "max",
66 |                                        "mean+flip", "voting+flip", "max+flip"], default="mean")
67 | parser.add_argument("--vote-scales", type=list, default=[0.7, 1.2])
68 | parser.add_argument("--output-mode", metavar="NAME", type=str, choices=["palette", "raw", "prob"],
69 |                     default="palette",
70 |                     help="How the output files are formatted."
71 |                          " -- palette: color coded predictions"
72 |                          " -- raw: gray-scale predictions"
73 |                          " -- prob: gray-scale predictions plus probabilities")
74 | parser.add_argument("--snapshot", metavar="SNAPSHOT_FILE", type=str, default='wide_resnet38_deeplab_vistas.pth.tar', help="Snapshot file to load")
75 | parser.add_argument("--seg-model-dir", type=str, default="path of segmentation model")
76 | 
77 | 
78 | if __name__ == '__main__':
79 |     args = parser.parse_args()
80 |     if not os.path.isdir(args.log_dir):
81 |         os.makedirs(args.log_dir)
82 | 
83 |     torch.manual_seed(args.seed)
84 |     np.random.seed(args.seed)
85 | 
86 |     model = PointNetActorCritic(num_points=args.n_points, num_actions=args.num_actions)
87 |     model = model.cuda()
88 |     env = ActiveAgent(idx=0, n_points=args.n_points,
89 |                       seg_args=args, mode='sim', root_path=args.data_dir)
90 |     env.seed(args.seed)
91 | 
92 |     # resume latest model
93 |     if args.resume:
94 |         model_path = os.path.join(args.model_dir, 'latest.pth')
95 |         if not os.path.isdir(args.model_dir):
96 |             os.makedirs(args.model_dir)
97 |         elif os.path.exists(model_path):
98 |             print('Loading model from %s...'
% model_path) 99 | model.load_state_dict(torch.load(model_path)) 100 | 101 | itr = 0 102 | epoch = 0 103 | training_time = 50 104 | train_logger = setup_logger('trainer', os.path.join(args.log_dir, 'trainer_log.txt')) 105 | test_logger = setup_logger('test', os.path.join(args.log_dir, 'test_log.txt')) 106 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 107 | 108 | # test parameters 109 | all_success_time = 0 110 | all_time = 0 111 | ep_success_time = 0 112 | success_phase = 0.1 113 | check_flag = False 114 | 115 | while True: 116 | epoch += 1 117 | ################### training phase ################### 118 | model = model.train() 119 | for train_itr in range(training_time): 120 | training = True 121 | episode_length = 0 122 | 123 | state, _ = env.reset(min_vis=False) 124 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 125 | if torch.cuda.is_available(): 126 | state = state.cuda() 127 | done = True 128 | 129 | while training: 130 | if done: 131 | cx = Variable(torch.zeros(1, args.hidden_size)) 132 | hx = Variable(torch.zeros(1, args.hidden_size)) 133 | else: 134 | cx = Variable(cx.data) 135 | hx = Variable(hx.data) 136 | if torch.cuda.is_available(): 137 | hx = hx.cuda() 138 | cx = cx.cuda() 139 | 140 | values = [] 141 | log_probs = [] 142 | rewards = [] 143 | entropies = [] 144 | 145 | for step in range(args.num_steps): 146 | itr += 1 147 | episode_length += 1 148 | 149 | value, logit, (hx, cx) = model((state, (hx, cx))) 150 | prob = F.softmax(logit, dim=1) 151 | log_prob = F.log_softmax(logit, dim=1) 152 | entropy = -(log_prob * prob).sum(1, keepdim=True) 153 | entropies.append(entropy) 154 | 155 | action = prob.multinomial(num_samples=1).data.cpu() 156 | _action = Variable(action) 157 | if torch.cuda.is_available(): 158 | _action = _action.cuda() 159 | log_prob = log_prob.gather(1, _action) 160 | 161 | path_info = '%s %s %s %d' % (env.target_group, env.scene_idx, env.coord, action.numpy()) 162 | # train_logger.info(path_info) 163 | 164 | state, reward, done = env.step(action.numpy()) 165 | 166 | if done: 167 | training = False 168 | success = env.end_flag 169 | log_info = 'Training Step: [%d - %d], Episode length: %d, Reward: %0.2f, Success: %s' \ 170 | % (epoch, train_itr, episode_length, sum(rewards) + reward, str(success)) 171 | train_logger.info(log_info) 172 | print(log_info) 173 | print(prob.cpu().detach().numpy()[0]) 174 | episode_length = 0 175 | # state, _ = env.reset(up=min(max(itr // 2500, 3), 6)) 176 | state, _ = env.reset(min_vis=False) 177 | 178 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 179 | if torch.cuda.is_available(): 180 | state = state.cuda() 181 | values.append(value) 182 | log_probs.append(log_prob) 183 | rewards.append(reward) 184 | 185 | if done: 186 | break 187 | 188 | R = torch.zeros(1, 1) 189 | if not done: 190 | value, _, _ = model((state, (hx, cx))) 191 | R = value.data 192 | 193 | policy_loss = 0 194 | value_loss = 0 195 | R = Variable(R) 196 | gae = torch.zeros(1, 1) 197 | if torch.cuda.is_available(): 198 | R = R.cuda() 199 | gae = gae.cuda() 200 | values.append(R) 201 | for i in reversed(range(len(rewards))): 202 | R = args.gamma * R + rewards[i] 203 | advantage = R - values[i] 204 | value_loss = value_loss + 0.5 * advantage.pow(2) 205 | 206 | # Generalized Advantage Estimataion 207 | delta_t = rewards[i] + args.gamma * \ 208 | values[i + 1].data - values[i].data 209 | gae = gae * args.gamma * args.tau + delta_t 210 | 211 | policy_loss = policy_loss - \ 212 | log_probs[i] * Variable(gae) - args.entropy_coef * entropies[i] 
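                    # -- Editorial comments (added; not in the original source) --
                    # Generalized Advantage Estimation, as computed above:
                    #   delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
                    #   gae_t   = delta_t + gamma * tau * gae_{t+1}
                    # With tau = 1.0 (the default) this telescopes into the full
                    # discounted return minus the value baseline; smaller tau
                    # trades variance for bias. The entropy term discourages the
                    # policy from collapsing to a deterministic action too early.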
213 | 214 | optimizer.zero_grad() 215 | 216 | (policy_loss + args.value_loss_coef * value_loss).backward() 217 | torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) 218 | 219 | optimizer.step() 220 | 221 | 222 | ################### testing phase ################### 223 | model = model.eval() 224 | 225 | state, _ = env.reset(min_vis=False) 226 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 227 | if torch.cuda.is_available(): 228 | state = state.cuda() 229 | reward_sum = 0 230 | done = True 231 | 232 | episode_length = 0 233 | testing = True 234 | while testing: 235 | episode_length += 1 236 | # Sync with the shared model 237 | if done: 238 | with torch.no_grad(): 239 | cx = torch.zeros(1, args.hidden_size) 240 | hx = torch.zeros(1, args.hidden_size) 241 | else: 242 | with torch.no_grad(): 243 | cx = cx.data 244 | hx = hx.data 245 | if torch.cuda.is_available(): 246 | hx = hx.cuda() 247 | cx = cx.cuda() 248 | 249 | with torch.no_grad(): 250 | value, logit, (hx, cx) = model((state, (hx, cx))) 251 | prob = F.softmax(logit, dim=1) 252 | action = prob.max(1, keepdim=True)[1].data.cpu().numpy() 253 | 254 | path_info = '%s %s %s %d' % (env.target_group, env.scene_idx, env.coord, action[0, 0]) 255 | # test_logger.info(path_info) 256 | 257 | state, reward, done = env.step(action[0, 0]) 258 | reward_sum += reward 259 | 260 | if done: 261 | # print('testing: ', all_time) 262 | success = env.end_flag 263 | all_success_time += success 264 | ep_success_time += success 265 | all_time += 1 266 | if all_time % args.num_test == 0: 267 | check_flag = True 268 | 269 | state, _ = env.reset(min_vis=False) 270 | time.sleep(0.1) 271 | 272 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 273 | if torch.cuda.is_available(): 274 | state = state.cuda() 275 | 276 | if check_flag: 277 | all_success_rate = all_success_time / all_time 278 | log_info = 'Num steps: %d, Episode length: %d, Reward: %0.2f, EP Success: %0.2f, ALL Success: %0.3f' \ 279 | % (itr, episode_length, reward_sum, ep_success_time / args.num_test, all_success_rate) 280 | test_logger.info(log_info) 281 | print(log_info) 282 | torch.save(model.state_dict(), os.path.join(args.model_dir, 'latest.pth')) 283 | 284 | # save models in some important phases 285 | if all_success_rate > success_phase: 286 | torch.save(model.state_dict(), 287 | os.path.join(args.model_dir, 'success_rate_%0.2f.pth' % success_phase)) 288 | success_phase += 0.1 289 | 290 | # save models according to steps 291 | if epoch % 20 == 0: 292 | torch.save(model.state_dict(), 293 | os.path.join(args.model_dir, 'model_%d.pth' % epoch)) 294 | 295 | reward_sum = 0 296 | episode_length = 0 297 | ep_success_time = 0 298 | check_flag = False 299 | testing = False 300 | 301 | time.sleep(1) -------------------------------------------------------------------------------- /viewpoint_optim/RL_pointnet/environment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | np.set_printoptions(threshold=np.nan) 3 | import random 4 | import os 5 | import sys 6 | sys.path.append('..') 7 | import json 8 | from scipy.misc import imread 9 | from PIL import Image 10 | import argparse 11 | 12 | import torch 13 | import torch.nn as nn 14 | from torchvision import transforms 15 | 16 | from segmodel import SegmentationModule 17 | from utils import load_snapshot 18 | 19 | 20 | class MaskToTensor(object): 21 | def __call__(self, img): 22 | return torch.from_numpy(img.astype(np.int32)).long() 23 | 24 | 25 | object_map = { 26 | 
'cube': 1,
27 |     'stapler': 2,
28 |     'cup': 3,
29 |     'orange': 4,
30 |     'tape': 5,
31 |     'bowl': 6,
32 |     'box': 7,
33 |     'cola': 8,
34 |     'chip_jar': 9,
35 |     'juice': 10,
36 |     'sugar_jar': 11,
37 |     'spoon': 12,
38 |     'triangle': 13,
39 |     'knife': 14,
40 |     'notebook': 15,
41 |     'rubik_cube': 16,
42 |     'laundry_liquid': 17
43 | }  # the 17 IORD categories; class ids start at 1, mask label 0 is background
44 | inv_cls_map = {str(v): k for k, v in object_map.items()}
45 | 
46 | 
47 | class ActiveAgent:
48 |     def __init__(self, idx, n_points, seg_args=None,  # argparse namespace supplied by the caller
49 |                  mode='sim', root_path='the root path of IORD'):
50 |         self.root_path = root_path
51 |         self.scene_path = None
52 |         self.group_list = [4, 6, 9] + list(range(10, 15)) + list(range(20, 36))
53 |         self.idx = idx
54 |         self.logger = open('logs/env_%d.txt' % idx, 'w')
55 | 
56 |         self.mode = mode
57 |         self.n_points = n_points
58 |         self.n_actions = 5
59 | 
60 |         # moving information
61 |         self.target_object = 0
62 |         self.coord = [30, 0]  # [30-70 in steps of 10, 0-35 wrap-around]; see step() for the motion model
63 |         self.pre_vis = 0
64 |         self.end_thres = 0.85  # the stop action succeeds once target visibility exceeds this
65 |         self.end_flag = False
66 | 
67 |         # camera parameters
68 |         self.focalLength_x = 615.747
69 |         self.focalLength_y = 616.041
70 |         self.centerX = 317.017
71 |         self.centerY = 241.722
72 |         self.scalingFactor = 1000.0
73 | 
74 |         # load segment model
75 |         self.args = seg_args
76 |         print('using mode ', self.mode)
77 |         if self.mode == 'semantic':
78 |             model_dict = load_snapshot(self.args.snapshot, self.args.depth_fusion)
79 |             self.segmodel = SegmentationModule(
80 |                 model_dict, 256, 18, self.args.depth_fusion,
81 |                 self.args.vote_mode, self.args.vote_scales
82 |             )
83 |             self.segmodel = nn.DataParallel(self.segmodel)
84 |             self.segmodel.load_state_dict(torch.load(
85 |                 os.path.join(self.args.model_dir, self.args.depth_fusion, 'epoch_15.pth')  # note: reads from --model-dir, not --seg-model-dir
86 |             ))
87 |             self.segmodel = self.segmodel.cuda().eval()
88 | 
89 |         self.input_transform = transforms.Compose([
90 |             transforms.ToTensor(),
91 |             transforms.Normalize(
92 |                 [0.40384353, 0.45469216, 0.48145765],
93 |                 [0.20519882, 0.21251819, 0.22867874]
94 |             )
95 |         ])
96 |         self.target_transform = MaskToTensor()
97 | 
98 |     def reset(self, min_vis=True, up=3, verbose=False):
99 |         self.timestep = 0
100 |         self.path = []
101 | 
102 |         # choose starting point
103 |         self.target_group = 'Group_%d_a' % random.choice(self.group_list)
104 |         self.scene_idx = random.randint(3, 6)
105 |         self.target_scene = sorted(os.listdir(os.path.join(self.root_path, self.target_group)))[self.scene_idx]
106 |         self.scene_path = os.path.join(self.root_path, self.target_group, self.target_scene)
107 |         self.coord = [random.randint(0, 4) * 10 + 30, random.randint(0, 35)]
108 |         self.path.append(self.coord)
109 | 
110 |         # get objects
111 |         gt = self._get_gt()
112 |         objects = np.unique(gt)[1:]
113 | 
114 |         # choose target object according to visibility
115 |         done = False
116 |         self.end_flag = False
117 |         with open(os.path.join(self.scene_path, 'RGB', '%d_RGB_%d'
118 |                   % tuple(self.coord), 'vis_demo.json'), 'r') as f:
119 |             object_vis_dic = json.loads(f.read())
120 |         vis_order = sorted(object_vis_dic.items(), key=lambda x: x[1])
121 |         if min_vis:
122 |             target_object_name = vis_order[random.randint(0, min(len(vis_order), 1))][0]  # one of the two least-visible objects
123 |         else:
124 |             min_objects = [object_map[vis_order[0][0]]] * 2
125 |             target_object_name = inv_cls_map[str(random.choice(objects.tolist() + min_objects))]
126 |         self.target_object = object_map[target_object_name]
127 |         self.pre_vis = object_vis_dic.get(target_object_name, 0)
128 | 
129 |         if verbose:
130 |             self.logger.write('Agent %d starting at %s, scene %s, coord %s \n' % \
131 |                 (self.idx, self.target_group, self.target_scene,
str(self.coord))) 132 | self.logger.write('Agent %d target object is [%d : %s] \n' % (self.idx, self.target_object, target_object_name)) 133 | self.logger.write('Agent %d the initial visibility is %f \n' % (self.idx, self.pre_vis)) 134 | self.logger.flush() 135 | 136 | return self._get_pcd_from_gt(gt), done 137 | 138 | def step(self, action): 139 | self.timestep += 1 140 | 141 | # 1-up 2-down 3-left 4-right 0-finish 142 | assert action in list(range(self.n_actions)) 143 | invalid_ops = False 144 | if action == 1: 145 | if self.coord[0] < 70: 146 | self.coord[0] += 10 147 | else: 148 | # self.coord[1] = (self.coord[1] + 18) % 36 149 | invalid_ops = True 150 | elif action == 2: 151 | if self.coord[0] > 30: 152 | self.coord[0] -= 10 153 | else: 154 | invalid_ops = True 155 | elif action == 3: 156 | self.coord[1] = (self.coord[1] + 35) % 36 157 | elif action == 4: 158 | self.coord[1] = (self.coord[1] + 1) % 36 159 | 160 | # if self.coord in self.path: 161 | # invalid_ops = True 162 | # else: 163 | # self.path.append(self.coord) 164 | 165 | done = False 166 | with open(os.path.join(self.scene_path, 'RGB', '%d_RGB_%d' % tuple(self.coord), 'vis_demo.json'), 'r') as f: 167 | vis_dic = json.loads(f.read()) 168 | vis = vis_dic.get(inv_cls_map[str(self.target_object)], 0) 169 | 170 | reward = vis - self.pre_vis - int(invalid_ops) 171 | # reward = 0 172 | self.pre_vis = vis 173 | if action == 0: 174 | done = vis > self.end_thres 175 | self.end_flag = done 176 | if done: 177 | reward = vis * 0.25 178 | else: 179 | reward = -0.5 - (1 - vis) - 0.05 * (20 - self.timestep) 180 | # reward = 0 181 | done = True 182 | 183 | gt = self._get_gt() 184 | 185 | if self.timestep >= 20: 186 | done = True 187 | # reward -= self.timestep * 0.1 188 | reward = -1 189 | 190 | return self._get_pcd_from_gt(gt), reward, done 191 | 192 | def _get_gt(self): 193 | if self.mode == 'sim': 194 | gt = imread(os.path.join(self.scene_path, 'RGB', '%d_RGB_%d' % tuple(self.coord), 'direct_mask.png')).astype(np.int8) 195 | elif self.mode == 'semantic': 196 | # get img 197 | img = np.array(Image.open(os.path.join( 198 | self.scene_path, 'RGB', '%d_RGB_%d.jpg' % tuple(self.coord) 199 | )).convert('RGB')).astype(np.float32) / 255.0 200 | depth = np.load(os.path.join( 201 | self.scene_path, 'depth', '%d_depth_%d.npy' % tuple(self.coord) 202 | )) 203 | img = self.input_transform(img) 204 | if self.args.depth_fusion != 'no-depth': 205 | depth_trans = transforms.ToTensor()(np.expand_dims(depth.astype(np.float32) / 1000.0, axis=2)) 206 | depth_trans = depth_trans.unsqueeze(0).cuda() 207 | else: 208 | depth_trans = None 209 | 210 | # forward segment model 211 | with torch.no_grad(): 212 | probs = self.segmodel(img.unsqueeze(0).cuda(), depth_trans) 213 | preds = torch.argmax(probs, dim=1).data.cpu().numpy() 214 | gt = preds.astype(np.int8).squeeze(0) 215 | elif self.mode == 'instance': 216 | pass 217 | 218 | return gt 219 | 220 | def _get_pcd_from_gt(self, gt, step=3): 221 | tgt_mask = np.ones(gt.shape).astype(np.int8) 222 | tgt_mask[gt != self.target_object] = -1 223 | 224 | d_im = np.load(os.path.join(self.scene_path, 'depth', '%d_depth_%d.npy' % tuple(self.coord))) 225 | points = [] 226 | 227 | width = d_im.shape[1] 228 | height = d_im.shape[0] 229 | X = np.tile(np.arange(width), (height, 1)) 230 | Y = np.tile(np.arange(height), (width, 1)).T 231 | Z = d_im / self.scalingFactor 232 | x = (X - self.centerX) * Z / self.focalLength_x 233 | y = (Y - self.centerY) * Z / self.focalLength_y 234 | 235 | for v in range(0, d_im.shape[1], step): 236 | 
for u in range(0, d_im.shape[0], step): 237 | if Z[u][v] < 1e-4: 238 | continue 239 | points.append([x[u][v], y[u][v], Z[u][v], tgt_mask[u][v]]) 240 | points = np.array(points, dtype=np.float32) 241 | # points[:, 0] = (points[:, 0] - points[:, 0].mean()) / points[:, 0].std() 242 | # points[:, 1] = (points[:, 1] - points[:, 1].mean()) / points[:, 1].std() 243 | # points[:, 2] = (points[:, 2] - points[:, 2].mean()) / points[:, 2].std() 244 | 245 | points = points[np.random.randint(0, points.shape[0], size=self.n_points), :] 246 | 247 | return points.transpose(1, 0) 248 | 249 | def seed(self, seed): 250 | random.seed(seed) 251 | np.random.seed(seed) 252 | 253 | def __del__(self): 254 | self.logger.close() 255 | 256 | 257 | if __name__ == '__main__': 258 | pass -------------------------------------------------------------------------------- /viewpoint_optim/RL_pointnet/evaluate.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import argparse 5 | import os 6 | import sys 7 | sys.path.append('..') 8 | import time 9 | 10 | import torch 11 | import torch.nn.functional as F 12 | import torch.optim as optim 13 | from torch.autograd import Variable 14 | from torch.utils.cpp_extension import load 15 | 16 | from environment import ActiveAgent 17 | from pointnet import PointNetActorCritic 18 | from utils import setup_logger 19 | 20 | 21 | # Training settings 22 | parser = argparse.ArgumentParser(description='A2C') 23 | parser.add_argument('--lr', type=float, default=0.0001, 24 | help='learning rate (default: 0.0001)') 25 | parser.add_argument('--hidden-size', type=int, default=1024, 26 | help='Hidden size for LSTM') 27 | parser.add_argument('--gamma', type=float, default=0.99, 28 | help='discount factor for rewards (default: 0.99)') 29 | parser.add_argument('--tau', type=float, default=1.00, 30 | help='parameter for GAE (default: 1.00)') 31 | parser.add_argument('--entropy-coef', type=float, default=0.01, 32 | help='entropy term coefficient (default: 0.01)') 33 | parser.add_argument('--value-loss-coef', type=float, default=0.5, 34 | help='value loss coefficient (default: 0.5)') 35 | parser.add_argument('--max-grad-norm', type=float, default=20, 36 | help='value loss coefficient (default: 50)') 37 | parser.add_argument('--seed', type=int, default=456, 38 | help='random seed (default: 1)') 39 | parser.add_argument('--num-steps', type=int, default=20, 40 | help='number of forward steps in A2C (default: 20)') 41 | parser.add_argument('--max-episode-length', type=int, default=50, 42 | help='maximum length of an episode (default: 1000000)') 43 | parser.add_argument('--env-name', default='PointNetActorCritic', 44 | help='environment to train on') 45 | parser.add_argument('--no-shared', default=False, 46 | help='use an optimizer without shared momentum.') 47 | parser.add_argument('--n-points', type=int, default=3000, 48 | help='the number of points feed to pointnet') 49 | parser.add_argument('--log-dir', type=str, default='logs', 50 | help='Folder to save logs') 51 | parser.add_argument('--model-dir', type=str, default='trained_models', 52 | help='Folder to save models') 53 | parser.add_argument('--data-dir', type=str, default='data', 54 | help='Folder to IORD') 55 | parser.add_argument('--resume', default=True, 56 | help='resume latest model or not') 57 | parser.add_argument('--num-actions', type=int, default=5, 58 | help='discrete action space') 59 | parser.add_argument('--num-test', type=int, 
default=50, 60 | help='test time') 61 | parser.add_argument('--min', type=bool, default=True, 62 | help='use min-vis or not') 63 | parser.add_argument('--mode', type=str, default='semantic', 64 | help='vision mode') 65 | 66 | # segmentation settings 67 | parser.add_argument("--depth-fusion", type=str, default='no-depth', 68 | choices=['no-depth', 'pixel-concat', 'feature-concat']) 69 | parser.add_argument("--vote-mode", metavar="NAME", 70 | type=str, choices=["plain", "mean", "voting", "max", 71 | "mean+flip", "voting+flip", "max+flip"], default="mean") 72 | parser.add_argument("--vote-scales", type=list, default=[0.7, 1.2]) 73 | parser.add_argument("--output-mode", metavar="NAME", type=str, choices=["palette", "raw", "prob"], 74 | default="class", 75 | help="How the output files are formatted." 76 | " -- palette: color coded predictions" 77 | " -- raw: gray-scale predictions" 78 | " -- prob: gray-scale predictions plus probabilities") 79 | parser.add_argument("--snapshot", metavar="SNAPSHOT_FILE", type=str, default='wide_resnet38_deeplab_vistas.pth.tar', help="Snapshot file to load") 80 | parser.add_argument("--seg-model-dir", type=str, default="path of segmentation model") 81 | 82 | 83 | if __name__ == '__main__': 84 | args = parser.parse_args() 85 | if not os.path.isdir(args.log_dir): 86 | os.makedirs(args.log_dir) 87 | 88 | torch.manual_seed(args.seed) 89 | np.random.seed(args.seed) 90 | 91 | model = PointNetActorCritic(num_points=args.n_points, num_actions=args.num_actions) 92 | model = model.cuda() 93 | env = ActiveAgent(idx=0, n_points=args.n_points, 94 | seg_args=args, mode='semantic', root_path=args.data_dir) 95 | env.seed(args.seed) 96 | 97 | # resume latest model 98 | if args.resume: 99 | model_path = os.path.join(args.model_dir, 'latest.pth') 100 | if not os.path.isdir(args.model_dir): 101 | os.makedirs(args.model_dir) 102 | elif os.path.exists(model_path): 103 | print('Loading model from %s...' 
% model_path) 104 | model.load_state_dict(torch.load(model_path)) 105 | 106 | itr = 0 107 | epoch = 0 108 | training_time = 50 109 | # train_logger = setup_logger('trainer', os.path.join(args.log_dir, 'trainer_log.txt')) 110 | # test_logger = setup_logger('test', os.path.join(args.log_dir, 'test_log.txt')) 111 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 112 | 113 | # test parameters 114 | all_success_time = 0 115 | all_time = 0 116 | ep_success_time = 0 117 | success_phase = 0.1 118 | check_flag = False 119 | 120 | for _ in range(5): 121 | epoch += 1 122 | ################### testing phase ################### 123 | model = model.eval() 124 | 125 | state, _ = env.reset(min_vis=args.min) 126 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 127 | if torch.cuda.is_available(): 128 | state = state.cuda() 129 | reward_sum = 0 130 | done = True 131 | 132 | episode_length = 0 133 | testing = True 134 | while testing: 135 | episode_length += 1 136 | # Sync with the shared model 137 | if done: 138 | with torch.no_grad(): 139 | cx = torch.zeros(1, args.hidden_size) 140 | hx = torch.zeros(1, args.hidden_size) 141 | else: 142 | with torch.no_grad(): 143 | cx = cx.data 144 | hx = hx.data 145 | if torch.cuda.is_available(): 146 | hx = hx.cuda() 147 | cx = cx.cuda() 148 | 149 | with torch.no_grad(): 150 | value, logit, (hx, cx) = model((state, (hx, cx))) 151 | prob = F.softmax(logit, dim=1) 152 | action = prob.max(1, keepdim=True)[1].data.cpu().numpy() 153 | 154 | # path_info = '%s %s %s %d' % (env.target_group, env.scene_idx, env.coord, action[0, 0]) 155 | # test_logger.info(path_info) 156 | 157 | state, reward, done = env.step(action[0, 0]) 158 | reward_sum += reward 159 | 160 | if done: 161 | # print('testing: ', all_time) 162 | success = env.end_flag 163 | all_success_time += success 164 | ep_success_time += success 165 | all_time += 1 166 | if all_time % args.num_test == 0: 167 | check_flag = True 168 | 169 | state, _ = env.reset(min_vis=args.min) 170 | time.sleep(0.1) 171 | 172 | print('testing: ', all_time) 173 | 174 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 175 | if torch.cuda.is_available(): 176 | state = state.cuda() 177 | 178 | if check_flag: 179 | all_success_rate = all_success_time / all_time 180 | log_info = 'Num steps: %d, Episode length: %d, Reward: %0.2f, EP Success: %0.4f, ALL Success: %0.4f' \ 181 | % (itr, episode_length, reward_sum, ep_success_time / args.num_test, all_success_rate) 182 | # test_logger.info(log_info) 183 | print(log_info) 184 | 185 | reward_sum = 0 186 | episode_length = 0 187 | ep_success_time = 0 188 | check_flag = False 189 | testing = False 190 | 191 | time.sleep(1) -------------------------------------------------------------------------------- /viewpoint_optim/RL_pointnet/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import argparse 5 | import os 6 | import sys 7 | sys.path.append('..') 8 | import time 9 | 10 | import torch 11 | import torch.nn.functional as F 12 | import torch.optim as optim 13 | from torch.autograd import Variable 14 | 15 | from environment import ActiveAgent 16 | from pointnet import PointNetActorCritic 17 | from utils import setup_logger 18 | 19 | 20 | # Training settings 21 | parser = argparse.ArgumentParser(description='A2C') 22 | parser.add_argument('--lr', type=float, default=0.0001, 23 | help='learning rate (default: 0.0001)') 24 | parser.add_argument('--hidden-size', type=int, 
default=1024, 25 | help='Hidden size for LSTM') 26 | parser.add_argument('--gamma', type=float, default=0.99, 27 | help='discount factor for rewards (default: 0.99)') 28 | parser.add_argument('--tau', type=float, default=1.00, 29 | help='parameter for GAE (default: 1.00)') 30 | parser.add_argument('--entropy-coef', type=float, default=0.01, 31 | help='entropy term coefficient (default: 0.01)') 32 | parser.add_argument('--value-loss-coef', type=float, default=0.5, 33 | help='value loss coefficient (default: 0.5)') 34 | parser.add_argument('--max-grad-norm', type=float, default=20, 35 | help='value loss coefficient (default: 50)') 36 | parser.add_argument('--seed', type=int, default=456, 37 | help='random seed (default: 1)') 38 | parser.add_argument('--num-steps', type=int, default=20, 39 | help='number of forward steps in A2C (default: 20)') 40 | parser.add_argument('--max-episode-length', type=int, default=50, 41 | help='maximum length of an episode (default: 1000000)') 42 | parser.add_argument('--env-name', default='PointNetActorCritic', 43 | help='environment to train on') 44 | parser.add_argument('--no-shared', default=False, 45 | help='use an optimizer without shared momentum.') 46 | parser.add_argument('--n-points', type=int, default=3000, 47 | help='the number of points feed to pointnet') 48 | parser.add_argument('--log-dir', type=str, default='logs', 49 | help='Folder to save logs') 50 | parser.add_argument('--model-dir', type=str, default='trained_models', 51 | help='Folder to save models') 52 | parser.add_argument('--data-dir', type=str, default='data', 53 | help='Folder to IORD') 54 | parser.add_argument('--resume', default=False, 55 | help='resume latest model or not') 56 | parser.add_argument('--num-actions', type=int, default=5, 57 | help='discrete action space') 58 | parser.add_argument('--num-test', type=int, default=20, 59 | help='test time') 60 | 61 | # segmentation settings 62 | parser.add_argument("--depth-fusion", type=str, default='no-depth', 63 | choices=['no-depth', 'pixel-concat', 'feature-concat']) 64 | parser.add_argument("--vote-mode", metavar="NAME", 65 | type=str, choices=["plain", "mean", "voting", "max", 66 | "mean+flip", "voting+flip", "max+flip"], default="mean") 67 | parser.add_argument("--vote-scales", type=list, default=[0.7, 1.2]) 68 | parser.add_argument("--output-mode", metavar="NAME", type=str, choices=["palette", "raw", "prob"], 69 | default="class", 70 | help="How the output files are formatted." 
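                    # NOTE: default="class" above is not among the declared choices;
                    # argparse only validates values given on the command line, so the
                    # default slips through, but an explicit --output-mode must be one
                    # of palette / raw / prob.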
71 | " -- palette: color coded predictions" 72 | " -- raw: gray-scale predictions" 73 | " -- prob: gray-scale predictions plus probabilities") 74 | parser.add_argument("--snapshot", metavar="SNAPSHOT_FILE", type=str, default='wide_resnet38_deeplab_vistas.pth.tar', help="Snapshot file to load") 75 | parser.add_argument("--seg-model-dir", type=str, default="path of segmentation model") 76 | 77 | 78 | if __name__ == '__main__': 79 | args = parser.parse_args() 80 | if not os.path.isdir(args.log_dir): 81 | os.makedirs(args.log_dir) 82 | 83 | torch.manual_seed(args.seed) 84 | np.random.seed(args.seed) 85 | 86 | model = PointNetActorCritic(num_points=args.n_points, num_actions=args.num_actions) 87 | model = model.cuda() 88 | env = ActiveAgent(idx=0, n_points=args.n_points, 89 | seg_args=args, mode='sim', root_path=args.data_dir) 90 | env.seed(args.seed) 91 | 92 | # resume latest model 93 | if args.resume: 94 | model_path = os.path.join(args.model_dir, 'latest.pth') 95 | if not os.path.isdir(args.model_dir): 96 | os.makedirs(args.model_dir) 97 | elif os.path.exists(model_path): 98 | print('Loading model from %s...' % model_path) 99 | model.load_state_dict(torch.load(model_path)) 100 | 101 | itr = 0 102 | epoch = 0 103 | training_time = 50 104 | train_logger = setup_logger('trainer', os.path.join(args.log_dir, 'trainer_log.txt')) 105 | test_logger = setup_logger('test', os.path.join(args.log_dir, 'test_log.txt')) 106 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 107 | 108 | # test parameters 109 | all_success_time = 0 110 | all_time = 0 111 | ep_success_time = 0 112 | success_phase = 0.1 113 | check_flag = False 114 | 115 | while True: 116 | epoch += 1 117 | ################### training phase ################### 118 | model = model.train() 119 | for train_itr in range(training_time): 120 | training = True 121 | episode_length = 0 122 | 123 | state, _ = env.reset(min_vis=True) 124 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 125 | if torch.cuda.is_available(): 126 | state = state.cuda() 127 | done = True 128 | 129 | while training: 130 | if done: 131 | cx = Variable(torch.zeros(1, args.hidden_size)) 132 | hx = Variable(torch.zeros(1, args.hidden_size)) 133 | else: 134 | cx = Variable(cx.data) 135 | hx = Variable(hx.data) 136 | if torch.cuda.is_available(): 137 | hx = hx.cuda() 138 | cx = cx.cuda() 139 | 140 | values = [] 141 | log_probs = [] 142 | rewards = [] 143 | entropies = [] 144 | 145 | for step in range(args.num_steps): 146 | itr += 1 147 | episode_length += 1 148 | 149 | value, logit, (hx, cx) = model((state, (hx, cx))) 150 | prob = F.softmax(logit, dim=1) 151 | log_prob = F.log_softmax(logit, dim=1) 152 | entropy = -(log_prob * prob).sum(1, keepdim=True) 153 | entropies.append(entropy) 154 | 155 | action = prob.multinomial(num_samples=1).data.cpu() 156 | _action = Variable(action) 157 | if torch.cuda.is_available(): 158 | _action = _action.cuda() 159 | log_prob = log_prob.gather(1, _action) 160 | 161 | path_info = '%s %s %s %d' % (env.target_group, env.scene_idx, env.coord, action.numpy()) 162 | train_logger.info(path_info) 163 | 164 | state, reward, done = env.step(action.numpy()) 165 | 166 | if done: 167 | training = False 168 | success = env.end_flag 169 | log_info = 'Training Step: [%d - %d], Episode length: %d, Reward: %0.2f, Success: %s' \ 170 | % (epoch, train_itr, episode_length, sum(rewards) + reward, str(success)) 171 | train_logger.info(log_info) 172 | print(log_info) 173 | episode_length = 0 174 | # state, _ = env.reset(up=min(max(itr // 2500, 3), 6)) 175 
| state, _ = env.reset(min_vis=True) 176 | 177 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 178 | if torch.cuda.is_available(): 179 | state = state.cuda() 180 | values.append(value) 181 | log_probs.append(log_prob) 182 | rewards.append(reward) 183 | 184 | if done: 185 | break 186 | 187 | R = torch.zeros(1, 1) 188 | if not done: 189 | value, _, _ = model((state, (hx, cx))) 190 | R = value.data 191 | 192 | policy_loss = 0 193 | value_loss = 0 194 | R = Variable(R) 195 | gae = torch.zeros(1, 1) 196 | if torch.cuda.is_available(): 197 | R = R.cuda() 198 | gae = gae.cuda() 199 | values.append(R) 200 | for i in reversed(range(len(rewards))): 201 | R = args.gamma * R + rewards[i] 202 | advantage = R - values[i] 203 | value_loss = value_loss + 0.5 * advantage.pow(2) 204 | 205 | # Generalized Advantage Estimataion 206 | delta_t = rewards[i] + args.gamma * \ 207 | values[i + 1].data - values[i].data 208 | gae = gae * args.gamma * args.tau + delta_t 209 | 210 | policy_loss = policy_loss - \ 211 | log_probs[i] * Variable(gae) - args.entropy_coef * entropies[i] 212 | 213 | optimizer.zero_grad() 214 | 215 | (policy_loss + args.value_loss_coef * value_loss).backward() 216 | torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) 217 | 218 | optimizer.step() 219 | 220 | 221 | ################### testing phase ################### 222 | model = model.eval() 223 | 224 | state, _ = env.reset() 225 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 226 | if torch.cuda.is_available(): 227 | state = state.cuda() 228 | reward_sum = 0 229 | done = True 230 | 231 | episode_length = 0 232 | testing = True 233 | while testing: 234 | episode_length += 1 235 | # Sync with the shared model 236 | if done: 237 | with torch.no_grad(): 238 | cx = torch.zeros(1, args.hidden_size) 239 | hx = torch.zeros(1, args.hidden_size) 240 | else: 241 | with torch.no_grad(): 242 | cx = cx.data 243 | hx = hx.data 244 | if torch.cuda.is_available(): 245 | hx = hx.cuda() 246 | cx = cx.cuda() 247 | 248 | with torch.no_grad(): 249 | value, logit, (hx, cx) = model((state, (hx, cx))) 250 | prob = F.softmax(logit, dim=1) 251 | action = prob.max(1, keepdim=True)[1].data.cpu().numpy() 252 | 253 | path_info = '%s %s %s %d' % (env.target_group, env.scene_idx, env.coord, action[0, 0]) 254 | test_logger.info(path_info) 255 | 256 | state, reward, done = env.step(action[0, 0]) 257 | reward_sum += reward 258 | 259 | if done: 260 | # print('testing: ', all_time) 261 | success = env.end_flag 262 | all_success_time += success 263 | ep_success_time += success 264 | all_time += 1 265 | if all_time % args.num_test == 0: 266 | check_flag = True 267 | 268 | state, _ = env.reset() 269 | time.sleep(0.1) 270 | 271 | state = Variable(torch.from_numpy(state).unsqueeze(0)) 272 | if torch.cuda.is_available(): 273 | state = state.cuda() 274 | 275 | if check_flag: 276 | all_success_rate = all_success_time / all_time 277 | log_info = 'Num steps: %d, Episode length: %d, Reward: %0.2f, EP Success: %0.2f, ALL Success: %0.3f' \ 278 | % (itr, episode_length, reward_sum, ep_success_time / args.num_test, all_success_rate) 279 | test_logger.info(log_info) 280 | print(log_info) 281 | torch.save(model.state_dict(), os.path.join(args.model_dir, 'latest.pth')) 282 | 283 | # save models in some important phases 284 | if all_success_rate > success_phase: 285 | torch.save(model.state_dict(), 286 | os.path.join(args.model_dir, 'success_rate_%0.2f.pth' % success_phase)) 287 | success_phase += 0.1 288 | 289 | # save models according to steps 290 | if epoch 
% 20 == 0: 291 | torch.save(model.state_dict(), 292 | os.path.join(args.model_dir, 'model_%d.pth' % epoch)) 293 | 294 | reward_sum = 0 295 | episode_length = 0 296 | ep_success_time = 0 297 | check_flag = False 298 | testing = False 299 | 300 | time.sleep(1) -------------------------------------------------------------------------------- /viewpoint_optim/RL_pointnet/pointnet.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import os 4 | import random 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.parallel 8 | import torch.backends.cudnn as cudnn 9 | import torch.optim as optim 10 | import torch.utils.data 11 | import torchvision.transforms as transforms 12 | import torchvision.utils as vutils 13 | from torch.autograd import Variable 14 | from PIL import Image 15 | import numpy as np 16 | import matplotlib.pyplot as plt 17 | import torch.nn.functional as F 18 | 19 | 20 | def weights_init(m): 21 | classname = m.__class__.__name__ 22 | if classname.find('Conv') != -1: 23 | weight_shape = list(m.weight.data.size()) 24 | fan_in = np.prod(weight_shape[1: 4]) 25 | fan_out = np.prod(weight_shape[2: 4]) * weight_shape[0] 26 | w_bound = np.sqrt(6. / (fan_in + fan_out)) 27 | m.weight.data.uniform_(-w_bound, w_bound) 28 | m.bias.data.fill_(0.0) 29 | elif classname.find('Linear') != -1: 30 | weight_shape = list(m.weight.data.size()) 31 | fan_in = weight_shape[1] 32 | fan_out = weight_shape[0] 33 | w_bound = np.sqrt(6.0 / (fan_in + fan_out)) 34 | m.weight.data.uniform_(-w_bound, w_bound) 35 | m.bias.data.fill_(0.0) 36 | elif classname.find('BatchNorm') != -1: 37 | m.weight.data.fill_(1.0) 38 | m.bias.data.fill_(0.0) 39 | elif classname.find('LSTMCell') != -1: 40 | m.bias_ih.data.fill_(0.0) 41 | m.bias_hh.data.fill_(0.0) 42 | 43 | 44 | class STN3d(nn.Module): 45 | def __init__(self, num_points = 2500): 46 | super(STN3d, self).__init__() 47 | self.num_points = num_points 48 | self.conv1 = torch.nn.Conv1d(4, 64, 1) 49 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 50 | self.conv3 = torch.nn.Conv1d(128, 1024, 1) 51 | self.mp1 = torch.nn.MaxPool1d(num_points) 52 | self.fc1 = nn.Linear(1024, 512) 53 | self.fc2 = nn.Linear(512, 256) 54 | self.fc3 = nn.Linear(256, 9) 55 | self.relu = nn.ReLU() 56 | 57 | self.bn1 = nn.BatchNorm1d(64) 58 | self.bn2 = nn.BatchNorm1d(128) 59 | self.bn3 = nn.BatchNorm1d(1024) 60 | self.bn4 = nn.BatchNorm1d(512) 61 | self.bn5 = nn.BatchNorm1d(256) 62 | 63 | def forward(self, x): 64 | # x --> 3 * 3 65 | batchsize = x.shape[0] 66 | if batchsize > 1: 67 | x = F.relu(self.bn1(self.conv1(x))) 68 | x = F.relu(self.bn2(self.conv2(x))) 69 | x = F.relu(self.bn3(self.conv3(x))) 70 | x = self.mp1(x) 71 | x = x.view(-1, 1024) 72 | 73 | x = F.relu(self.bn4(self.fc1(x))) 74 | x = F.relu(self.bn5(self.fc2(x))) 75 | else: 76 | x = F.relu(self.conv1(x)) 77 | x = F.relu(self.conv2(x)) 78 | x = F.relu(self.conv3(x)) 79 | x = self.mp1(x) 80 | x = x.view(-1, 1024) 81 | 82 | x = F.relu(self.fc1(x)) 83 | x = F.relu(self.fc2(x)) 84 | 85 | x = self.fc3(x) 86 | 87 | iden = Variable(torch.eye(3)).view(1, -1).repeat(batchsize, 1) 88 | if x.is_cuda: 89 | device = torch.device('cuda:%d' % x.get_device()) 90 | iden = iden.to(device=device) 91 | x = x + iden 92 | x = x.view(-1, 3, 3) 93 | 94 | return x 95 | 96 | 97 | class PointNetfeat(nn.Module): 98 | def __init__(self, num_points=2500, global_feat=True): 99 | super(PointNetfeat, self).__init__() 100 | self.stn = STN3d(num_points=num_points) 101 | 
self.conv1 = torch.nn.Conv1d(4, 64, 1) 102 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 103 | self.conv3 = torch.nn.Conv1d(128, 1024, 1) 104 | self.bn1 = nn.BatchNorm1d(64) 105 | self.bn2 = nn.BatchNorm1d(128) 106 | self.bn3 = nn.BatchNorm1d(1024) 107 | self.mp1 = torch.nn.MaxPool1d(num_points) 108 | self.num_points = num_points 109 | self.global_feat = global_feat 110 | 111 | def forward(self, x): 112 | trans = self.stn(x) 113 | x = torch.cat([torch.bmm(trans, x[:, :3, :]), x[:, 3, :].unsqueeze(1)], dim=1) 114 | 115 | if x.shape[0] > 1: 116 | x = F.relu(self.bn1(self.conv1(x))) 117 | pointfeat = x 118 | x = F.relu(self.bn2(self.conv2(x))) 119 | x = self.bn3(self.conv3(x)) 120 | else: 121 | x = F.relu(self.conv1(x)) 122 | pointfeat = x 123 | x = F.relu(self.conv2(x)) 124 | x = self.conv3(x) 125 | 126 | x = self.mp1(x) 127 | x = x.view(-1, 1024) 128 | 129 | if self.global_feat: 130 | return x, trans 131 | else: 132 | x = x.view(-1, 1024, 1).repeat(1, 1, self.num_points) 133 | return torch.cat([x, pointfeat], 1), trans 134 | 135 | 136 | class end_layer(nn.Module): 137 | def __init__(self, in_channels=1024, out_channels=1): 138 | super(end_layer, self).__init__() 139 | self.fc1 = nn.Linear(in_channels, 512) 140 | self.fc2 = nn.Linear(512, 256) 141 | self.fc3 = nn.Linear(256, out_channels) 142 | self.bn1 = nn.BatchNorm1d(512) 143 | self.bn2 = nn.BatchNorm1d(256) 144 | 145 | self.apply(weights_init) 146 | 147 | def forward(self, x): 148 | if x.size()[0] == 1: 149 | x = F.relu(self.fc1(x)) 150 | x = F.relu(self.fc2(x)) 151 | else: 152 | x = F.relu(self.bn1(self.fc1(x))) 153 | x = F.relu(self.bn2(self.fc2(x))) 154 | return self.fc3(x) 155 | 156 | 157 | class PointNetActorCritic(nn.Module): 158 | def __init__(self, num_points=2500, num_actions=4): 159 | super(PointNetActorCritic, self).__init__() 160 | self.num_points = num_points 161 | self.feat = PointNetfeat(num_points, global_feat=True) 162 | 163 | self.lstm = nn.LSTMCell(1024, 1024) 164 | 165 | self.critic_linear = end_layer(in_channels=1024, out_channels=1) 166 | self.actor_linear = end_layer(in_channels=1024, out_channels=num_actions) 167 | 168 | self.apply(weights_init) 169 | self.train() 170 | 171 | def forward(self, inputs): 172 | x, (hx, cx) = inputs 173 | x, _ = self.feat(x) 174 | hx, cx = self.lstm(x, (hx, cx)) 175 | x = hx 176 | 177 | return self.critic_linear(x), self.actor_linear(x), (hx, cx) 178 | 179 | 180 | if __name__ == '__main__': 181 | sim_data = Variable(torch.rand(10, 4, 2500)) 182 | 183 | # trans = STN3d() 184 | # out = trans(sim_data) 185 | # print('stn', out.size()) 186 | 187 | # pointfeat = PointNetfeat(global_feat=True) 188 | # out, _ = pointfeat(sim_data) 189 | # print('global feat', out.size()) 190 | 191 | # pointfeat = PointNetfeat(global_feat=False) 192 | # out, _ = pointfeat(sim_data) 193 | # print('point feat', out.size()) 194 | 195 | cls = PointNetActorCritic(num_actions=4) 196 | hx, cx = Variable(torch.zeros(10, 1024)), Variable(torch.zeros(10, 1024)) 197 | if torch.cuda.is_available(): 198 | sim_data = sim_data.cuda() 199 | cls = cls.cuda() 200 | hx, cx = hx.cuda(), cx.cuda() 201 | v, q, (hx ,cx) = cls((sim_data, (hx, cx))) 202 | print(v.shape, q.shape, hx.shape, cx.shape) 203 | print(v) 204 | print(q) 205 | -------------------------------------------------------------------------------- /viewpoint_optim/__init__.py: -------------------------------------------------------------------------------- 
--------------------------------------------------------------------------------
/viewpoint_optim/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .bn import ABN, InPlaceABN, InPlaceABNSync
2 | from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE
3 | from .misc import GlobalAvgPool2d
4 | from .residual import IdentityResidualBlock
5 | from .dense import DenseModule
6 | 
--------------------------------------------------------------------------------
/viewpoint_optim/modules/bn.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as functional
4 | 
5 | try:
6 |     from queue import Queue
7 | except ImportError:
8 |     from Queue import Queue
9 | 
10 | from .functions import *
11 | 
12 | 
13 | class ABN(nn.Module):
14 |     """Activated Batch Normalization
15 | 
16 |     This gathers a `BatchNorm2d` and an activation function in a single module
17 |     """
18 | 
19 |     def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
20 |         """Creates an Activated Batch Normalization module
21 | 
22 |         Parameters
23 |         ----------
24 |         num_features : int
25 |             Number of feature channels in the input and output.
26 |         eps : float
27 |             Small constant to prevent numerical issues.
28 |         momentum : float
29 |             Momentum factor applied when updating the running statistics.
30 |         affine : bool
31 |             If `True` apply learned scale and shift transformation after normalization.
32 |         activation : str
33 |             Name of the activation functions, one of: `leaky_relu`, `elu` or `none`.
34 |         slope : float
35 |             Negative slope for the `leaky_relu` activation.
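        Example
        -------
        A drop-in replacement for `nn.BatchNorm2d` followed by a leaky ReLU
        (hypothetical usage, not taken from this repository):

        >>> abn = ABN(64, activation="leaky_relu", slope=0.01)
        >>> y = abn(torch.randn(8, 64, 32, 32))   # same shape as the input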
36 | """ 37 | super(ABN, self).__init__() 38 | self.num_features = num_features 39 | self.affine = affine 40 | self.eps = eps 41 | self.momentum = momentum 42 | self.activation = activation 43 | self.slope = slope 44 | if self.affine: 45 | self.weight = nn.Parameter(torch.ones(num_features)) 46 | self.bias = nn.Parameter(torch.zeros(num_features)) 47 | else: 48 | self.register_parameter('weight', None) 49 | self.register_parameter('bias', None) 50 | self.register_buffer('running_mean', torch.zeros(num_features)) 51 | self.register_buffer('running_var', torch.ones(num_features)) 52 | self.reset_parameters() 53 | 54 | def reset_parameters(self): 55 | nn.init.constant_(self.running_mean, 0) 56 | nn.init.constant_(self.running_var, 1) 57 | if self.affine: 58 | nn.init.constant_(self.weight, 1) 59 | nn.init.constant_(self.bias, 0) 60 | 61 | def forward(self, x): 62 | x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias, 63 | self.training, self.momentum, self.eps) 64 | 65 | if self.activation == ACT_RELU: 66 | return functional.relu(x, inplace=True) 67 | elif self.activation == ACT_LEAKY_RELU: 68 | return functional.leaky_relu(x, negative_slope=self.slope, inplace=True) 69 | elif self.activation == ACT_ELU: 70 | return functional.elu(x, inplace=True) 71 | else: 72 | return x 73 | 74 | def __repr__(self): 75 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ 76 | ' affine={affine}, activation={activation}' 77 | if self.activation == "leaky_relu": 78 | rep += ', slope={slope})' 79 | else: 80 | rep += ')' 81 | return rep.format(name=self.__class__.__name__, **self.__dict__) 82 | 83 | 84 | class InPlaceABN(ABN): 85 | """InPlace Activated Batch Normalization""" 86 | 87 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): 88 | """Creates an InPlace Activated Batch Normalization module 89 | 90 | Parameters 91 | ---------- 92 | num_features : int 93 | Number of feature channels in the input and output. 94 | eps : float 95 | Small constant to prevent numerical issues. 96 | momentum : float 97 | Momentum factor applied to compute running statistics as. 98 | affine : bool 99 | If `True` apply learned scale and shift transformation after normalization. 100 | activation : str 101 | Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. 102 | slope : float 103 | Negative slope for the `leaky_relu` activation. 104 | """ 105 | super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope) 106 | 107 | def forward(self, x): 108 | return inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var, 109 | self.training, self.momentum, self.eps, self.activation, self.slope) 110 | 111 | 112 | class InPlaceABNSync(ABN): 113 | """InPlace Activated Batch Normalization with cross-GPU synchronization 114 | 115 | This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DataParallel`. 116 | """ 117 | 118 | def __init__(self, num_features, devices=None, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", 119 | slope=0.01): 120 | """Creates a synchronized, InPlace Activated Batch Normalization module 121 | 122 | Parameters 123 | ---------- 124 | num_features : int 125 | Number of feature channels in the input and output. 126 | devices : list of int or None 127 | IDs of the GPUs that will run the replicas of this module. 128 | eps : float 129 | Small constant to prevent numerical issues. 
130 | momentum : float 131 | Momentum factor applied to compute running statistics as. 132 | affine : bool 133 | If `True` apply learned scale and shift transformation after normalization. 134 | activation : str 135 | Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. 136 | slope : float 137 | Negative slope for the `leaky_relu` activation. 138 | """ 139 | super(InPlaceABNSync, self).__init__(num_features, eps, momentum, affine, activation, slope) 140 | self.devices = devices if devices else list(range(torch.cuda.device_count())) 141 | 142 | # Initialize queues 143 | self.worker_ids = self.devices[1:] 144 | self.master_queue = Queue(len(self.worker_ids)) 145 | self.worker_queues = [Queue(1) for _ in self.worker_ids] 146 | 147 | def forward(self, x): 148 | if x.get_device() == self.devices[0]: 149 | # Master mode 150 | extra = { 151 | "is_master": True, 152 | "master_queue": self.master_queue, 153 | "worker_queues": self.worker_queues, 154 | "worker_ids": self.worker_ids 155 | } 156 | else: 157 | # Worker mode 158 | extra = { 159 | "is_master": False, 160 | "master_queue": self.master_queue, 161 | "worker_queue": self.worker_queues[self.worker_ids.index(x.get_device())] 162 | } 163 | 164 | return inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var, 165 | extra, self.training, self.momentum, self.eps, self.activation, self.slope) 166 | 167 | def __repr__(self): 168 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ 169 | ' affine={affine}, devices={devices}, activation={activation}' 170 | if self.activation == "leaky_relu": 171 | rep += ', slope={slope})' 172 | else: 173 | rep += ')' 174 | return rep.format(name=self.__class__.__name__, **self.__dict__) 175 | -------------------------------------------------------------------------------- /viewpoint_optim/modules/deeplab.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as functional 4 | 5 | from models._util import try_index 6 | from .bn import ABN 7 | 8 | 9 | class DeeplabV3(nn.Module): 10 | def __init__(self, 11 | in_channels, 12 | out_channels, 13 | hidden_channels=256, 14 | dilations=(12, 24, 36), 15 | norm_act=ABN, 16 | pooling_size=None): 17 | super(DeeplabV3, self).__init__() 18 | self.pooling_size = pooling_size 19 | 20 | self.map_convs = nn.ModuleList([ 21 | nn.Conv2d(in_channels, hidden_channels, 1, bias=False), 22 | nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[0], padding=dilations[0]), 23 | nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[1], padding=dilations[1]), 24 | nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[2], padding=dilations[2]) 25 | ]) 26 | self.map_bn = norm_act(hidden_channels * 4) 27 | 28 | self.global_pooling_conv = nn.Conv2d(in_channels, hidden_channels, 1, bias=False) 29 | self.global_pooling_bn = norm_act(hidden_channels) 30 | 31 | self.red_conv = nn.Conv2d(hidden_channels * 4, out_channels, 1, bias=False) 32 | self.pool_red_conv = nn.Conv2d(hidden_channels, out_channels, 1, bias=False) 33 | self.red_bn = norm_act(out_channels) 34 | 35 | self.reset_parameters(self.map_bn.activation, self.map_bn.slope) 36 | 37 | def reset_parameters(self, activation, slope): 38 | gain = nn.init.calculate_gain(activation, slope) 39 | for m in self.modules(): 40 | if isinstance(m, nn.Conv2d): 41 | nn.init.xavier_normal_(m.weight.data, gain) 42 | if hasattr(m, "bias") and 
m.bias is not None: 43 | nn.init.constant_(m.bias, 0) 44 | elif isinstance(m, ABN): 45 | if hasattr(m, "weight") and m.weight is not None: 46 | nn.init.constant_(m.weight, 1) 47 | if hasattr(m, "bias") and m.bias is not None: 48 | nn.init.constant_(m.bias, 0) 49 | 50 | def forward(self, x): 51 | # Map convolutions 52 | out = torch.cat([m(x) for m in self.map_convs], dim=1) 53 | out = self.map_bn(out) 54 | out = self.red_conv(out) 55 | 56 | # Global pooling 57 | pool = self._global_pooling(x) 58 | pool = self.global_pooling_conv(pool) 59 | pool = self.global_pooling_bn(pool) 60 | pool = self.pool_red_conv(pool) 61 | if self.training or self.pooling_size is None: 62 | pool = pool.repeat(1, 1, x.size(2), x.size(3)) 63 | 64 | out += pool 65 | out = self.red_bn(out) 66 | return out 67 | 68 | def _global_pooling(self, x): 69 | if self.training or self.pooling_size is None: 70 | pool = x.view(x.size(0), x.size(1), -1).mean(dim=-1) 71 | pool = pool.view(x.size(0), x.size(1), 1, 1) 72 | else: 73 | pooling_size = (min(try_index(self.pooling_size, 0), x.shape[2]), 74 | min(try_index(self.pooling_size, 1), x.shape[3])) 75 | padding = ( 76 | (pooling_size[1] - 1) // 2, 77 | (pooling_size[1] - 1) // 2 if pooling_size[1] % 2 == 1 else (pooling_size[1] - 1) // 2 + 1, 78 | (pooling_size[0] - 1) // 2, 79 | (pooling_size[0] - 1) // 2 if pooling_size[0] % 2 == 1 else (pooling_size[0] - 1) // 2 + 1 80 | ) 81 | 82 | pool = functional.avg_pool2d(x, pooling_size, stride=1) 83 | pool = functional.pad(pool, pad=padding, mode="replicate") 84 | return pool 85 | -------------------------------------------------------------------------------- /viewpoint_optim/modules/dense.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .bn import ABN 7 | 8 | 9 | class DenseModule(nn.Module): 10 | def __init__(self, in_channels, growth, layers, bottleneck_factor=4, norm_act=ABN, dilation=1): 11 | super(DenseModule, self).__init__() 12 | self.in_channels = in_channels 13 | self.growth = growth 14 | self.layers = layers 15 | 16 | self.convs1 = nn.ModuleList() 17 | self.convs3 = nn.ModuleList() 18 | for i in range(self.layers): 19 | self.convs1.append(nn.Sequential(OrderedDict([ 20 | ("bn", norm_act(in_channels)), 21 | ("conv", nn.Conv2d(in_channels, self.growth * bottleneck_factor, 1, bias=False)) 22 | ]))) 23 | self.convs3.append(nn.Sequential(OrderedDict([ 24 | ("bn", norm_act(self.growth * bottleneck_factor)), 25 | ("conv", nn.Conv2d(self.growth * bottleneck_factor, self.growth, 3, padding=dilation, bias=False, 26 | dilation=dilation)) 27 | ]))) 28 | in_channels += self.growth 29 | 30 | @property 31 | def out_channels(self): 32 | return self.in_channels + self.growth * self.layers 33 | 34 | def forward(self, x): 35 | inputs = [x] 36 | for i in range(self.layers): 37 | x = torch.cat(inputs, dim=1) 38 | x = self.convs1[i](x) 39 | x = self.convs3[i](x) 40 | inputs += [x] 41 | 42 | return torch.cat(inputs, dim=1) 43 | -------------------------------------------------------------------------------- /viewpoint_optim/modules/functions.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | 3 | import torch.autograd as autograd 4 | import torch.cuda.comm as comm 5 | from torch.autograd.function import once_differentiable 6 | from torch.utils.cpp_extension import load 7 | 8 | _src_path = path.join(path.dirname(path.abspath(__file__)), "src") 9 | 
_backend = load(name="inplace_abn", 10 | extra_cflags=["-O3"], 11 | sources=[path.join(_src_path, f) for f in [ 12 | "inplace_abn.cpp", 13 | "inplace_abn_cpu.cpp", 14 | "inplace_abn_cuda.cu" 15 | ]], 16 | extra_cuda_cflags=["--expt-extended-lambda"]) 17 | 18 | # Activation names 19 | ACT_RELU = "relu" 20 | ACT_LEAKY_RELU = "leaky_relu" 21 | ACT_ELU = "elu" 22 | ACT_NONE = "none" 23 | 24 | 25 | def _check(fn, *args, **kwargs): 26 | success = fn(*args, **kwargs) 27 | if not success: 28 | raise RuntimeError("CUDA Error encountered in {}".format(fn)) 29 | 30 | 31 | def _broadcast_shape(x): 32 | out_size = [] 33 | for i, s in enumerate(x.size()): 34 | if i != 1: 35 | out_size.append(1) 36 | else: 37 | out_size.append(s) 38 | return out_size 39 | 40 | 41 | def _reduce(x): 42 | if len(x.size()) == 2: 43 | return x.sum(dim=0) 44 | else: 45 | n, c = x.size()[0:2] 46 | return x.contiguous().view((n, c, -1)).sum(2).sum(0) 47 | 48 | 49 | def _count_samples(x): 50 | count = 1 51 | for i, s in enumerate(x.size()): 52 | if i != 1: 53 | count *= s 54 | return count 55 | 56 | 57 | def _act_forward(ctx, x): 58 | if ctx.activation == ACT_LEAKY_RELU: 59 | _backend.leaky_relu_forward(x, ctx.slope) 60 | elif ctx.activation == ACT_ELU: 61 | _backend.elu_forward(x) 62 | elif ctx.activation == ACT_NONE: 63 | pass 64 | 65 | 66 | def _act_backward(ctx, x, dx): 67 | if ctx.activation == ACT_LEAKY_RELU: 68 | _backend.leaky_relu_backward(x, dx, ctx.slope) 69 | elif ctx.activation == ACT_ELU: 70 | _backend.elu_backward(x, dx) 71 | elif ctx.activation == ACT_NONE: 72 | pass 73 | 74 | 75 | class InPlaceABN(autograd.Function): 76 | @staticmethod 77 | def forward(ctx, x, weight, bias, running_mean, running_var, 78 | training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01): 79 | # Save context 80 | ctx.training = training 81 | ctx.momentum = momentum 82 | ctx.eps = eps 83 | ctx.activation = activation 84 | ctx.slope = slope 85 | ctx.affine = weight is not None and bias is not None 86 | 87 | # Prepare inputs 88 | count = _count_samples(x) 89 | x = x.contiguous() 90 | weight = weight.contiguous() if ctx.affine else x.new_empty(0) 91 | bias = bias.contiguous() if ctx.affine else x.new_empty(0) 92 | 93 | if ctx.training: 94 | mean, var = _backend.mean_var(x) 95 | 96 | # Update running stats 97 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) 98 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1)) 99 | 100 | # Mark in-place modified tensors 101 | ctx.mark_dirty(x, running_mean, running_var) 102 | else: 103 | mean, var = running_mean.contiguous(), running_var.contiguous() 104 | ctx.mark_dirty(x) 105 | 106 | # BN forward + activation 107 | _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) 108 | _act_forward(ctx, x) 109 | 110 | # Output 111 | ctx.var = var 112 | ctx.save_for_backward(x, var, weight, bias) 113 | return x 114 | 115 | @staticmethod 116 | @once_differentiable 117 | def backward(ctx, dz): 118 | z, var, weight, bias = ctx.saved_tensors 119 | dz = dz.contiguous() 120 | 121 | # Undo activation 122 | _act_backward(ctx, z, dz) 123 | 124 | if ctx.training: 125 | edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) 126 | else: 127 | # TODO: implement simplified CUDA backward for inference mode 128 | edz = dz.new_zeros(dz.size(1)) 129 | eydz = dz.new_zeros(dz.size(1)) 130 | 131 | dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) 132 | dweight = dweight if ctx.affine else None 
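        # autograd expects one gradient per argument of forward(): when
        # affine=False the backend's weight/bias grads are placeholders, so they
        # are masked to None here, and the trailing Nones returned below cover
        # the seven non-differentiable arguments (running stats, flags and
        # hyper-parameters).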
133 | dbias = dbias if ctx.affine else None 134 | 135 | return dx, dweight, dbias, None, None, None, None, None, None, None 136 | 137 | 138 | class InPlaceABNSync(autograd.Function): 139 | @classmethod 140 | def forward(cls, ctx, x, weight, bias, running_mean, running_var, 141 | extra, training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01): 142 | # Save context 143 | cls._parse_extra(ctx, extra) 144 | ctx.training = training 145 | ctx.momentum = momentum 146 | ctx.eps = eps 147 | ctx.activation = activation 148 | ctx.slope = slope 149 | ctx.affine = weight is not None and bias is not None 150 | 151 | # Prepare inputs 152 | count = _count_samples(x) * (ctx.master_queue.maxsize + 1) 153 | x = x.contiguous() 154 | weight = weight.contiguous() if ctx.affine else x.new_empty(0) 155 | bias = bias.contiguous() if ctx.affine else x.new_empty(0) 156 | 157 | if ctx.training: 158 | mean, var = _backend.mean_var(x) 159 | 160 | if ctx.is_master: 161 | means, vars = [mean.unsqueeze(0)], [var.unsqueeze(0)] 162 | for _ in range(ctx.master_queue.maxsize): 163 | mean_w, var_w = ctx.master_queue.get() 164 | ctx.master_queue.task_done() 165 | means.append(mean_w.unsqueeze(0)) 166 | vars.append(var_w.unsqueeze(0)) 167 | 168 | means = comm.gather(means) 169 | vars = comm.gather(vars) 170 | 171 | mean = means.mean(0) 172 | var = (vars + (mean - means) ** 2).mean(0) 173 | 174 | tensors = comm.broadcast_coalesced((mean, var), [mean.get_device()] + ctx.worker_ids) 175 | for ts, queue in zip(tensors[1:], ctx.worker_queues): 176 | queue.put(ts) 177 | else: 178 | ctx.master_queue.put((mean, var)) 179 | mean, var = ctx.worker_queue.get() 180 | ctx.worker_queue.task_done() 181 | 182 | # Update running stats 183 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) 184 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1)) 185 | 186 | # Mark in-place modified tensors 187 | ctx.mark_dirty(x, running_mean, running_var) 188 | else: 189 | mean, var = running_mean.contiguous(), running_var.contiguous() 190 | ctx.mark_dirty(x) 191 | 192 | # BN forward + activation 193 | _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) 194 | _act_forward(ctx, x) 195 | 196 | # Output 197 | ctx.var = var 198 | ctx.save_for_backward(x, var, weight, bias) 199 | return x 200 | 201 | @staticmethod 202 | @once_differentiable 203 | def backward(ctx, dz): 204 | z, var, weight, bias = ctx.saved_tensors 205 | dz = dz.contiguous() 206 | 207 | # Undo activation 208 | _act_backward(ctx, z, dz) 209 | 210 | if ctx.training: 211 | edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) 212 | 213 | if ctx.is_master: 214 | edzs, eydzs = [edz], [eydz] 215 | for _ in range(len(ctx.worker_queues)): 216 | edz_w, eydz_w = ctx.master_queue.get() 217 | ctx.master_queue.task_done() 218 | edzs.append(edz_w) 219 | eydzs.append(eydz_w) 220 | 221 | edz = comm.reduce_add(edzs) / (ctx.master_queue.maxsize + 1) 222 | eydz = comm.reduce_add(eydzs) / (ctx.master_queue.maxsize + 1) 223 | 224 | tensors = comm.broadcast_coalesced((edz, eydz), [edz.get_device()] + ctx.worker_ids) 225 | for ts, queue in zip(tensors[1:], ctx.worker_queues): 226 | queue.put(ts) 227 | else: 228 | ctx.master_queue.put((edz, eydz)) 229 | edz, eydz = ctx.worker_queue.get() 230 | ctx.worker_queue.task_done() 231 | else: 232 | edz = dz.new_zeros(dz.size(1)) 233 | eydz = dz.new_zeros(dz.size(1)) 234 | 235 | dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) 236 | 
dweight = dweight if ctx.affine else None 237 | dbias = dbias if ctx.affine else None 238 | 239 | return dx, dweight, dbias, None, None, None, None, None, None, None, None 240 | 241 | @staticmethod 242 | def _parse_extra(ctx, extra): 243 | ctx.is_master = extra["is_master"] 244 | if ctx.is_master: 245 | ctx.master_queue = extra["master_queue"] 246 | ctx.worker_queues = extra["worker_queues"] 247 | ctx.worker_ids = extra["worker_ids"] 248 | else: 249 | ctx.master_queue = extra["master_queue"] 250 | ctx.worker_queue = extra["worker_queue"] 251 | 252 | 253 | inplace_abn = InPlaceABN.apply 254 | inplace_abn_sync = InPlaceABNSync.apply 255 | 256 | __all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"] 257 | -------------------------------------------------------------------------------- /viewpoint_optim/modules/misc.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class GlobalAvgPool2d(nn.Module): 5 | def __init__(self): 6 | """Global average pooling over the input's spatial dimensions""" 7 | super(GlobalAvgPool2d, self).__init__() 8 | 9 | def forward(self, inputs): 10 | in_size = inputs.size() 11 | return inputs.view((in_size[0], in_size[1], -1)).mean(dim=2) 12 | -------------------------------------------------------------------------------- /viewpoint_optim/modules/residual.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.nn as nn 4 | 5 | from .bn import ABN 6 | 7 | 8 | class IdentityResidualBlock(nn.Module): 9 | def __init__(self, 10 | in_channels, 11 | channels, 12 | stride=1, 13 | dilation=1, 14 | groups=1, 15 | norm_act=ABN, 16 | dropout=None): 17 | """Configurable identity-mapping residual block 18 | 19 | Parameters 20 | ---------- 21 | in_channels : int 22 | Number of input channels. 23 | channels : list of int 24 | Number of channels in the internal feature maps. Can either have two or three elements: if three construct 25 | a residual block with two `3 x 3` convolutions, otherwise construct a bottleneck block with `1 x 1`, then 26 | `3 x 3` then `1 x 1` convolutions. 27 | stride : int 28 | Stride of the first `3 x 3` convolution 29 | dilation : int 30 | Dilation to apply to the `3 x 3` convolutions. 31 | groups : int 32 | Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with 33 | bottleneck blocks. 34 | norm_act : callable 35 | Function to create normalization / activation Module. 36 | dropout: callable 37 | Function to create Dropout Module. 
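        Example
        -------
        A hypothetical bottleneck block with a strided projection shortcut
        (illustrative only, not taken from this repository's configs):

        >>> block = IdentityResidualBlock(256, [64, 64, 256], stride=2, norm_act=ABN)
        >>> y = block(torch.randn(2, 256, 56, 56))   # -> (2, 256, 28, 28)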
38 | """ 39 | super(IdentityResidualBlock, self).__init__() 40 | 41 | # Check parameters for inconsistencies 42 | if len(channels) != 2 and len(channels) != 3: 43 | raise ValueError("channels must contain either two or three values") 44 | if len(channels) == 2 and groups != 1: 45 | raise ValueError("groups > 1 are only valid if len(channels) == 3") 46 | 47 | is_bottleneck = len(channels) == 3 48 | need_proj_conv = stride != 1 or in_channels != channels[-1] 49 | 50 | self.bn1 = norm_act(in_channels) 51 | if not is_bottleneck: 52 | layers = [ 53 | ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False, 54 | dilation=dilation)), 55 | ("bn2", norm_act(channels[0])), 56 | ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, 57 | dilation=dilation)) 58 | ] 59 | if dropout is not None: 60 | layers = layers[0:2] + [("dropout", dropout())] + layers[2:] 61 | else: 62 | layers = [ 63 | ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=stride, padding=0, bias=False)), 64 | ("bn2", norm_act(channels[0])), 65 | ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, 66 | groups=groups, dilation=dilation)), 67 | ("bn3", norm_act(channels[1])), 68 | ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False)) 69 | ] 70 | if dropout is not None: 71 | layers = layers[0:4] + [("dropout", dropout())] + layers[4:] 72 | self.convs = nn.Sequential(OrderedDict(layers)) 73 | 74 | if need_proj_conv: 75 | self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False) 76 | 77 | def forward(self, x): 78 | if hasattr(self, "proj_conv"): 79 | bn1 = self.bn1(x) 80 | shortcut = self.proj_conv(bn1) 81 | else: 82 | shortcut = x.clone() 83 | bn1 = self.bn1(x) 84 | 85 | out = self.convs(bn1) 86 | out.add_(shortcut) 87 | 88 | return out 89 | -------------------------------------------------------------------------------- /viewpoint_optim/modules/src/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | /* 6 | * General settings 7 | */ 8 | const int WARP_SIZE = 32; 9 | const int MAX_BLOCK_SIZE = 512; 10 | 11 | template 12 | struct Pair { 13 | T v1, v2; 14 | __device__ Pair() {} 15 | __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {} 16 | __device__ Pair(T v) : v1(v), v2(v) {} 17 | __device__ Pair(int v) : v1(v), v2(v) {} 18 | __device__ Pair &operator+=(const Pair &a) { 19 | v1 += a.v1; 20 | v2 += a.v2; 21 | return *this; 22 | } 23 | }; 24 | 25 | /* 26 | * Utility functions 27 | */ 28 | template 29 | __device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, 30 | unsigned int mask = 0xffffffff) { 31 | #if CUDART_VERSION >= 9000 32 | return __shfl_xor_sync(mask, value, laneMask, width); 33 | #else 34 | return __shfl_xor(value, laneMask, width); 35 | #endif 36 | } 37 | 38 | __device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); } 39 | 40 | static int getNumThreads(int nElem) { 41 | int threadSizes[5] = {32, 64, 128, 256, MAX_BLOCK_SIZE}; 42 | for (int i = 0; i != 5; ++i) { 43 | if (nElem <= threadSizes[i]) { 44 | return threadSizes[i]; 45 | } 46 | } 47 | return MAX_BLOCK_SIZE; 48 | } 49 | 50 | template 51 | static __device__ __forceinline__ T warpSum(T val) { 52 | #if __CUDA_ARCH__ >= 300 53 | for (int i = 0; i < getMSB(WARP_SIZE); ++i) { 54 | val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE); 55 | } 56 | #else 57 | __shared__ T 
50 | template <typename T>
51 | static __device__ __forceinline__ T warpSum(T val) {
52 | #if __CUDA_ARCH__ >= 300
53 |   for (int i = 0; i < getMSB(WARP_SIZE); ++i) {
54 |     val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE);
55 |   }
56 | #else
57 |   __shared__ T values[MAX_BLOCK_SIZE];
58 |   values[threadIdx.x] = val;
59 |   __threadfence_block();
60 |   const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE;
61 |   for (int i = 1; i < WARP_SIZE; i++) {
62 |     val += values[base + ((i + threadIdx.x) % WARP_SIZE)];
63 |   }
64 | #endif
65 |   return val;
66 | }
67 | 
68 | template <typename T>
69 | static __device__ __forceinline__ Pair<T> warpSum(Pair<T> value) {
70 |   value.v1 = warpSum(value.v1);
71 |   value.v2 = warpSum(value.v2);
72 |   return value;
73 | }
74 | 
75 | template <typename T, typename Op>
76 | __device__ T reduce(Op op, int plane, int N, int C, int S) {
77 |   T sum = (T)0;
78 |   for (int batch = 0; batch < N; ++batch) {
79 |     for (int x = threadIdx.x; x < S; x += blockDim.x) {
80 |       sum += op(batch, plane, x);
81 |     }
82 |   }
83 | 
84 |   // sum over NumThreads within a warp
85 |   sum = warpSum(sum);
86 | 
87 |   // 'transpose', and reduce within warp again
88 |   __shared__ T shared[32];
89 |   __syncthreads();
90 |   if (threadIdx.x % WARP_SIZE == 0) {
91 |     shared[threadIdx.x / WARP_SIZE] = sum;
92 |   }
93 |   if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) {
94 |     // zero out the other entries in shared
95 |     shared[threadIdx.x] = (T)0;
96 |   }
97 |   __syncthreads();
98 |   if (threadIdx.x / WARP_SIZE == 0) {
99 |     sum = warpSum(shared[threadIdx.x]);
100 |     if (threadIdx.x == 0) {
101 |       shared[0] = sum;
102 |     }
103 |   }
104 |   __syncthreads();
105 | 
106 |   // Everyone picks it up, should be broadcast into the whole gradInput
107 |   return shared[0];
108 | }
--------------------------------------------------------------------------------
/viewpoint_optim/modules/src/inplace_abn.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 | 
3 | #include <vector>
4 | 
5 | #include "inplace_abn.h"
6 | 
7 | std::vector<at::Tensor> mean_var(at::Tensor x) {
8 |   if (x.is_cuda()) {
9 |     return mean_var_cuda(x);
10 |   } else {
11 |     return mean_var_cpu(x);
12 |   }
13 | }
14 | 
15 | at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
16 |                    bool affine, float eps) {
17 |   if (x.is_cuda()) {
18 |     return forward_cuda(x, mean, var, weight, bias, affine, eps);
19 |   } else {
20 |     return forward_cpu(x, mean, var, weight, bias, affine, eps);
21 |   }
22 | }
23 | 
24 | std::vector<at::Tensor> edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
25 |                                  bool affine, float eps) {
26 |   if (z.is_cuda()) {
27 |     return edz_eydz_cuda(z, dz, weight, bias, affine, eps);
28 |   } else {
29 |     return edz_eydz_cpu(z, dz, weight, bias, affine, eps);
30 |   }
31 | }
32 | 
33 | std::vector<at::Tensor> backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
34 |                                  at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
35 |   if (z.is_cuda()) {
36 |     return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps);
37 |   } else {
38 |     return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps);
39 |   }
40 | }
41 | 
42 | void leaky_relu_forward(at::Tensor z, float slope) {
43 |   at::leaky_relu_(z, slope);
44 | }
45 | 
46 | void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) {
47 |   if (z.is_cuda()) {
48 |     return leaky_relu_backward_cuda(z, dz, slope);
49 |   } else {
50 |     return leaky_relu_backward_cpu(z, dz, slope);
51 |   }
52 | }
53 | 
54 | void elu_forward(at::Tensor z) {
55 |   at::elu_(z);
56 | }
57 | 
58 | void elu_backward(at::Tensor z, at::Tensor dz) {
59 |   if (z.is_cuda()) {
60 |     return elu_backward_cuda(z, dz);
61 |   } else {
62 |     return elu_backward_cpu(z, dz);
63 |   }
64 | }
65 | 
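// Python bindings. Each exported op dispatches to the CUDA or CPU variant
// based on the input tensor's device (the is_cuda() branches above); the
// exported names match the `_backend.*` calls made from modules/functions.py.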
m.def("mean_var", &mean_var, "Mean and variance computation"); 68 | m.def("forward", &forward, "In-place forward computation"); 69 | m.def("edz_eydz", &edz_eydz, "First part of backward computation"); 70 | m.def("backward", &backward, "Second part of backward computation"); 71 | m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation"); 72 | m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion"); 73 | m.def("elu_forward", &elu_forward, "Elu forward computation"); 74 | m.def("elu_backward", &elu_backward, "Elu backward computation and inversion"); 75 | } -------------------------------------------------------------------------------- /viewpoint_optim/modules/src/inplace_abn.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | std::vector mean_var_cpu(at::Tensor x); 8 | std::vector mean_var_cuda(at::Tensor x); 9 | 10 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 11 | bool affine, float eps); 12 | at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 13 | bool affine, float eps); 14 | 15 | std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 16 | bool affine, float eps); 17 | std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 18 | bool affine, float eps); 19 | 20 | std::vector backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 21 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 22 | std::vector backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 23 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 24 | 25 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope); 26 | void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope); 27 | 28 | void elu_backward_cpu(at::Tensor z, at::Tensor dz); 29 | void elu_backward_cuda(at::Tensor z, at::Tensor dz); -------------------------------------------------------------------------------- /viewpoint_optim/modules/src/inplace_abn_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "inplace_abn.h" 6 | 7 | at::Tensor reduce_sum(at::Tensor x) { 8 | if (x.ndimension() == 2) { 9 | return x.sum(0); 10 | } else { 11 | auto x_view = x.view({x.size(0), x.size(1), -1}); 12 | return x_view.sum(-1).sum(0); 13 | } 14 | } 15 | 16 | at::Tensor broadcast_to(at::Tensor v, at::Tensor x) { 17 | if (x.ndimension() == 2) { 18 | return v; 19 | } else { 20 | std::vector broadcast_size = {1, -1}; 21 | for (int64_t i = 2; i < x.ndimension(); ++i) 22 | broadcast_size.push_back(1); 23 | 24 | return v.view(broadcast_size); 25 | } 26 | } 27 | 28 | int64_t count(at::Tensor x) { 29 | int64_t count = x.size(0); 30 | for (int64_t i = 2; i < x.ndimension(); ++i) 31 | count *= x.size(i); 32 | 33 | return count; 34 | } 35 | 36 | at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) { 37 | if (affine) { 38 | return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z); 39 | } else { 40 | return z; 41 | } 42 | } 43 | 44 | std::vector mean_var_cpu(at::Tensor x) { 45 | auto num = count(x); 46 | auto mean = reduce_sum(x) / num; 47 | auto diff = x - broadcast_to(mean, x); 48 | 
44 | std::vector<at::Tensor> mean_var_cpu(at::Tensor x) {
45 |   auto num = count(x);
46 |   auto mean = reduce_sum(x) / num;
47 |   auto diff = x - broadcast_to(mean, x);
48 |   auto var = reduce_sum(diff.pow(2)) / num;
49 | 
50 |   return {mean, var};
51 | }
52 | 
53 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
54 |                        bool affine, float eps) {
55 |   auto gamma = affine ? at::abs(weight) + eps : at::ones_like(var);
56 |   auto mul = at::rsqrt(var + eps) * gamma;
57 | 
58 |   x.sub_(broadcast_to(mean, x));
59 |   x.mul_(broadcast_to(mul, x));
60 |   if (affine) x.add_(broadcast_to(bias, x));
61 | 
62 |   return x;
63 | }
64 | 
65 | std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
66 |                                      bool affine, float eps) {
67 |   auto edz = reduce_sum(dz);
68 |   auto y = invert_affine(z, weight, bias, affine, eps);
69 |   auto eydz = reduce_sum(y * dz);
70 | 
71 |   return {edz, eydz};
72 | }
73 | 
74 | std::vector<at::Tensor> backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
75 |                                      at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
76 |   auto y = invert_affine(z, weight, bias, affine, eps);
77 |   auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps);
78 | 
79 |   auto num = count(z);
80 |   auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz);
81 | 
82 |   auto dweight = at::empty(z.type(), {0});
83 |   auto dbias = at::empty(z.type(), {0});
84 |   if (affine) {
85 |     dweight = eydz * at::sign(weight);
86 |     dbias = edz;
87 |   }
88 | 
89 |   return {dx, dweight, dbias};
90 | }
91 | 
92 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) {
93 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] {
94 |     int64_t count = z.numel();
95 |     auto *_z = z.data<scalar_t>();
96 |     auto *_dz = dz.data<scalar_t>();
97 | 
98 |     for (int64_t i = 0; i < count; ++i) {
99 |       if (_z[i] < 0) {
100 |         _z[i] *= 1 / slope;
101 |         _dz[i] *= slope;
102 |       }
103 |     }
104 |   }));
105 | }
106 | 
107 | void elu_backward_cpu(at::Tensor z, at::Tensor dz) {
108 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] {
109 |     int64_t count = z.numel();
110 |     auto *_z = z.data<scalar_t>();
111 |     auto *_dz = dz.data<scalar_t>();
112 | 
113 |     for (int64_t i = 0; i < count; ++i) {
114 |       if (_z[i] < 0) {
115 |         _dz[i] *= (_z[i] + 1.f);  // use the saved output before inversion: d(elu)/dy = z + 1
116 |         _z[i] = log1p(_z[i]);     // then invert to recover the pre-activation y
117 |       }
118 |     }
119 |   }));
120 | }
--------------------------------------------------------------------------------
/viewpoint_optim/modules/src/inplace_abn_cuda.cu:
--------------------------------------------------------------------------------
1 | #include <ATen/ATen.h>
2 | 
3 | #include <thrust/device_ptr.h>
4 | #include <thrust/transform.h>
5 | 
6 | #include <vector>
7 | 
8 | #include "common.h"
9 | #include "inplace_abn.h"
10 | 
11 | // Checks
12 | #ifndef AT_CHECK
13 | #define AT_CHECK AT_ASSERT
14 | #endif
15 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
16 | #define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x " must be contiguous")
17 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
18 | 
19 | // Utilities
20 | void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) {
21 |   num = x.size(0);
22 |   chn = x.size(1);
23 |   sp = 1;
24 |   for (int64_t i = 2; i < x.ndimension(); ++i)
25 |     sp *= x.size(i);
26 | }
27 | 
28 | // Operations for reduce
29 | template <typename T>
30 | struct SumOp {
31 |   __device__ SumOp(const T *t, int c, int s)
32 |       : tensor(t), chn(c), sp(s) {}
33 |   __device__ __forceinline__ T operator()(int batch, int plane, int n) {
34 |     return tensor[(batch * chn + plane) * sp + n];
35 |   }
36 |   const T *tensor;
37 |   const int chn;
38 |   const int sp;
39 | };
40 | 
41 | template <typename T>
42 | struct VarOp {
43 |   __device__ VarOp(T m, const T *t, int c, int s)
44 |       : mean(m), tensor(t), chn(c), sp(s) {}
45 |   __device__ __forceinline__ T operator()(int batch, int plane, int n) {
46 |     T val = tensor[(batch * chn + plane) * sp + n];
47 |     return (val - mean) * (val - mean);
48 |   }
49 |   const T mean;
50 |   const T *tensor;
51 |   const int chn;
52 |   const int sp;
53 | };
54 | 
55 | template <typename T>
56 | struct GradOp {
57 |   __device__ GradOp(T _weight, T _bias, const T *_z, const T *_dz, int c, int s)
58 |       : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {}
59 |   __device__ __forceinline__ Pair<T> operator()(int batch, int plane, int n) {
60 |     T _y = (z[(batch * chn + plane) * sp + n] - bias) / weight;
61 |     T _dz = dz[(batch * chn + plane) * sp + n];
62 |     return Pair<T>(_dz, _y * _dz);
63 |   }
64 |   const T weight;
65 |   const T bias;
66 |   const T *z;
67 |   const T *dz;
68 |   const int chn;
69 |   const int sp;
70 | };
71 | 
72 | /***********
73 |  * mean_var
74 |  ***********/
75 | 
76 | template <typename T>
77 | __global__ void mean_var_kernel(const T *x, T *mean, T *var, int num, int chn, int sp) {
78 |   int plane = blockIdx.x;
79 |   T norm = T(1) / T(num * sp);
80 | 
81 |   T _mean = reduce<T, SumOp<T>>(SumOp<T>(x, chn, sp), plane, num, chn, sp) * norm;
82 |   __syncthreads();
83 |   T _var = reduce<T, VarOp<T>>(VarOp<T>(_mean, x, chn, sp), plane, num, chn, sp) * norm;
84 | 
85 |   if (threadIdx.x == 0) {
86 |     mean[plane] = _mean;
87 |     var[plane] = _var;
88 |   }
89 | }
90 | 
91 | std::vector<at::Tensor> mean_var_cuda(at::Tensor x) {
92 |   CHECK_INPUT(x);
93 | 
94 |   // Extract dimensions
95 |   int64_t num, chn, sp;
96 |   get_dims(x, num, chn, sp);
97 | 
98 |   // Prepare output tensors
99 |   auto mean = at::empty({chn}, x.options());
100 |   auto var = at::empty({chn}, x.options());
101 | 
102 |   // Run kernel
103 |   dim3 blocks(chn);
104 |   dim3 threads(getNumThreads(sp));
105 |   AT_DISPATCH_FLOATING_TYPES(x.type(), "mean_var_cuda", ([&] {
106 |     mean_var_kernel<scalar_t><<<blocks, threads>>>(
107 |         x.data<scalar_t>(),
108 |         mean.data<scalar_t>(),
109 |         var.data<scalar_t>(),
110 |         num, chn, sp);
111 |   }));
112 | 
113 |   return {mean, var};
114 | }
115 | 
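// Launch configuration used by every kernel in this file: one block per
// channel (blocks = chn), with up to MAX_BLOCK_SIZE threads striding over
// the num * sp elements of that channel's plane.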
116 | /**********
117 |  * forward
118 |  **********/
119 | 
120 | template <typename T>
121 | __global__ void forward_kernel(T *x, const T *mean, const T *var, const T *weight, const T *bias,
122 |                                bool affine, float eps, int num, int chn, int sp) {
123 |   int plane = blockIdx.x;
124 | 
125 |   T _mean = mean[plane];
126 |   T _var = var[plane];
127 |   T _weight = affine ? abs(weight[plane]) + eps : T(1);
128 |   T _bias = affine ? bias[plane] : T(0);
129 | 
130 |   T mul = rsqrt(_var + eps) * _weight;
131 | 
132 |   for (int batch = 0; batch < num; ++batch) {
133 |     for (int n = threadIdx.x; n < sp; n += blockDim.x) {
134 |       T _x = x[(batch * chn + plane) * sp + n];
135 |       T _y = (_x - _mean) * mul + _bias;
136 | 
137 |       x[(batch * chn + plane) * sp + n] = _y;
138 |     }
139 |   }
140 | }
141 | 
142 | at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
143 |                         bool affine, float eps) {
144 |   CHECK_INPUT(x);
145 |   CHECK_INPUT(mean);
146 |   CHECK_INPUT(var);
147 |   CHECK_INPUT(weight);
148 |   CHECK_INPUT(bias);
149 | 
150 |   // Extract dimensions
151 |   int64_t num, chn, sp;
152 |   get_dims(x, num, chn, sp);
153 | 
154 |   // Run kernel
155 |   dim3 blocks(chn);
156 |   dim3 threads(getNumThreads(sp));
157 |   AT_DISPATCH_FLOATING_TYPES(x.type(), "forward_cuda", ([&] {
158 |     forward_kernel<scalar_t><<<blocks, threads>>>(
159 |         x.data<scalar_t>(),
160 |         mean.data<scalar_t>(),
161 |         var.data<scalar_t>(),
162 |         weight.data<scalar_t>(),
163 |         bias.data<scalar_t>(),
164 |         affine, eps, num, chn, sp);
165 |   }));
166 | 
167 |   return x;
168 | }
169 | 
170 | /***********
171 |  * edz_eydz
172 |  ***********/
173 | 
174 | template <typename T>
175 | __global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias,
176 |                                 T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) {
177 |   int plane = blockIdx.x;
178 | 
179 |   T _weight = affine ? abs(weight[plane]) + eps : 1.f;
180 |   T _bias = affine ? bias[plane] : 0.f;
181 | 
182 |   Pair<T> res = reduce<Pair<T>, GradOp<T>>(GradOp<T>(_weight, _bias, z, dz, chn, sp), plane, num, chn, sp);
183 |   __syncthreads();
184 | 
185 |   if (threadIdx.x == 0) {
186 |     edz[plane] = res.v1;
187 |     eydz[plane] = res.v2;
188 |   }
189 | }
190 | 
191 | std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
192 |                                       bool affine, float eps) {
193 |   CHECK_INPUT(z);
194 |   CHECK_INPUT(dz);
195 |   CHECK_INPUT(weight);
196 |   CHECK_INPUT(bias);
197 | 
198 |   // Extract dimensions
199 |   int64_t num, chn, sp;
200 |   get_dims(z, num, chn, sp);
201 | 
202 |   auto edz = at::empty({chn}, z.options());
203 |   auto eydz = at::empty({chn}, z.options());
204 | 
205 |   // Run kernel
206 |   dim3 blocks(chn);
207 |   dim3 threads(getNumThreads(sp));
208 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "edz_eydz_cuda", ([&] {
209 |     edz_eydz_kernel<scalar_t><<<blocks, threads>>>(
210 |         z.data<scalar_t>(),
211 |         dz.data<scalar_t>(),
212 |         weight.data<scalar_t>(),
213 |         bias.data<scalar_t>(),
214 |         edz.data<scalar_t>(),
215 |         eydz.data<scalar_t>(),
216 |         affine, eps, num, chn, sp);
217 |   }));
218 | 
219 |   return {edz, eydz};
220 | }
221 | 
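// The backward pass runs in two stages: edz_eydz_cuda above reduces sum(dz)
// and sum(y * dz) per channel, and backward_cuda below applies the
// element-wise batch-norm gradient using those per-channel statistics.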
222 | /***********
223 |  * backward
224 |  ***********/
225 | 
226 | template <typename T>
227 | __global__ void backward_kernel(const T *z, const T *dz, const T *var, const T *weight, const T *bias, const T *edz,
228 |                                 const T *eydz, T *dx, T *dweight, T *dbias,
229 |                                 bool affine, float eps, int num, int chn, int sp) {
230 |   int plane = blockIdx.x;
231 | 
232 |   T _weight = affine ? abs(weight[plane]) + eps : 1.f;
233 |   T _bias = affine ? bias[plane] : 0.f;
234 |   T _var = var[plane];
235 |   T _edz = edz[plane];
236 |   T _eydz = eydz[plane];
237 | 
238 |   T _mul = _weight * rsqrt(_var + eps);
239 |   T count = T(num * sp);
240 | 
241 |   for (int batch = 0; batch < num; ++batch) {
242 |     for (int n = threadIdx.x; n < sp; n += blockDim.x) {
243 |       T _dz = dz[(batch * chn + plane) * sp + n];
244 |       T _y = (z[(batch * chn + plane) * sp + n] - _bias) / _weight;
245 | 
246 |       dx[(batch * chn + plane) * sp + n] = (_dz - _edz / count - _y * _eydz / count) * _mul;
247 |     }
248 |   }
249 | 
250 |   if (threadIdx.x == 0) {
251 |     if (affine) {
252 |       dweight[plane] = weight[plane] > 0 ? _eydz : -_eydz;
253 |       dbias[plane] = _edz;
254 |     }
255 |   }
256 | }
257 | 
258 | std::vector<at::Tensor> backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
259 |                                       at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
260 |   CHECK_INPUT(z);
261 |   CHECK_INPUT(dz);
262 |   CHECK_INPUT(var);
263 |   CHECK_INPUT(weight);
264 |   CHECK_INPUT(bias);
265 |   CHECK_INPUT(edz);
266 |   CHECK_INPUT(eydz);
267 | 
268 |   // Extract dimensions
269 |   int64_t num, chn, sp;
270 |   get_dims(z, num, chn, sp);
271 | 
272 |   auto dx = at::zeros_like(z);
273 |   auto dweight = at::zeros_like(weight);
274 |   auto dbias = at::zeros_like(bias);
275 | 
276 |   // Run kernel
277 |   dim3 blocks(chn);
278 |   dim3 threads(getNumThreads(sp));
279 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "backward_cuda", ([&] {
280 |     backward_kernel<scalar_t><<<blocks, threads>>>(
281 |         z.data<scalar_t>(),
282 |         dz.data<scalar_t>(),
283 |         var.data<scalar_t>(),
284 |         weight.data<scalar_t>(),
285 |         bias.data<scalar_t>(),
286 |         edz.data<scalar_t>(),
287 |         eydz.data<scalar_t>(),
288 |         dx.data<scalar_t>(),
289 |         dweight.data<scalar_t>(),
290 |         dbias.data<scalar_t>(),
291 |         affine, eps, num, chn, sp);
292 |   }));
293 | 
294 |   return {dx, dweight, dbias};
295 | }
296 | 
297 | /**************
298 |  * activations
299 |  **************/
300 | 
301 | template <typename T>
302 | inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) {
303 |   // Create thrust pointers
304 |   thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
305 |   thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);
306 | 
307 |   thrust::transform_if(th_dz, th_dz + count, th_z, th_dz,
308 |                        [slope] __device__ (const T& dz) { return dz * slope; },
309 |                        [] __device__ (const T& z) { return z < 0; });
310 |   thrust::transform_if(th_z, th_z + count, th_z,
311 |                        [slope] __device__ (const T& z) { return z / slope; },
312 |                        [] __device__ (const T& z) { return z < 0; });
313 | }
314 | 
315 | void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) {
316 |   CHECK_INPUT(z);
317 |   CHECK_INPUT(dz);
318 | 
319 |   int64_t count = z.numel();
320 | 
321 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] {
322 |     leaky_relu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), slope, count);
323 |   }));
324 | }
325 | 
326 | template <typename T>
327 | inline void elu_backward_impl(T *z, T *dz, int64_t count) {
328 |   // Create thrust pointers
329 |   thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
330 |   thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);
331 | 
332 |   thrust::transform_if(th_dz, th_dz + count, th_z, th_z, th_dz,
333 |                        [] __device__ (const T& dz, const T& z) { return dz * (z + 1.); },
334 |                        [] __device__ (const T& z) { return z < 0; });
335 |   thrust::transform_if(th_z, th_z + count, th_z,
336 |                        [] __device__ (const T& z) { return log1p(z); },
337 |                        [] __device__ (const T& z) { return z < 0; });
338 | }
339 | 
340 | void elu_backward_cuda(at::Tensor z, at::Tensor dz) {
341 |   CHECK_INPUT(z);
342 |   CHECK_INPUT(dz);
343 | 
344 |   int64_t count = z.numel();
345 | 
346 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cuda", ([&] {
347 |     elu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), count);
348 |   }));
349 | }
--------------------------------------------------------------------------------
/viewpoint_optim/segmodel.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | 
5 | 
6 | def flip(x, dim):
7 |     indices = [slice(None)] * x.dim()
8 |     indices[dim] = torch.arange(x.size(dim) - 1, -1, -1,
9 |                                 dtype=torch.long, device=x.device)
10 |     return x[tuple(indices)]
11 | 
12 | 
13 | class SegmentationModule(nn.Module):
14 |     _IGNORE_INDEX = 255
15 | 
16 |     class _MeanFusion:
17 |         def __init__(self, x, classes):
18 |             self.buffer = x.new_zeros(x.size(0), classes, x.size(2), x.size(3))
19 |             self.counter = 0
20 | 
21 |         def update(self, sem_logits):
22 |             probs = F.softmax(sem_logits, dim=1)
23 |             self.counter += 1
24 |             self.buffer.add_((probs - self.buffer) / self.counter)  # incremental mean
25 | 
26 |         def output(self):
27 |             probs, cls = self.buffer.max(1)
28 |             return probs, cls
29 | 
30 |     class _VotingFusion:
31 |         def __init__(self, x, classes):
32 |             self.votes = x.new_zeros(x.size(0), classes, x.size(2), x.size(3))
33 |             self.probs = x.new_zeros(x.size(0), classes, x.size(2), x.size(3))
34 | 
35 |         def update(self, sem_logits):
36 |             probs = F.softmax(sem_logits, dim=1)
37 |             probs, cls = probs.max(1, keepdim=True)
38 | 
39 |             self.votes.scatter_add_(1, cls, self.votes.new_ones(cls.size()))
40 |             self.probs.scatter_add_(1, cls, probs)
41 | 
42 |         def output(self):
43 |             votes, cls = self.votes.max(1, keepdim=True)  # (max vote count, winning class)
44 |             probs = self.probs / self.votes.clamp(min=1)
45 |             probs = probs.gather(1, cls)
46 |             return probs.squeeze(1), cls.squeeze(1)
47 | 
48 |     class _MaxFusion:
49 |         def __init__(self, x, _):
50 |             self.buffer_cls = x.new_zeros(x.size(0), x.size(2), x.size(3), dtype=torch.long)
51 |             self.buffer_prob = x.new_zeros(x.size(0), x.size(2), x.size(3))
52 | 
53 |         def update(self, sem_logits):
54 |             probs = F.softmax(sem_logits, dim=1)
55 |             max_prob, max_cls = probs.max(1)
56 | 
57 |             replace_idx = max_prob > self.buffer_prob
58 |             self.buffer_cls[replace_idx] = max_cls[replace_idx]
59 |             self.buffer_prob[replace_idx] = max_prob[replace_idx]
60 | 
61 |         def output(self):
62 |             return self.buffer_prob, self.buffer_cls
63 | 
64 |     def __init__(self, model_dict, head_channels, classes, depth_fusion, vote_mode="plain", vote_scales=[0.7, 1.2]):
65 |         super(SegmentationModule, self).__init__()
66 |         self.depth_fusion = depth_fusion
67 |         self.vote_mode = vote_mode
68 |         self.vote_scales = vote_scales
69 |         self.body = model_dict['body']
70 |         if depth_fusion == 'feature-concat':
71 |             self.depth_body = model_dict['depth_body']
72 |         self.head = model_dict['head']
73 |         self.cls = nn.Conv2d(head_channels, classes, 1)
74 | 
75 |         self.classes = classes
76 |         # Select the multi-scale fusion strategy up front; it is only used in eval mode
77 |         if "mean" in self.vote_mode:
78 |             self.fusion_cls = SegmentationModule._MeanFusion
79 |         elif "voting" in self.vote_mode:
80 |             self.fusion_cls = SegmentationModule._VotingFusion
81 |         elif "max" in self.vote_mode:
82 |             self.fusion_cls = SegmentationModule._MaxFusion
83 | 
84 |     def _forward(self, x, depth):
85 |         img_shape = x.shape[-2:]
86 |         if self.depth_fusion == 'pixel-concat':
87 |             x = torch.cat([x, depth], dim=1)
88 |         x = self.body(x)
89 |         if self.depth_fusion == 'feature-concat':
90 |             depth = self.depth_body(depth)
91 |             x = torch.cat([x, depth], dim=1)
92 |         x = self.head(x)
93 |         x = self.cls(x)
94 |         x = F.interpolate(x, size=img_shape, mode='bilinear', align_corners=True)
95 |         return x
96 | 
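    # Multi-scale / flip voting (illustrative numbers): with vote_mode="mean-flip"
    # and the default vote_scales=[0.7, 1.2], forward() below evaluates six
    # variants -- scales {1.0, 0.7, 1.2} plus their horizontal flips -- and fuses
    # the per-pixel softmax outputs with _MeanFusion into (probs, classes).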
97 |     def forward(self, x, depth):
98 |         if self.training or self.vote_mode == 'plain':
99 |             return self._forward(x, depth)
100 |         else:
101 |             # Prepare data_dict
102 |             feed_dict = [{"x": x, "depth": depth}]
103 |             feed_scales = [1]
104 |             for scale in self.vote_scales:
105 |                 scaled_size = [round(s * scale) for s in x.shape[-2:]]
106 |                 feed_dict.append(
107 |                     {
108 |                         "x": F.interpolate(x, size=scaled_size, mode="bilinear"),
109 |                         "depth": F.interpolate(depth, size=scaled_size, mode="bilinear")
110 |                     })
111 |                 feed_scales.append(scale)
112 |             if "flip" in self.vote_mode:
113 |                 for i in range(len(feed_scales)):
114 |                     feed_dict.append(
115 |                         {
116 |                             "x": flip(feed_dict[i]["x"], -1),
117 |                             "depth": flip(feed_dict[i]["depth"], -1)
118 |                         })
119 |                     feed_scales.append(-feed_scales[i])
120 | 
121 |             fusion = self.fusion_cls(x, self.classes)
122 |             for i in range(len(feed_scales)):
123 |                 sem_logits = self._forward(**feed_dict[i])
124 |                 if feed_scales[i] < 0:
125 |                     sem_logits = flip(sem_logits, -1)
126 |                 if abs(feed_scales[i]) != 1:
127 |                     sem_logits = F.interpolate(sem_logits, size=x.shape[-2:], mode="bilinear")
128 |                 fusion.update(sem_logits)
129 | 
130 |             return fusion.output()
--------------------------------------------------------------------------------
/viewpoint_optim/utils.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import matplotlib.pyplot as plt
3 | import numpy as np
4 | from functools import partial
5 | 
6 | import torch
7 | 
8 | # Project-local imports; the exact import paths are assumed from this repo's
9 | # layout, and `models` (WiderResNet definitions such as net_wider_resnet38_a2)
10 | # is expected to be available alongside this package.
11 | import models
12 | from modules.bn import InPlaceABN
13 | from modules.deeplab import DeeplabV3
14 | 
15 | 
16 | def load_snapshot(snapshot_file, depth_fusion):
17 |     """Load a training snapshot"""
18 |     print("--- Loading model from snapshot")
19 | 
20 |     # Create network
21 |     norm_act = partial(InPlaceABN, activation="leaky_relu", slope=.01)
22 |     model_dict = dict()
23 |     if depth_fusion == 'no-depth':
24 |         model_dict['body'] = models.__dict__["net_wider_resnet38_a2"](
25 |             norm_act=norm_act,
26 |             dilation=(1, 2, 4, 4)
27 |         )
28 |     elif depth_fusion == 'pixel-concat':
29 |         model_dict['body'] = models.__dict__["net_wider_resnet38_a2"](
30 |             norm_act=norm_act,
31 |             dilation=(1, 2, 4, 4),
32 |             channels_in=4
33 |         )
34 |     elif depth_fusion == 'feature-concat':
35 |         model_dict['body'] = models.__dict__["net_wider_resnet38_a2"](
36 |             norm_act=norm_act,
37 |             dilation=(1, 2, 4, 4)
38 |         )
39 |         model_dict['depth_body'] = models.__dict__["net_wider_resnet38_a2"](
40 |             norm_act=norm_act,
41 |             dilation=(1, 2, 4, 4),
42 |             channels_in=1
43 |         )
44 |     if depth_fusion == 'feature-concat':
45 |         model_dict['head'] = DeeplabV3(8192, 256, 256, norm_act=norm_act, pooling_size=(84, 84))
46 |     else:
47 |         model_dict['head'] = DeeplabV3(4096, 256, 256, norm_act=norm_act, pooling_size=(84, 84))
48 | 
49 |     # Load snapshot and recover network state, presumably only for sub-modules
50 |     # whose shapes match the snapshot: the 'pixel-concat' body takes 4 input
51 |     # channels and the 'feature-concat' head is wider, so those start fresh.
52 |     data = torch.load(snapshot_file)
53 |     if depth_fusion == 'feature-concat' or depth_fusion == 'no-depth':
54 |         model_dict['body'].load_state_dict(data["state_dict"]["body"])
55 | 
56 |     if depth_fusion == 'pixel-concat' or depth_fusion == 'no-depth':
57 |         model_dict['head'].load_state_dict(data["state_dict"]["head"])
58 | 
59 |     return model_dict
60 | 
61 | 
62 | def setup_logger(logger_name, log_file, level=logging.INFO, verbose=False):
63 |     l = logging.getLogger(logger_name)
64 |     formatter = logging.Formatter('%(asctime)s : %(message)s')
65 |     fileHandler = logging.FileHandler(log_file, mode='w')
66 |     fileHandler.setFormatter(formatter)
67 | 
68 |     l.setLevel(level)
69 |     l.addHandler(fileHandler)
70 | 
71 |     if verbose:
72 |         streamHandler = logging.StreamHandler()
73 |         streamHandler.setFormatter(formatter)
74 |         l.addHandler(streamHandler)
75 |     return l
76 | 
77 | 
78 | if __name__ == '__main__':
79 |     pass
--------------------------------------------------------------------------------
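To close the loop, here is a minimal usage sketch tying `load_snapshot`, `SegmentationModule`, and `setup_logger` together. It is illustrative only: the snapshot path, class count, and input resolution are hypothetical, and it assumes the script is run from `viewpoint_optim/` with the C++/CUDA extension under `modules/src` already built.

```python
import torch

from segmodel import SegmentationModule
from utils import load_snapshot, setup_logger

log = setup_logger('demo', 'logs/demo.log', verbose=True)

# Build the network from a pretrained snapshot (path and class count are hypothetical)
model_dict = load_snapshot('trained_models/seg_snapshot.pth.tar', depth_fusion='no-depth')
net = SegmentationModule(model_dict, head_channels=256, classes=65,
                         depth_fusion='no-depth', vote_mode='mean-flip').cuda().eval()

rgb = torch.randn(1, 3, 336, 336).cuda()
depth = torch.randn(1, 1, 336, 336).cuda()
with torch.no_grad():
    probs, classes = net(rgb, depth)  # per-pixel confidence and class id
log.info('output shapes: %s %s', probs.shape, classes.shape)
```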