├── __init__.py ├── data ├── __init__.py └── suncg.py ├── demo ├── __init__.py ├── data │ ├── suncg_img.png │ └── suncg_proposals.mat ├── cli_demo.py └── demo_utils.py ├── utils ├── __init__.py ├── setup.py ├── bbox_utils.pyx ├── metrics.py ├── html.py ├── visutil.py ├── make_html.py └── visualizer.py ├── benchmark ├── __init__.py └── suncg │ ├── __init__.py │ ├── pr_plots.py │ ├── sc_plots.py │ └── evaluate_detection.py ├── experiments ├── __init__.py └── suncg │ ├── __init__.py │ ├── layout.py │ ├── voxels.py │ ├── box3d.py │ └── dwr.py ├── nnutils ├── __init__.py ├── loss_utils.py ├── roi_pool_py.py ├── voxel_net.py ├── test_utils.py ├── disp_net.py ├── net_blocks.py ├── train_utils.py └── oc_net.py ├── renderer ├── __init__.py └── utils.py ├── preprocess └── suncg │ ├── globals.m │ ├── matUtils │ ├── volume_params.m │ ├── mkdirOptional.m │ ├── quatDist.m │ ├── getFileNamesFromDirectory.m │ ├── bboxOverlap.m │ ├── get_scene_vox.m │ └── read_wobj_safe.m │ ├── precompute_gt_bboxes.m │ ├── render_node_indices.py │ ├── render_layout_depth.py │ ├── voxelize_objects.py │ ├── precompute_scene_voxels.m │ └── precompute_edge_boxes.m ├── docs ├── requirements.txt ├── evaluation.md ├── suncg_data.md ├── installation.md ├── preprocessing.md └── training.md ├── .gitignore └── README.md /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nnutils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /renderer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmark/suncg/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/suncg/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demo/data/suncg_img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shubhtuls/factored3d/HEAD/demo/data/suncg_img.png -------------------------------------------------------------------------------- /preprocess/suncg/globals.m: 
-------------------------------------------------------------------------------- 1 | global suncgDir; 2 | suncgDir = '/data0/shubhtuls/datasets/suncg_pbrs_release'; -------------------------------------------------------------------------------- /demo/data/suncg_proposals.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shubhtuls/factored3d/HEAD/demo/data/suncg_proposals.mat -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # python requirements. 2 | pip>=9.0 3 | jupyter 4 | scipy 5 | numpy 6 | matplotlib 7 | absl-py 8 | cython 9 | visdom 10 | -------------------------------------------------------------------------------- /preprocess/suncg/matUtils/volume_params.m: -------------------------------------------------------------------------------- 1 | voxSize = [128;64;128]; 2 | voxUnit = 0.04; 3 | camK = [517.97,0,320;0,517.97,240;0,0,1]; 4 | im_w = 640; 5 | im_h = 480; -------------------------------------------------------------------------------- /preprocess/suncg/matUtils/mkdirOptional.m: -------------------------------------------------------------------------------- 1 | function [] = mkdirOptional(dirName) 2 | %MKDIROPTIONAL Summary of this function goes here 3 | % Detailed explanation goes here 4 | 5 | if(~exist(dirName,'dir')) 6 | mkdir(dirName) 7 | end 8 | 9 | end 10 | 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *ipynb_checkpoints 2 | demo/data 3 | release_files 4 | external 5 | utils/bbox_utils.c 6 | utils/build 7 | utils/bbox_utils.so 8 | renderer/blender/bpy 9 | cachedir 10 | *.ipynb 11 | *.pyc 12 | ipyNb 13 | torchNb 14 | renderer/blender 15 | .timedBar 16 | *Debug* 17 | -------------------------------------------------------------------------------- /preprocess/suncg/matUtils/quatDist.m: -------------------------------------------------------------------------------- 1 | function d = quatDist(q, QJ) 2 | nj = size(QJ,1); 3 | d = zeros(nj,1); 4 | for n = 1:nj 5 | d(n) = qDist(q, QJ(n,:)); 6 | end 7 | end 8 | 9 | function d = qDist(q1,q2) 10 | %disp(q1); 11 | %disp(q2); 12 | r1 = quat2dcm(q1); 13 | r2 = quat2dcm(q2); 14 | r_rel = r1'*r2; 15 | d = norm(logm(r_rel),'fro')/sqrt(2); 16 | end -------------------------------------------------------------------------------- /utils/setup.py: -------------------------------------------------------------------------------- 1 | # Usage: 2 | ''' 3 | python setup.py build_ext --inplace 4 | rm -rf build/ 5 | mv factored3d/utils/bbox_utils.so ./ 6 | rm -rf factored3d/ 7 | ''' 8 | import numpy 9 | from distutils.core import setup 10 | from Cython.Build import cythonize 11 | 12 | setup( 13 | name = "Bbox utils", 14 | ext_modules = cythonize('bbox_utils.pyx'), # accepts a glob pattern 15 | include_dirs=[numpy.get_include()] 16 | ) 17 | -------------------------------------------------------------------------------- /docs/evaluation.md: -------------------------------------------------------------------------------- 1 | # Instructions to evaluate models and baselines 2 | 3 | ### Pre-requisites 4 | Install pcl and pcl-python. 
Instructions for Ubuntu: 5 | 6 | ``` 7 | # add pcl repo 8 | sudo add-apt-repository ppa:v-launchpad-jochen-sprickerhof-de/pcl -y 9 | sudo apt-get update -y 10 | 11 | # install pcl 12 | sudo apt-get install libpcl-all 13 | 14 | # install python wrapper 15 | cd CODE_ROOT/external 16 | touch __init__.py 17 | git clone git@github.com:s-gupta/python-pcl.git pythonpcl && cd pythonpcl && make 18 | ``` 19 | 20 | Note that the example scripts below are for the validation set. Please modify the arguments and plotting functions to use test set for the final evaluation. 21 | ### Comparing Scene Representations 22 | ``` 23 | # Launch comparison evaluation 24 | # launch jobs from one level above code directory 25 | python -m factored3d.benchmark.suncg.scene_comparison --num_train_epoch=1 --name=dwr_shape_ft --classify_rot --pred_voxels=True --use_context --eval_set=val 26 | 27 | # Plot comparisons 28 | cd CODE_ROOT/benchmark/suncg 29 | python sc_plots.py 30 | ``` 31 | 32 | ### Object Detection with Reconstruction Evaluation 33 | ``` 34 | # Launch detection setting object 3D prediction evaluation 35 | # launch jobs from one level above code directory 36 | python -m factored3d.benchmark.suncg.dwr --num_train_epoch=1 --name=dwr_shape_ft --classify_rot --pred_voxels=True --use_context --eval_set=val 37 | 38 | # Plot precision-recall curves 39 | cd CODE_ROOT/benchmark/suncg 40 | python pr_plots.py 41 | ``` -------------------------------------------------------------------------------- /preprocess/suncg/matUtils/getFileNamesFromDirectory.m: -------------------------------------------------------------------------------- 1 | function [nameStruct] = getFileNamesFromDirectory(dirPath,varargin) 2 | %Returns a cell array of names of all files of a specified format in a 3 | %given directory 4 | % dir is the directory from which image names are required 5 | % varargin can be used to specify mode (path/name) and filetypes to be 6 | % read 7 | % Example Usage - getFileNamesFromDirectory(dir,'mode','path','types',{'.png', '.jpg'}) 8 | % Default mode is 'name' (just returns filenames). Default 'types' is all 9 | % image types 10 | 11 | %% Initializing Variables 12 | nVarargs = length(varargin); 13 | mode = 'name'; % mode can be 'path' or 'name' 14 | types = {'.jpg','.png', '.bmp', '.tiff', '.jpeg'}; % types is a cell array 15 | nameStruct = {}; 16 | 17 | %% processing varargin 18 | if(nVarargs > 0) 19 | for i=1:(nVarargs/2) 20 | if(strcmp(varargin{2*i-1},'mode')) 21 | mode = varargin{2*i}; 22 | end 23 | 24 | if(strcmp(varargin{2*i-1},'types')) 25 | types = varargin{2*i}; 26 | end 27 | end 28 | end 29 | 30 | %% Getting the names of the files 31 | for i = 1:length(types) 32 | t = dir([dirPath,'/*',types{i}]); 33 | if(size(t,1) > 0) 34 | nameStruct = [nameStruct extractfield(t,'name')]; 35 | end 36 | end 37 | 38 | %% adding path if 'mode' == 'path' 39 | if (strcmp(mode,'path')) 40 | for i=1:length(nameStruct) 41 | nameStruct{i} = [dirPath,'/',nameStruct{i}]; 42 | end 43 | end 44 | 45 | end 46 | 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Factoring Shape, Pose, and Layout from the 2D Image of a 3D Scene 2 | 3 | Shubham Tulsiani, Saurabh Gupta, David Fouhey, Alexei A. Efros, Jitendra Malik. 
4 | 5 | [Project Page](https://shubhtuls.github.io/factored3d/) 6 | 7 | Note: Also see [3D-RelNet](https://github.com/nileshkulkarni/relative3d) that improves on this work by incorporating relationships among objects. 8 | ![Teaser Image](https://shubhtuls.github.io/factored3d/resources/images/overview.png) 9 | 10 | ## Demo and Pre-trained Models 11 | 12 | Please check out the [interactive notebook](demo/demo.ipynb) which shows reconstructions using the learned models. To run this, you'll first need to follow the [installation instructions](docs/installation.md) to download trained models and some pre-requisites. 13 | 14 | ## Training and Evaluating 15 | To train or evaluate the (trained/downloaded) models, you first need to [download the SUNCG dataset](docs/suncg_data.md) and [preprocess the data](docs/preprocessing.md). Please see the detailed README files for [Training](docs/training.md) or [Evaluation](docs/evaluation.md) of models for subsequent instructions. 16 | 17 | ### Citation 18 | If you use this code for your research, please consider citing: 19 | ``` 20 | @inProceedings{factored3dTulsiani17, 21 | title={Factoring Shape, Pose, and Layout from the 2D Image of a 3D Scene}, 22 | author = {Shubham Tulsiani 23 | and Saurabh Gupta 24 | and David Fouhey 25 | and Alexei A. Efros 26 | and Jitendra Malik}, 27 | booktitle={Computer Vision and Pattern Recognition (CVPR)}, 28 | year={2018} 29 | } 30 | ``` 31 | -------------------------------------------------------------------------------- /preprocess/suncg/matUtils/bboxOverlap.m: -------------------------------------------------------------------------------- 1 | function [iu i a1 a2] = bboxOverlap(B1, B2) 2 | % function [iu i a1 a2] = bboxOverlap(B1, B2) 3 | % B1 and B2 are N x 4 and M x 4 matrices with values [xmin ymin xmax ymax] quadruples 4 | % iu is N x M matrix with intersection over union, i is N x M matrix of the intersection 5 | % a1 is the area of boxes in B1 and a2 is the area of boxes in B2. 6 | 7 | % AUTORIGHTS 8 | % --------------------------------------------------------- 9 | % Copyright (c) 2014, Saurabh Gupta 10 | % 11 | % This file is part of the Utils code and is available 12 | % under the terms of the Simplified BSD License provided in 13 | % LICENSE. Please retain this notice and LICENSE if you use 14 | % this file (or any portion of it) in your project.
15 | % --------------------------------------------------------- 16 | if(numel(B1) == 0 && numel(B2) == 0) 17 | a1 = zeros(0,0); a2 = zeros(0,0); i = zeros(0,0); iu = zeros(0,0); 18 | elseif(numel(B1) == 0) 19 | a1 = zeros(0,0); i = zeros(0,size(B2,1)); iu = zeros(0,size(B2,1)); 20 | a2 = (B2(:,3)-B2(:,1)+1).*(B2(:,4)-B2(:,2)+1); 21 | 22 | elseif(numel(B2) == 0) 23 | a2 = zeros(0,0); i = zeros(size(B1,1), 0); iu = zeros(size(B1,1), 0); 24 | a1 = (B1(:,3)-B1(:,1)+1).*(B1(:,4)-B1(:,2)+1); 25 | 26 | else 27 | a1 = (B1(:,3)-B1(:,1)+1).*(B1(:,4)-B1(:,2)+1); 28 | a2 = (B2(:,3)-B2(:,1)+1).*(B2(:,4)-B2(:,2)+1); 29 | 30 | minX = bsxfun(@max, B1(:,1), B2(:,1)'); 31 | minY = bsxfun(@max, B1(:,2), B2(:,2)'); 32 | 33 | maxX = bsxfun(@min, B1(:,3), B2(:,3)'); 34 | maxY = bsxfun(@min, B1(:,4), B2(:,4)'); 35 | 36 | i = max(maxX-minX+1, 0).*max(maxY-minY+1, 0); 37 | iu = i./max(eps, bsxfun(@plus, a1, a2')-i); 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /docs/suncg_data.md: -------------------------------------------------------------------------------- 1 | # Instructions to download SUNCG 2 | 3 | ### SUNCG Dataset 4 | Download the [SUNCG dataset](http://suncg.cs.princeton.edu/) and extract the contents to SUNCG_DIR. There should be 5 folders named 'house', 'room', 'object', 'texture' and 'object_vox' in SUNCG_DIR. We now download additional meta-data. 5 | ``` 6 | cd SUNCG_DIR; 7 | 8 | # Download data splits 9 | mkdir splits 10 | cd splits 11 | wget https://people.eecs.berkeley.edu/~shubhtuls/cachedir/factored3d/suncg_split.pkl 12 | cd .. 13 | 14 | # Download layout data (suncg houses with objects removed) 15 | # we use this data to render the amodal depths 16 | wget https://people.eecs.berkeley.edu/~shubhtuls/cachedir/factored3d/layout.tar.gz 17 | tar -zxvf layout.tar.gz 18 | mv houseLayout layout 19 | 20 | # Download meta-data 21 | wget https://people.eecs.berkeley.edu/~shubhtuls/cachedir/factored3d/ModelCategoryMappingEdited.csv 22 | ``` 23 | 24 | ### Physically-based Renderings 25 | To use the [physically-based renderings](http://pbrs.cs.princeton.edu/) provided by Zhang et al., we need to download the images, associated camera viewpoints and depth images (for training the baseline).
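After the commands below have finished, it is worth sanity-checking the extracted data against data_goodlist_v2.txt (the list of valid sceneId/cameraId pairs used by the preprocessing scripts). The following is a minimal sketch of such a check; the suncg_dir path is a placeholder you should edit.

```
import os.path as osp

suncg_dir = '/path/to/SUNCG_DIR'  # placeholder: set this to your SUNCG_DIR

# each line of data_goodlist_v2.txt has the form '<sceneId>/<cameraId>'
with open(osp.join(suncg_dir, 'zipfiles', 'data_goodlist_v2.txt')) as f:
    good_list = [l.strip() for l in f if l.strip()]

missing = []
for entry in good_list[:100]:  # spot-check the first 100 entries
    scene_id, cam_id = entry.split('/')
    img_file = osp.join(suncg_dir, 'renderings_ldr', scene_id, '{}_mlt.png'.format(cam_id))
    cam_file = osp.join(suncg_dir, 'camera', scene_id, 'room_camera.txt')
    if not (osp.exists(img_file) and osp.exists(cam_file)):
        missing.append(entry)
print('{}/{} checked entries have missing files'.format(len(missing), len(good_list[:100])))
```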
26 | 27 | ``` 28 | cd SUNCG_DIR; 29 | 30 | mkdir zipfiles; cd zipfiles; 31 | 32 | # Download camera viewpoints 33 | wget http://pbrs.cs.princeton.edu/pbrs_release/data/camera_v2.zip 34 | unzip camera_v2.zip -d ../camera 35 | 36 | # Download LDR renderings 37 | wget http://pbrs.cs.princeton.edu/pbrs_release/data/mlt_v2.zip 38 | unzip mlt_v2.zip -d ../renderings_ldr 39 | 40 | # meta-data 41 | wget http://pbrs.cs.princeton.edu/pbrs_release/data/data_goodlist_v2.txt 42 | 43 | 44 | # Download depth images (needed to train the depth baseline) 45 | wget http://pbrs.cs.princeton.edu/pbrs_release/data/depth_v2.zip 46 | unzip depth_v2.zip -d ../renderings_depth 47 | ``` 48 | -------------------------------------------------------------------------------- /utils/bbox_utils.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | """Evaluation matric utils. 2 | """ 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | import math 7 | import numpy as np 8 | import torch 9 | from . 
import transformations 10 | 11 | def volume_iou(pred, gt, thresh): 12 | gt = gt.float().ge(0.5) 13 | pred = pred.float().ge(thresh) 14 | intersection = torch.mul(gt, pred).sum() 15 | union = gt.sum() + pred.sum() - intersection 16 | return intersection/union 17 | 18 | def quat_dist(pred, gt): 19 | rot_pred = transformations.quaternion_matrix(pred.numpy()) 20 | rot_gt = transformations.quaternion_matrix(gt.numpy()) 21 | rot_rel = np.matmul(rot_pred, np.transpose(rot_gt)) 22 | quat_rel = transformations.quaternion_from_matrix(rot_rel, isprecise=True) 23 | angle = math.acos(abs(quat_rel[0]))*360/math.pi 24 | return angle 25 | 26 | def nms(dets, thresh, min_score=0): 27 | ''' 28 | adapted from Fast R-CNN 29 | Copyright (c) 2015 Microsoft 30 | Licensed under The MIT License 31 | Written by Ross Girshick 32 | ''' 33 | 34 | x1 = dets[:, 0] 35 | y1 = dets[:, 1] 36 | x2 = dets[:, 2] 37 | y2 = dets[:, 3] 38 | scores = dets[:, 4] 39 | 40 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 41 | order = scores.argsort()[::-1] 42 | 43 | keep = [] 44 | while order.size > 0: 45 | i = order[0] 46 | if scores[i] < min_score: 47 | break 48 | 49 | keep.append(i) 50 | xx1 = np.maximum(x1[i], x1[order[1:]]) 51 | yy1 = np.maximum(y1[i], y1[order[1:]]) 52 | xx2 = np.minimum(x2[i], x2[order[1:]]) 53 | yy2 = np.minimum(y2[i], y2[order[1:]]) 54 | 55 | w = np.maximum(0.0, xx2 - xx1 + 1) 56 | h = np.maximum(0.0, yy2 - yy1 + 1) 57 | inter = w * h 58 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 59 | 60 | inds = np.where(ovr <= thresh)[0] 61 | order = order[inds + 1] 62 | 63 | return keep -------------------------------------------------------------------------------- /preprocess/suncg/precompute_gt_bboxes.m: -------------------------------------------------------------------------------- 1 | function precompute_gt_bboxes(min_id, max_id) 2 | globals; 3 | suncgDir = suncgDir; % redundancy useful for parfor 4 | addpath(genpath('./matUtils')); 5 | basedir = pwd(); 6 | fileNamesAll = strsplit(fileread(fullfile(suncgDir, 'zipfiles', 'data_goodlist_v2.txt')), '\n'); 7 | 8 | saveDir = fullfile(suncgDir, 'bboxes_node'); 9 | mkdirOptional(saveDir); 10 | 11 | sceneIds = getFileNamesFromDirectory(fullfile(suncgDir, 'camera'),'types',{''}); 12 | sceneIds = sceneIds(3:end); 13 | sceneIds = sort(sceneIds); 14 | if max_id == 0 15 | max_id = length(sceneIds); 16 | end 17 | parfor ix = min_id:max_id 18 | %for ix = min_id:max_id 19 | sceneId = sceneIds{ix}; 20 | nodesBoxesDir = fullfile(saveDir, sceneId); 21 | mkdirOptional(nodesBoxesDir); 22 | imgsAll = getFileNamesFromDirectory(fullfile(suncgDir, 'renderings_node', sceneId),'types',{'.png'}); 23 | 24 | for cameraId=1:length(imgsAll) 25 | if ~ismember(sprintf('%s/%06d', sceneId, cameraId-1), fileNamesAll) 26 | continue 27 | end 28 | img = imread(fullfile(suncgDir, 'renderings_node', sceneId, sprintf('%06d_node.png', cameraId-1))); 29 | ids = unique(img); 30 | nIds = size(ids,1); 31 | bboxes = zeros(nIds,4); 32 | nPixels = zeros(nIds,1); 33 | for o=1:nIds 34 | bboxes(o,:) = mask2bbox(img == ids(o)); 35 | nPixels(o,:) = sum(sum(img == ids(o))); 36 | end 37 | saveFile = fullfile(nodesBoxesDir, sprintf('%06d_bboxes.mat', cameraId-1)); 38 | saveFunc(saveFile, ids, bboxes, nPixels); 39 | end 40 | end 41 | end 42 | 43 | function saveFunc(filename, ids, bboxes, nPixels) 44 | save(filename,'ids', 'bboxes', 'nPixels'); 45 | end 46 | 47 | function bbox = mask2bbox(mask) 48 | [y,x] = find(mask); 49 | bbox = [min(x) min(y) max(x) max(y)]; 50 | end 
-------------------------------------------------------------------------------- /preprocess/suncg/render_node_indices.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Example usage python render_node_indices.py --min=1 --nc=1 3 | import argparse 4 | import os 5 | import os.path as osp 6 | import threading 7 | 8 | import subprocess 9 | import time 10 | 11 | parser = argparse.ArgumentParser(description='Parse arguments.') 12 | parser.add_argument('--nc', type=int, help='number of cores') 13 | parser.add_argument('--min', type=int, help='min id') 14 | parser.add_argument('--max', type=int, default=0, help='max id') 15 | parser.add_argument('--mesa', type=bool, default=False, help='Use Mesa') 16 | args = parser.parse_args() 17 | 18 | sunCgDir = osp.join('/data0/shubhtuls/datasets', 'suncg_pbrs_release') 19 | toolboxDir = osp.join(sunCgDir, 'toolbox') 20 | execFolder = 'gaps/bin/x86_64' 21 | 22 | modelsAll = [f for f in os.listdir(osp.join(sunCgDir, 'camera'))] 23 | list.sort(modelsAll) 24 | 25 | nCores = args.nc 26 | nMin = args.min 27 | nMax = args.max 28 | if(nMax == 0): 29 | nMax = len(modelsAll) 30 | 31 | class renderingThread(threading.Thread): 32 | def __init__(self, c): 33 | threading.Thread.__init__(self) 34 | self.c = c 35 | 36 | def run(self): 37 | for ix in range(nMin-1, nMax): 38 | if(ix % nCores == self.c): 39 | modelId = modelsAll[ix] 40 | modelDir = osp.join(sunCgDir, 'house', modelId) 41 | saveDir = osp.join(sunCgDir, 'renderings_node', modelId) 42 | camFile = osp.join(sunCgDir, 'camera', modelId, 'room_camera.txt') 43 | 44 | if not os.path.exists(saveDir): 45 | os.makedirs(saveDir) 46 | renderFlags = '-capture_node_images' 47 | if args.mesa: 48 | renderFlags += ' -mesa' 49 | renderCommand = 'cd {}; {}/scn2img house.json {} {} {};'.format(modelDir, osp.join(toolboxDir, execFolder), renderFlags, camFile, saveDir) 50 | 51 | os.system(renderCommand) 52 | 53 | tList = [renderingThread(c) for c in range(nCores)] 54 | 55 | for renderer in tList: 56 | renderer.start() 57 | -------------------------------------------------------------------------------- /preprocess/suncg/render_layout_depth.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Example usage python render_layout_depth.py --min=1 --nc=1 3 | import argparse 4 | import os 5 | import os.path as osp 6 | import threading 7 | 8 | import subprocess 9 | import time 10 | 11 | parser = argparse.ArgumentParser(description='Parse arguments.') 12 | parser.add_argument('--nc', type=int, help='number of cores') 13 | parser.add_argument('--min', type=int, help='min id') 14 | parser.add_argument('--max', type=int, default=0, help='max id') 15 | parser.add_argument('--mesa', type=bool, default=False, help='Use Mesa') 16 | args = parser.parse_args() 17 | 18 | sunCgDir = osp.join('/data0/shubhtuls/datasets', 'suncg_pbrs_release') 19 | toolboxDir = osp.join(sunCgDir, 'toolbox') 20 | execFolder = 'gaps/bin/x86_64' 21 | 22 | modelsAll = [f for f in os.listdir(osp.join(sunCgDir, 'camera'))] 23 | list.sort(modelsAll) 24 | 25 | nCores = args.nc 26 | nMin = args.min 27 | nMax = args.max 28 | if(nMax == 0): 29 | nMax = len(modelsAll) 30 | 31 | class renderingThread(threading.Thread): 32 | def __init__(self, c): 33 | threading.Thread.__init__(self) 34 | self.c = c 35 | 36 | def run(self): 37 | for ix in range(nMin-1, nMax): 38 | if(ix % nCores == self.c): 39 | modelId = modelsAll[ix] 40 | modelDir = osp.join(sunCgDir, 'layout', 
modelId) 41 | saveDir = osp.join(sunCgDir, 'renderings_layout', modelId) 42 | camFile = osp.join(sunCgDir, 'camera', modelId, 'room_camera.txt') 43 | 44 | if not os.path.exists(saveDir): 45 | os.makedirs(saveDir) 46 | renderFlags = '-capture_depth_images' 47 | if args.mesa: 48 | renderFlags += ' -mesa' 49 | 50 | renderCommand = 'cd {}; {}/scn2img layout.json {} {} {};'.format(modelDir, osp.join(toolboxDir, execFolder), renderFlags, camFile, saveDir) 51 | 52 | os.system(renderCommand) 53 | 54 | tList = [renderingThread(c) for c in range(nCores)] 55 | 56 | for renderer in tList: 57 | renderer.start() 58 | -------------------------------------------------------------------------------- /nnutils/loss_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Loss building blocks. 3 | ''' 4 | import torch 5 | import torch.nn as nn 6 | import math 7 | from absl import flags 8 | 9 | #-------------- flags -------------# 10 | #----------------------------------# 11 | flags.DEFINE_float('shape_loss_wt', 1, 'Shape loss weight.') 12 | flags.DEFINE_float('scale_loss_wt', 1, 'Scale loss weight.') 13 | flags.DEFINE_float('quat_loss_wt', 1, 'Quat loss weight.') 14 | flags.DEFINE_float('trans_loss_wt', 1, 'Trans loss weight.') 15 | 16 | 17 | def quat_loss(q1, q2): 18 | ''' 19 | Anti-podal squared L2 loss. 20 | 21 | Args: 22 | q1: N X 4 23 | q2: N X 4 24 | Returns: 25 | loss : scalar 26 | ''' 27 | q_diff_loss = (q1-q2).pow(2).sum(1) 28 | q_sum_loss = (q1+q2).pow(2).sum(1) 29 | q_loss, _ = torch.stack((q_diff_loss, q_sum_loss), dim=1).min(1) 30 | return q_loss.mean() 31 | 32 | 33 | def code_loss( 34 | code_pred, code_gt, 35 | pred_voxels=True, classify_rot=True, 36 | shape_wt=1.0, scale_wt=1.0, quat_wt=1.0, trans_wt=1.0): 37 | ''' 38 | Code loss 39 | 40 | Args: 41 | code_pred: [shape, scale, quat, trans] 42 | code_gt: [shape, scale, quat, trans] 43 | Returns: 44 | total_loss : scalar 45 | ''' 46 | if pred_voxels: 47 | s_loss = torch.nn.functional.binary_cross_entropy(code_pred[0], code_gt[0]) 48 | else: 49 | #print('Shape gt/pred mean : {}, {}'.format(code_pred[0].mean().data[0], code_gt[0].mean().data[0])) 50 | s_loss = (code_pred[0] - code_gt[0]).pow(2).mean() 51 | 52 | if classify_rot: 53 | q_loss = torch.nn.functional.nll_loss(code_pred[2], code_gt[2]) 54 | else: 55 | q_loss = quat_loss(code_pred[2], code_gt[2]) 56 | 57 | sc_loss = (code_pred[1].log() - code_gt[1].log()).abs().mean() 58 | tr_loss = (code_pred[3] - code_gt[3]).pow(2).mean() 59 | 60 | total_loss = sc_loss*scale_wt 61 | total_loss += q_loss*quat_wt 62 | total_loss += tr_loss*trans_wt 63 | total_loss += s_loss*shape_wt 64 | 65 | loss_factors = { 66 | 'shape': s_loss*shape_wt, 'scale': sc_loss*scale_wt, 'quat': q_loss*quat_wt, 'trans': tr_loss*trans_wt 67 | } 68 | return total_loss, loss_factors 69 | -------------------------------------------------------------------------------- /utils/html.py: -------------------------------------------------------------------------------- 1 | '''Code from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix''' 2 | import dominate 3 | from dominate.tags import * 4 | import os 5 | 6 | 7 | class HTML: 8 | def __init__(self, web_dir, title, reflesh=0): 9 | self.title = title 10 | self.web_dir = web_dir 11 | self.img_dir = os.path.join(self.web_dir, 'images') 12 | if not os.path.exists(self.web_dir): 13 | os.makedirs(self.web_dir) 14 | if not os.path.exists(self.img_dir): 15 | os.makedirs(self.img_dir) 16 | # print(self.img_dir) 17 | 18 | self.doc = 
dominate.document(title=title) 19 | if reflesh > 0: 20 | with self.doc.head: 21 | meta(http_equiv="reflesh", content=str(reflesh)) 22 | 23 | def get_image_dir(self): 24 | return self.img_dir 25 | 26 | def add_header(self, str): 27 | with self.doc: 28 | h3(str) 29 | 30 | def add_table(self, border=1): 31 | self.t = table(border=border, style="table-layout: fixed;") 32 | self.doc.add(self.t) 33 | 34 | def add_images(self, ims, txts, links, width=400): 35 | self.add_table() 36 | with self.t: 37 | with tr(): 38 | for im, txt, link in zip(ims, txts, links): 39 | with td(style="word-wrap: break-word;", halign="center", valign="top"): 40 | with p(): 41 | with a(href=os.path.join('images', link)): 42 | img(style="width:%dpx" % width, src=os.path.join('images', im)) 43 | br() 44 | p(txt) 45 | 46 | def save(self): 47 | html_file = '%s/index.html' % self.web_dir 48 | f = open(html_file, 'wt') 49 | f.write(self.doc.render()) 50 | f.close() 51 | 52 | 53 | if __name__ == '__main__': 54 | html = HTML('web/', 'test_html') 55 | html.add_header('hello world') 56 | 57 | ims = [] 58 | txts = [] 59 | links = [] 60 | for n in range(4): 61 | ims.append('image_%d.png' % n) 62 | txts.append('text_%d' % n) 63 | links.append('image_%d.png' % n) 64 | html.add_images(ims, txts, links) 65 | html.save() 66 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation Instructions 2 | 3 | Two overall comments: 4 | * Result visualizations depend on blender. We provide a version, but if you have issues where the renderings don't show up or where the script cannot read the result images, you may have to compile blender and provide a bpy.so file that matches your precise system configuration. See [here](https://wiki.blender.org/index.php/User:Ideasman42/BlenderAsPyModule) for information about how to do this. The results by themselves do not depend on blender, and if you just want to compute predictions, you do not need blender. 5 | * You should run each of these commands in the main root directory. 6 | 7 | #### Setup virtualenv. 8 | ``` 9 | virtualenv venv 10 | source venv/bin/activate 11 | pip install -U pip 12 | deactivate 13 | source venv/bin/activate 14 | pip install -r docs/requirements.txt 15 | ``` 16 | 17 | #### Install pytorch. 18 | ``` 19 | pip install http://download.pytorch.org/whl/cu80/torch-0.2.0.post3-cp27-cp27mu-manylinux1_x86_64.whl 20 | pip install torchvision visdom dominate 21 | ``` 22 | 23 | #### Compile cython modules. 24 | First, we need to compile some cython utilities. 25 | ``` 26 | cd utils 27 | python setup.py build_ext --inplace 28 | mv factored3d/utils/bbox_utils.so ./ 29 | rm -rf build/ # remove redundant folders 30 | rm -rf factored3d/ # remove redundant folders 31 | cd .. 32 | ``` 33 | 34 | #### Download pre-trained models. 35 | ``` 36 | # Download pre-trained Resnet18 Model. 37 | wget https://download.pytorch.org/models/resnet18-5c106cde.pth -O ~/.torch/models/resnet18-5c106cde.pth 38 | 39 | # Download our models. 40 | wget https://people.eecs.berkeley.edu/~shubhtuls/cachedir/factored3d/cachedir.tar.gz && tar -xf cachedir.tar.gz 41 | wget https://people.eecs.berkeley.edu/~shubhtuls/cachedir/factored3d/blender.tar.gz && tar -xf blender.tar.gz && mv blender renderer/. 42 | ``` 43 | 44 | #### Setup external dependencies. 
45 | ``` 46 | mkdir external; cd external; 47 | # Python interface for binvox 48 | git clone https://github.com/dimatura/binvox-rw-py ./binvox 49 | 50 | # Piotr Dollar's toolbox 51 | git clone https://github.com/pdollar/toolbox ./toolbox 52 | 53 | # Edgeboxes code 54 | git clone https://github.com/pdollar/edges ./edges 55 | 56 | # SSC-Net code (used for computing voxelization for the baseline) 57 | git clone https://github.com/shurans/sscnet ./sscnet 58 | cd .. 59 | ``` 60 | -------------------------------------------------------------------------------- /preprocess/suncg/voxelize_objects.py: -------------------------------------------------------------------------------- 1 | # This needs to be executed onscreen 2 | import os,sys 3 | import os.path as osp 4 | import numpy as np 5 | import scipy.io as sio 6 | 7 | sys.path.append('/data0/shubhtuls/code/factored3d/external/binvox') 8 | sun_cg_dir = '/data0/shubhtuls/datasets/suncg_pbrs_release' 9 | binvox_exec_file = '/data0/shubhtuls/datasets/suncg_pbrs_release/toolbox/binvox' 10 | 11 | import binvox_rw 12 | 13 | def sub_dirs(d): 14 | return [o for o in os.listdir(d) if os.path.isdir(os.path.join(d,o))] 15 | 16 | 17 | obj_ids = sub_dirs(osp.join(sun_cg_dir,'object')) 18 | obj_ids = [o for o in obj_ids if 'copy' not in o] 19 | grid_size = 64 20 | dc1 = 'find {} -name "*.binvox" -type f -delete'.format(osp.join(sun_cg_dir,'object')) 21 | dc2 = 'find {} -name "*.mat" -type f -delete'.format(osp.join(sun_cg_dir,'object')) 22 | os.system(dc1) #delete old .binvox files 23 | os.system(dc2) #delete old .mat files 24 | 25 | for ix in range(len(obj_ids)): 26 | obj_id = obj_ids[ix] 27 | print(obj_id) 28 | object_dir = osp.join(sun_cg_dir, 'object', obj_id) 29 | binvox_file_interior = osp.join(object_dir, obj_id + '.binvox') 30 | binvox_file_surface = osp.join(object_dir, obj_id + '_1.binvox') 31 | 32 | cmd_interior = '{} -cb -d {} {}'.format(binvox_exec_file, grid_size, osp.join(object_dir, obj_id + '.obj')) 33 | cmd_surface = '{} -cb -e -d {} {}'.format(binvox_exec_file, grid_size, osp.join(object_dir, obj_id + '.obj')) 34 | os.system(cmd_interior) 35 | os.system(cmd_surface) 36 | 37 | with open(binvox_file_interior, 'rb') as f0: 38 | with open(binvox_file_surface, 'rb') as f1: 39 | vox_read_interior = binvox_rw.read_as_3d_array(f0) 40 | vox_read_surface = binvox_rw.read_as_3d_array(f1) 41 | 42 | #need to add translation corresponding to voxel centering 43 | shape_vox = vox_read_interior.data.astype(np.bool) + vox_read_surface.data.astype(np.bool) 44 | if(np.max(shape_vox) > 0): 45 | Xs, Ys, Zs = np.where(shape_vox) 46 | trans_centre = np.array([1.0*np.min(Xs)/(np.size(shape_vox,0)), 1.0*np.min(Ys)/(np.size(shape_vox,1)), 1.0*np.min(Zs)/(np.size(shape_vox,2)-1)] ) 47 | translate = vox_read_surface.translate - trans_centre*vox_read_surface.scale 48 | sio.savemat(osp.join(object_dir, obj_id + '.mat'), {'voxels' : shape_vox, 'scale' : vox_read_surface.scale, 'translation' : translate}) -------------------------------------------------------------------------------- /nnutils/roi_pool_py.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ROI pooling layer. 
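Each roi is specified as [batch_index, x_min, y_min, x_max, y_max]; the coordinates are scaled by spatial_scale, the region is divided into a pooled_height x pooled_width grid, and the features are max-pooled within each bin (empty bins are filled with 0).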
3 | Source adapted from https://github.com/longcw/faster_rcnn_pytorch/ 4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | from torch.autograd import Variable 8 | import numpy as np 9 | 10 | 11 | class RoIPool(nn.Module): 12 | def __init__(self, pooled_height, pooled_width, spatial_scale): 13 | super(RoIPool, self).__init__() 14 | self.pooled_width = int(pooled_width) 15 | self.pooled_height = int(pooled_height) 16 | self.spatial_scale = float(spatial_scale) 17 | 18 | def forward(self, features, rois): 19 | batch_size, num_channels, data_height, data_width = features.size() 20 | num_rois = rois.size()[0] 21 | outputs = Variable(torch.zeros(num_rois, num_channels, self.pooled_height, self.pooled_width)).cuda() 22 | 23 | for roi_ind, roi in enumerate(rois): 24 | batch_ind = int(roi[0].data[0]) 25 | roi_start_w, roi_start_h, roi_end_w, roi_end_h = np.round( 26 | roi[1:].data.cpu().numpy() * self.spatial_scale).astype(int) 27 | roi_width = max(roi_end_w - roi_start_w + 1, 1) 28 | roi_height = max(roi_end_h - roi_start_h + 1, 1) 29 | bin_size_w = float(roi_width) / float(self.pooled_width) 30 | bin_size_h = float(roi_height) / float(self.pooled_height) 31 | 32 | for ph in range(self.pooled_height): 33 | hstart = int(np.floor(ph * bin_size_h)) 34 | hend = int(np.ceil((ph + 1) * bin_size_h)) 35 | hstart = min(data_height, max(0, hstart + roi_start_h)) 36 | hend = min(data_height, max(0, hend + roi_start_h)) 37 | for pw in range(self.pooled_width): 38 | wstart = int(np.floor(pw * bin_size_w)) 39 | wend = int(np.ceil((pw + 1) * bin_size_w)) 40 | wstart = min(data_width, max(0, wstart + roi_start_w)) 41 | wend = min(data_width, max(0, wend + roi_start_w)) 42 | 43 | is_empty = (hend <= hstart) or(wend <= wstart) 44 | if is_empty: 45 | outputs[roi_ind, :, ph, pw] = 0 46 | else: 47 | data = features[batch_ind] 48 | outputs[roi_ind, :, ph, pw] = torch.max( 49 | torch.max(data[:, hstart:hend, wstart:wend], 1)[0], 1)[0].view(-1) 50 | 51 | return outputs 52 | 53 | -------------------------------------------------------------------------------- /docs/preprocessing.md: -------------------------------------------------------------------------------- 1 | # Instructions to precompute data required for training 2 | 3 | ### Compiling SUNCG Toolbox 4 | 5 | ``` 6 | cd SUNCG_DIR; 7 | 8 | # Download the toolbox 9 | git clone https://github.com/shurans/SUNCGtoolbox ./toolbox 10 | cd toolbox 11 | 12 | # Use our modified rendering function 13 | cp CODE_ROOT/preprocess/suncg/scn2img.cpp ./gaps/apps/scn2img/ 14 | 15 | # Compile 16 | make 17 | 18 | # (or optionally instead of above) compile with offscreen mesa support 19 | make mesa 20 | ``` 21 | In case you compile with offscreen support, you might need to edit [this line](https://github.com/shurans/SUNCGtoolbox/blob/master/gaps/makefiles/Makefile.apps#L42) to specify additional lib directories if you're using a locally compiled version of mesa, and possibly also specify a CPLUS_INCLUDE_PATH. Though note that locally compiling mesa with offscreen support can get a bit tricky. 22 | 23 | We highly recommend using the offscreen version, as otherwise the rendering behaviour is often stochastic. 24 | 25 | 26 | ### Rendering Layout and Node Images 27 | You'll first need to edit the 'sunCgDir' variable in both the python scripts below. Note that both the rendering jobs can take a while. If you managed to compile the gaps toolbox with offscreen mesa, you can add --mesa=True to the commands below, else you'll need to run the rendering jobs in an onscreen mode. 
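Both scripts write one output folder per scene (renderings_layout/SCENE_ID and renderings_node/SCENE_ID), so a rough progress check while the jobs below run can be done with a short sketch like the following; the suncg_dir value here is an assumption and should match the 'sunCgDir' variable in the scripts.

```
import os
import os.path as osp

suncg_dir = '/path/to/suncg_pbrs_release'  # assumption: same path as 'sunCgDir' in the render scripts
scenes = sorted(os.listdir(osp.join(suncg_dir, 'camera')))
for out_name in ['renderings_layout', 'renderings_node']:
    done = [s for s in scenes
            if osp.isdir(osp.join(suncg_dir, out_name, s)) and os.listdir(osp.join(suncg_dir, out_name, s))]
    print('{}: {}/{} scenes have output'.format(out_name, len(done), len(scenes)))
```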
28 | ``` 29 | cd CODE_ROOT/preprocess/suncg 30 | 31 | # Render amodal depths (edit the 'sunCgDir' variable before running) 32 | python render_layout_depth.py --min=1 --nc=1 33 | 34 | # Render node indices (edit the 'sunCgDir' variable before running) 35 | python render_node_indices.py --min=1 --nc=1 36 | 37 | ``` 38 | 39 | ### Voxelize Objects and Scenes 40 | Please download binvox from [here](http://www.patrickmin.com/binvox/) and store the binary as SUNCG_DIR/toolbox/binvox. 41 | 42 | ``` 43 | # Voxelize the objects (edit the 'sunCgDir' variable before running) 44 | # This needs to be run in onscreen mode with a display/desktop connected 45 | python voxelize_objects.py 46 | 47 | # Compute voxelizations for the full scene (required for training the baseline) 48 | # Edit the 'suncgDir' in globals.m before running 49 | precompute_scene_voxels(1, 0); 50 | ``` 51 | 52 | ### Compute object proposals 53 | ``` 54 | # Extract ground-truth object boxes 55 | # Edit the 'suncgDir' in globals.m before running 56 | precompute_gt_bboxes(1, 0); 57 | 58 | # Extract edgebox proposals 59 | # Edit the 'suncgDir' in globals.m before running 60 | precompute_edge_boxes(1, 0); 61 | ``` 62 | -------------------------------------------------------------------------------- /demo/cli_demo.py: -------------------------------------------------------------------------------- 1 | # edit the code path accordingly 2 | code_root = '/data0/shubhtuls/code/factored3d/' 3 | import sys 4 | import numpy as np 5 | import os.path as osp 6 | import scipy.misc 7 | import scipy.io as sio 8 | import torch 9 | import matplotlib.pyplot as plt 10 | sys.path.append(osp.join(code_root, '..')) 11 | from absl import flags 12 | from factored3d.demo import demo_utils 13 | 14 | flags.FLAGS(['demo']) 15 | opts = flags.FLAGS 16 | 17 | # do not change the options below 18 | opts.batch_size = 1 19 | opts.num_train_epoch = 1 20 | opts.name = 'dwr_shape_ft' 21 | opts.classify_rot = True 22 | opts.pred_voxels = True 23 | opts.use_context = True 24 | 25 | if opts.classify_rot: 26 | opts.nz_rot = 24 27 | else: 28 | opts.nz_rot = 4 29 | 30 | ## Load the trained models 31 | tester = demo_utils.DemoTester(opts) 32 | tester.init_testing() 33 | 34 | renderer = demo_utils.DemoRenderer(opts) 35 | ## Load input data 36 | dataset = 'suncg' 37 | 38 | img = scipy.misc.imread('./data/{}_img.png'.format(dataset)) 39 | 40 | img_fine = scipy.misc.imresize(img, (opts.img_height_fine, opts.img_width_fine)) 41 | img_fine = np.transpose(img_fine, (2,0,1)) 42 | 43 | img_coarse = scipy.misc.imresize(img, (opts.img_height, opts.img_width)) 44 | img_coarse = np.transpose(img_coarse, (2,0,1)) 45 | 46 | proposals = sio.loadmat('./data/{}_proposals.mat'.format(dataset))['proposals'][:, 0:4] 47 | 48 | inputs = {} 49 | inputs['img'] = torch.from_numpy(img_coarse/255.0).unsqueeze(0) 50 | inputs['img_fine'] = torch.from_numpy(img_fine/255.0).unsqueeze(0) 51 | inputs['bboxes_test_proposals'] = [torch.from_numpy(proposals)] 52 | tester.set_input(inputs) 53 | objects, layout = tester.predict_factored3d() 54 | scene_voxels = tester.predict_scene_voxels() 55 | dmap = tester.predict_depth() 56 | img_factored_cam, img_factored_novel = renderer.render_factored3d(objects, layout) 57 | img_voxels_cam, img_voxels_novel = renderer.render_scene_vox(scene_voxels) 58 | img_dmap_cam, img_dmap_novel = renderer.render_depth(dmap) 59 | 60 | f, axarr = plt.subplots(2, 4, figsize=(20, 8)) 61 | 62 | axarr[0, 0].imshow(img) 63 | axarr[0, 0].axis('off') 64 | axarr[1, 0].imshow(img*0 + 255) 65 | axarr[1, 
0].axis('off') 66 | 67 | axarr[0, 1].imshow(img_factored_cam) 68 | axarr[0, 1].axis('off') 69 | axarr[1, 1].imshow(img_factored_novel) 70 | axarr[1, 1].axis('off') 71 | 72 | axarr[0, 2].imshow(img_voxels_cam) 73 | axarr[0, 2].axis('off') 74 | axarr[1, 2].imshow(img_voxels_novel) 75 | axarr[1, 2].axis('off') 76 | 77 | axarr[0, 3].imshow(img_dmap_cam) 78 | axarr[0, 3].axis('off') 79 | axarr[1, 3].imshow(img_dmap_novel) 80 | axarr[1, 3].axis('off') 81 | 82 | plt.show() 83 | -------------------------------------------------------------------------------- /preprocess/suncg/precompute_scene_voxels.m: -------------------------------------------------------------------------------- 1 | function precompute_scene_voxels(min_id, max_id) 2 | globals; 3 | suncgDir = suncgDir; % redundancy useful for parfor 4 | addpath(genpath('./matUtils')); 5 | basedir = pwd(); 6 | fileNamesAll = strsplit(fileread(fullfile(suncgDir, 'zipfiles', 'data_goodlist_v2.txt')), '\n'); 7 | 8 | sscnetDir = fullfile(basedir, '..', '..', 'external', 'sscnet', 'matlab_code'); 9 | addpath(sscnetDir); 10 | 11 | objectcategory = load(fullfile(sscnetDir, 'suncgObjcategory.mat')); 12 | addpath(fullfile(sscnetDir, 'utils')); 13 | addpath(fullfile(basedir, 'matUtils')); 14 | sceneIds = getFileNamesFromDirectory(fullfile(suncgDir, 'camera'),'types',{''}); 15 | sceneIds = sceneIds(3:end); 16 | sceneIds = sort(sceneIds); 17 | 18 | if max_id == 0 19 | max_id = length(sceneIds); 20 | end 21 | parfor ix = min_id:max_id 22 | genSceneData(sceneIds{ix}, suncgDir, objectcategory.objcategory, fileNamesAll); 23 | end 24 | end 25 | 26 | 27 | function genSceneData(sceneId, suncgDir, objcategory, fileNamesAll) 28 | %% generating scene voxels in camera view 29 | camerafile = sprintf('%s/camera/%s/room_camera.txt', suncgDir, sceneId); 30 | cameraInfofile = sprintf('%s/camera/%s/room_camera_name.txt', suncgDir, sceneId); 31 | cameraInfo = readCameraName(cameraInfofile); 32 | cameraPoses = readCameraPose(camerafile); 33 | voxPath = fullfile(suncgDir, 'scene_voxels', sceneId); 34 | mkdirOptional(voxPath); 35 | 36 | for cameraId = 1:length(cameraInfo) 37 | if ~ismember(sprintf('%s/%06d', sceneId, cameraId-1), fileNamesAll) 38 | continue 39 | end 40 | sceneVoxMatFilename = fullfile(voxPath,sprintf('%06d_voxels.mat',cameraId-1)); 41 | sceneVoxFilename = [sceneVoxMatFilename(1:(end-4)),'.bin']; 42 | if exist(sceneVoxMatFilename, 'file') 43 | continue 44 | end 45 | 46 | % get camera extrisic yup -> zup 47 | extCam2World = camPose2Extrinsics(cameraPoses(cameraId,:)); 48 | extCam2World = [[1 0 0; 0 0 1; 0 1 0]*extCam2World(1:3,1:3) extCam2World([1,3,2],4)]; 49 | 50 | % generating scene voxels in camera view 51 | [sceneVox] = get_scene_vox(suncgDir,sceneId,cameraInfo(cameraId).floorId+1,cameraInfo(cameraId).roomId+1,extCam2World,objcategory); 52 | camPoseArr = [extCam2World',[0;0;0;1]]; %' 53 | % camPoseArr = camPoseArr(:); 54 | sceneVox = (sceneVox ~= 0) & (sceneVox ~= 255); 55 | 56 | % Compress with RLE and save to binary file 57 | % writeRLEfile(sceneVoxFilename, sceneVox,camPoseArr,voxOriginWorld) 58 | save(sceneVoxMatFilename,'sceneVox','camPoseArr'); 59 | end 60 | end -------------------------------------------------------------------------------- /utils/visutil.py: -------------------------------------------------------------------------------- 1 | '''Code from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix''' 2 | from __future__ import print_function 3 | import torch 4 | import numpy as np 5 | from PIL import Image 6 | import inspect, re 7 | import 
numpy as np 8 | import os 9 | import collections 10 | 11 | # Converts a Tensor into a Numpy array 12 | # |imtype|: the desired type of the converted numpy array 13 | def tensor2im(image_tensor, imtype=np.uint8): 14 | image_numpy = image_tensor[0].cpu().float().numpy() 15 | image_numpy = (np.transpose(image_numpy, (1, 2, 0))) * 255.0 16 | return image_numpy.astype(imtype) 17 | 18 | 19 | def undo_resnet_preprocess(image_tensor): 20 | image_tensor = image_tensor.clone() 21 | image_tensor.narrow(1,0,1).mul_(.229).add_(.485) 22 | image_tensor.narrow(1,1,1).mul_(.224).add_(.456) 23 | image_tensor.narrow(1,2,1).mul_(.225).add_(.406) 24 | return image_tensor 25 | 26 | 27 | def diagnose_network(net, name='network'): 28 | mean = 0.0 29 | count = 0 30 | for param in net.parameters(): 31 | if param.grad is not None: 32 | mean += torch.mean(torch.abs(param.grad.data)) 33 | count += 1 34 | if count > 0: 35 | mean = mean / count 36 | print(name) 37 | print(mean) 38 | 39 | 40 | def save_image(image_numpy, image_path): 41 | image_pil = Image.fromarray(image_numpy) 42 | image_pil.save(image_path) 43 | 44 | def info(object, spacing=10, collapse=1): 45 | """Print methods and doc strings. 46 | Takes module, class, list, dictionary, or string.""" 47 | methodList = [e for e in dir(object) if isinstance(getattr(object, e), collections.Callable)] 48 | processFunc = collapse and (lambda s: " ".join(s.split())) or (lambda s: s) 49 | print( "\n".join(["%s %s" % 50 | (method.ljust(spacing), 51 | processFunc(str(getattr(object, method).__doc__))) 52 | for method in methodList]) ) 53 | 54 | def varname(p): 55 | for line in inspect.getframeinfo(inspect.currentframe().f_back)[3]: 56 | m = re.search(r'\bvarname\s*\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)', line) 57 | if m: 58 | return m.group(1) 59 | 60 | def print_numpy(x, val=True, shp=False): 61 | x = x.astype(np.float64) 62 | if shp: 63 | print('shape,', x.shape) 64 | if val: 65 | x = x.flatten() 66 | print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % ( 67 | np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x))) 68 | 69 | 70 | def mkdirs(paths): 71 | if isinstance(paths, list) and not isinstance(paths, str): 72 | for path in paths: 73 | mkdir(path) 74 | else: 75 | mkdir(paths) 76 | 77 | 78 | def mkdir(path): 79 | if not os.path.exists(path): 80 | os.makedirs(path) 81 | -------------------------------------------------------------------------------- /nnutils/voxel_net.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Scene level voxels prediction net. 3 | ''' 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | from absl import flags 8 | import torch 9 | import torch.nn as nn 10 | from . 
import net_blocks as nb 11 | import torchvision 12 | #from oc3d.nnutils import roi_pooling 13 | import pdb 14 | 15 | #-------------- flags -------------# 16 | #----------------------------------# 17 | flags.DEFINE_integer('nz_voxels', 2000, 'Number of latent feat dimension for shape prediction') 18 | flags.DEFINE_integer('n_voxels_upconv', 5, 'Number of upconvolution layers') 19 | 20 | #------------- Modules ------------# 21 | #----------------------------------# 22 | class ResNetConv(nn.Module): 23 | def __init__(self, n_blocks=4): 24 | super(ResNetConv, self).__init__() 25 | self.resnet = torchvision.models.resnet18(pretrained=True) 26 | self.n_blocks=n_blocks 27 | 28 | def forward(self, x): 29 | n_blocks = self.n_blocks 30 | x = self.resnet.conv1(x) 31 | x = self.resnet.bn1(x) 32 | x = self.resnet.relu(x) 33 | x = self.resnet.maxpool(x) 34 | 35 | if n_blocks >= 1: 36 | x = self.resnet.layer1(x) 37 | if n_blocks >= 2: 38 | x = self.resnet.layer2(x) 39 | if n_blocks >= 3: 40 | x = self.resnet.layer3(x) 41 | if n_blocks >= 4: 42 | x = self.resnet.layer4(x) 43 | return x 44 | 45 | #------------ Voxel Net -----------# 46 | #----------------------------------# 47 | class VoxelNet(nn.Module): 48 | def __init__( 49 | self, img_size, 50 | voxel_size, nz_voxels=2000, 51 | nz_init=256, n_voxels_upconv=5 52 | ): 53 | super(VoxelNet, self).__init__() 54 | 55 | self.resnet_conv = ResNetConv(n_blocks=4) 56 | nc_inp = 512*(img_size[0]//32)*(img_size[1]//32) 57 | 58 | self.encoder = nb.fc_stack(nc_inp, nz_voxels, 2) 59 | 60 | upsamp_factor = pow(2, n_voxels_upconv) 61 | self.spatial_size_init = [voxel_size[0]//upsamp_factor, voxel_size[1]//upsamp_factor, voxel_size[2]//upsamp_factor] 62 | nz_spatial = self.spatial_size_init[0]*self.spatial_size_init[1]*self.spatial_size_init[2] 63 | self.nz_init = nz_init 64 | 65 | self.decoder_reshape = nb.fc_stack(nz_voxels, nz_init*nz_spatial, 1) 66 | self.decoder = nb.decoder3d(n_voxels_upconv, None, nz_init, init_fc=False) 67 | 68 | def forward(self, imgs_inp): 69 | img_feat = self.resnet_conv.forward(imgs_inp) 70 | img_feat = img_feat.view(imgs_inp.size(0), -1) 71 | img_feat = self.encoder.forward(img_feat) 72 | img_feat = self.decoder_reshape.forward(img_feat) 73 | img_feat = img_feat.view( 74 | imgs_inp.size(0), 75 | self.nz_init, 76 | self.spatial_size_init[0], 77 | self.spatial_size_init[1], 78 | self.spatial_size_init[2] 79 | ) 80 | voxels_pred = self.decoder.forward(img_feat) 81 | return voxels_pred 82 | -------------------------------------------------------------------------------- /utils/make_html.py: -------------------------------------------------------------------------------- 1 | """Script for making html from a directory. 
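Each sub-directory of --imgs_root_dir is expected to contain the same set of image files; the script writes a single page (--html_dir/--html_name.html) with one table row per sub-directory and one column per image name.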
2 | """ 3 | # Sample usage: 4 | # (box3d_shape_ft) python make_html.py --imgs_root_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/box3d/val/box3d_shape_ft' --html_name=box3d_shape_ft --html_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/pages/' 5 | 6 | # (dwr_shape_ft) python make_html.py --imgs_root_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/dwr/val/dwr_shape_ft' --html_name=dwr_shape_ft --html_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/pages/' 7 | 8 | # (depth_baseline) python make_html.py --imgs_root_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/depth_baseline' --html_name=depth_baseline --html_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/pages/' 9 | 10 | # (voxels_baseline) python make_html.py --imgs_root_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/voxels_baseline' --html_name=voxels_baseline --html_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/pages/' 11 | 12 | # (nyu) python make_html.py --imgs_root_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/nyu/test/dwr_shape_ft' --html_name=nyu_dwr_shape_ft --html_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/pages/' 13 | 14 | from __future__ import absolute_import 15 | from __future__ import division 16 | from __future__ import print_function 17 | from absl import app 18 | from absl import flags 19 | import os 20 | import os.path as osp 21 | from yattag import Doc 22 | from yattag import indent 23 | import numpy as np 24 | 25 | flags.DEFINE_string('imgs_root_dir', '', 'Directory where renderings are saved') 26 | flags.DEFINE_string('html_name', '', 'Name of webpage') 27 | flags.DEFINE_string('html_dir', '', 'Directory where output should be saved') 28 | 29 | def main(_): 30 | opts = flags.FLAGS 31 | vis_dir_names = os.listdir(opts.imgs_root_dir) 32 | vis_dir_names.sort() 33 | img_keys = os.listdir(osp.join(opts.imgs_root_dir, vis_dir_names[0])) 34 | img_keys.sort() 35 | img_root_rel_path = osp.relpath(opts.imgs_root_dir, opts.html_dir) 36 | if not os.path.exists(opts.html_dir): 37 | os.makedirs(opts.html_dir) 38 | html_file = osp.join(opts.html_dir, opts.html_name + '.html') 39 | ctr = 0 40 | 41 | doc, tag, text = Doc().tagtext() 42 | with tag('html'): 43 | with tag('body'): 44 | with tag('table', style = 'width:100%', border="1"): 45 | with tag('tr'): 46 | for img_name in img_keys: 47 | with tag('td'): 48 | text(img_name) 49 | 50 | for img_dir in vis_dir_names: 51 | with tag('tr'): 52 | for img_name in img_keys: 53 | with tag('td'): 54 | with tag('img', width="640px", src=osp.join(img_root_rel_path, img_dir, img_name)): 55 | ctr += 1 56 | 57 | r1 = doc.getvalue() 58 | r2 = indent(r1) 59 | 60 | with open(html_file, 'wt') as f: 61 | f.write(r2) 62 | 63 | 64 | if __name__ == '__main__': 65 | app.run() 66 | -------------------------------------------------------------------------------- /docs/training.md: -------------------------------------------------------------------------------- 1 | # Instructions to train models and baselines 2 | 3 | ### Prerequisites 4 | Make sure the data loading and preprocessing is complete. The training will also be visualized using visdom which can be started by 5 | ``` 6 | python -m visdom.server 7 | ``` 8 | Note that all the training jobs should be launched from one directory above CODE_ROOT. Additionally, the sample scripts below assume that the code folder is named 'factored3d'. 9 | 10 | 11 | ### Training factored 3D prediction models 12 | We first train the object-centric 3D prediction module. 
Since training with proposals or predicting full voxels is computationally expensive, we train in stages to speed up the process. 13 | ``` 14 | # Download a pretrained object voxel auto-encoder 15 | cd CODE_ROOT/cachedir/snapshots; 16 | wget https://people.eecs.berkeley.edu/~shubhtuls/cachedir/factored3d/object_autoenc_32.tar.gz && tar -xvzf object_autoenc_32.tar.gz 17 | 18 | # All jobs should be launched from one level above code directory 19 | cd CODE_ROOT/..; 20 | 21 | # First train the object-centric 3D prediction model on ground-truth boxes 22 | python -m factored3d.experiments.suncg.box3d --plot_scalars --display_visuals --display_freq=2000 --save_epoch_freq=1 --batch_size=8 --name=box3d_base --use_context --pred_voxels=False --classify_rot --shape_loss_wt=10 --n_data_workers=0 --num_epochs=8 23 | 24 | # Fine-tune the above model using proposals 25 | python -m factored3d.experiments.suncg.dwr --name=dwr_base --classify_rot --pred_voxels=False --use_context --plot_scalars --display_visuals --save_epoch_freq=1 --display_freq=1000 --display_id=100 --box3d_ft --shape_loss_wt=10 --label_loss_wt=10 --batch_size=8 --num_epochs=1 26 | 27 | # Finally, also learn to predict shape voxels instead of auto-encoder shape code 28 | python -m factored3d.experiments.suncg.dwr --name=dwr_shape_ft --classify_rot --pred_voxels=True --shape_dec_ft --use_context --plot_scalars --display_visuals --save_epoch_freq=1 --display_freq=1000 --display_id=100 --shape_loss_wt=2 --label_loss_wt=10 --batch_size=8 --ft_pretrain_epoch=1 --num_epochs=1 29 | ``` 30 | 31 | We also train the layout (amodal inverse depth) prediction CNN 32 | ``` 33 | # job should be launched from one level above code directory 34 | cd CODE_ROOT/.. 35 | 36 | python -m factored3d.experiments.suncg.layout --plot_scalars --display_visuals --save_epoch_freq=1 --batch_size=8 --name=layout_pred --display_freq=2000 --suncg_dl_out_layout=true --suncg_dl_out_depth=false --display_id=40 --num_epochs=8 37 | ``` 38 | 39 | ### Training (inverse) depth prediction baseline 40 | ``` 41 | python -m factored3d.experiments.suncg.layout --plot_scalars --display_visuals --save_epoch_freq=1 --batch_size=8 --name=depth_baseline --display_freq=2000 --suncg_dl_out_layout=false --suncg_dl_out_depth=true --display_id=20 --num_epochs=8 42 | ``` 43 | 44 | ### Training scene voxel prediction baseline 45 | ``` 46 | # job should be launched from one level above code directory 47 | cd CODE_ROOT/..; 48 | 49 | python -m factored3d.experiments.suncg.voxels --plot_scalars --display_visuals --save_epoch_freq=1 --batch_size=8 --name=voxels_baseline --display_freq=2000 --num_epochs=8 50 | ``` 51 | -------------------------------------------------------------------------------- /preprocess/suncg/precompute_edge_boxes.m: -------------------------------------------------------------------------------- 1 | function precompute_edge_boxes(min_id, max_id) 2 | globals; 3 | suncgDir = suncgDir; % redundancy useful for parfor 4 | 5 | addpath(genpath('./matUtils')); 6 | basedir = pwd(); 7 | fileNamesAll = strsplit(fileread(fullfile(suncgDir, 'zipfiles', 'data_goodlist_v2.txt')), '\n'); 8 | 9 | addpath(genpath('./matUtils')); 10 | addpath(genpath('../../external/edges/')); 11 | addpath(genpath('../../external/toolbox/')); 12 | 13 | basedir = pwd(); 14 | saveDir = fullfile(suncgDir, 'edgebox_proposals'); 15 | nodeDir = fullfile(suncgDir, 'bboxes_node'); 16 | mkdirOptional(saveDir); 17 | 18 | sceneIds = getFileNamesFromDirectory(fullfile(suncgDir, 'camera'),'types',{''}); 19 | sceneIds = 
sceneIds(3:end); 20 | sceneIds = sort(sceneIds); 21 | if max_id == 0 22 | max_id = length(sceneIds); 23 | end 24 | 25 | %% load pre-trained edge detection model and set opts (see edgesDemo.m) 26 | model=load('../../external/edges/models/forest/modelBsds'); model=model.model; 27 | model.opts.multiscale=0; model.opts.sharpen=2; model.opts.nThreads=4; 28 | 29 | %% set up opts for edgeBoxes (see edgeBoxes.m) 30 | opts = edgeBoxes; 31 | opts.alpha = .65; % step size of sliding window search 32 | opts.beta = .75; % nms threshold for object proposals 33 | opts.minScore = .01; % min score of boxes to detect 34 | opts.maxBoxes = 1e3; % max number of boxes to detect 35 | 36 | for ix = min_id:max_id 37 | %for ix = min_id:max_id 38 | if mod(ix, 100) == 0 39 | disp(ix) 40 | end 41 | sceneId = sceneIds{ix}; 42 | mkdirOptional(fullfile(saveDir, sceneId)); 43 | imgsAll = getFileNamesFromDirectory(fullfile(suncgDir, 'renderings_node', sceneId),'types',{'.png'}); 44 | 45 | for cameraId=1:length(imgsAll) 46 | saveFile = fullfile(saveDir, sceneId, sprintf('%06d_proposals.mat', cameraId-1)); 47 | if exist(saveFile, 'file') 48 | continue 49 | end 50 | if ~ismember(sprintf('%s/%06d', sceneId, cameraId-1), fileNamesAll) 51 | continue 52 | end 53 | img_file = fullfile(suncgDir, 'renderings_ldr', sceneId, sprintf('%06d_mlt.png', cameraId-1)); 54 | nodeFile = fullfile(nodeDir, sceneId, sprintf('%06d_bboxes.mat', cameraId-1)); 55 | 56 | if ~exist(img_file, 'file') 57 | % Bad file 58 | disp(img_file); 59 | continue 60 | end 61 | 62 | % disp(saveFile); 63 | img = imread(img_file); 64 | var = load(nodeFile); 65 | prop=edgeBoxes(img,model,opts); 66 | proposals = prop; 67 | proposals(:,3:4) = proposals(:,3:4) + proposals(:,1:2); 68 | 69 | overlaps = bboxOverlap(var.bboxes, proposals(:,1:4)); 70 | overlapsGt = (max(overlaps, [], 2) > 0.7); 71 | [overlapsProposals, gtInds] = max(overlaps, [], 1); 72 | 73 | saveFunc(saveFile, proposals, overlapsProposals, gtInds); 74 | end 75 | end 76 | end 77 | 78 | function saveFunc(filename, proposals, overlaps, gtInds) 79 | save(filename,'proposals', 'overlaps', 'gtInds'); 80 | end -------------------------------------------------------------------------------- /nnutils/test_utils.py: -------------------------------------------------------------------------------- 1 | """Generic Testing Utils. 
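A concrete tester is expected to subclass Tester, implement define_model,
init_dataset, set_input and test, and is then driven roughly as in this
hypothetical sketch (the class name below is illustrative, not part of this
module):

    class SceneTester(Tester):
        def define_model(self): ...      # build networks, call load_network
        def init_dataset(self): ...      # create the evaluation dataloader
        def set_input(self, batch): ...  # move a batch to the GPU
        def test(self): ...              # loop over data, optionally save_current_visuals

    opts = flags.FLAGS                   # populated once app.run() has parsed the flags
    tester = SceneTester(opts)
    tester.init_testing()                # calls define_model() and init_dataset()
    tester.test()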
2 | """ 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | import torch 8 | import os 9 | import os.path as osp 10 | import time 11 | import pdb 12 | from absl import flags 13 | 14 | import scipy.misc 15 | from ..utils.visualizer import Visualizer 16 | 17 | #-------------- flags -------------# 18 | #----------------------------------# 19 | ## Flags for training 20 | curr_path = osp.dirname(osp.abspath(__file__)) 21 | cache_path = osp.join(curr_path, '..', 'cachedir') 22 | 23 | flags.DEFINE_string('name', 'exp_name', 'Experiment Name') 24 | flags.DEFINE_string('cache_dir', cache_path, 'Cachedir') 25 | flags.DEFINE_string('eval_set', 'val', 'which set to evaluate on') 26 | flags.DEFINE_integer('gpu_id', 0, 'Which gpu to use') 27 | 28 | flags.DEFINE_integer('batch_size', 4, 'Size of minibatches') 29 | flags.DEFINE_integer('num_train_epoch', 0, 'Number of training iterations') 30 | flags.DEFINE_integer('n_data_workers', 4, 'Number of data loading workers') 31 | 32 | 33 | ## Flags for logging and snapshotting 34 | flags.DEFINE_string('checkpoint_dir', osp.join(cache_path, 'snapshots'), 35 | 'Directory where networks are saved') 36 | flags.DEFINE_string( 37 | 'results_vis_dir', osp.join(cache_path, 'results_vis'), 38 | 'Directory where intermittent results will be saved') 39 | flags.DEFINE_string( 40 | 'results_eval_dir', osp.join(cache_path, 'evaluation'), 41 | 'Directory where evaluation results will be saved') 42 | 43 | flags.DEFINE_boolean('save_visuals', False, 'Whether to save intermittent visuals') 44 | flags.DEFINE_integer('visuals_freq', 50, 'Save visuals every few forward passes') 45 | flags.DEFINE_integer('max_eval_iter', 0, 'Maximum evaluation iterations. 0 => 1 epoch.') 46 | 47 | #-------- tranining class ---------# 48 | #----------------------------------# 49 | class Tester(): 50 | def __init__(self, opts): 51 | self.opts = opts 52 | self.vis_iter = 0 53 | self.gpu_id = opts.gpu_id 54 | self.Tensor = torch.cuda.FloatTensor if (self.gpu_id is not None) else torch.Tensor 55 | self.invalid_batch = False #the trainer can optionally reset this every iteration during set_input call 56 | self.save_dir = os.path.join(opts.checkpoint_dir, opts.name) 57 | if not os.path.exists(self.save_dir): 58 | os.makedirs(self.save_dir) 59 | log_file = os.path.join(self.save_dir, 'opts_testing.log') 60 | with open(log_file, 'w') as f: 61 | for k in dir(opts): 62 | f.write('{}: {}\n'.format(k, opts.__getattr__(k))) 63 | 64 | # helper loading function that can be used by subclasses 65 | def load_network(self, network, network_label, epoch_label, network_dir=None): 66 | save_filename = '{}_net_{}.pth'.format(network_label, epoch_label) 67 | if network_dir is None: 68 | network_dir = self.save_dir 69 | save_path = os.path.join(network_dir, save_filename) 70 | network.load_state_dict(torch.load(save_path)) 71 | return 72 | 73 | def save_current_visuals(self): 74 | visuals = self.get_current_visuals() 75 | imgs_dir = osp.join(self.opts.results_vis_dir, 'vis_iter_{}'.format(self.vis_iter)) 76 | if not os.path.exists(imgs_dir): 77 | os.makedirs(imgs_dir) 78 | for k in visuals: 79 | img_path = osp.join(imgs_dir, k + '.png') 80 | scipy.misc.imsave(img_path, visuals[k]) 81 | self.vis_iter += 1 82 | 83 | def define_model(self): 84 | '''Should be implemented by the child class.''' 85 | raise NotImplementedError 86 | 87 | def init_dataset(self): 88 | '''Should be implemented by the child class.''' 89 | raise NotImplementedError 90 | 91 | def 
set_input(self, batch): 92 | '''Should be implemented by the child class.''' 93 | raise NotImplementedError 94 | 95 | def init_testing(self): 96 | opts = self.opts 97 | self.define_model() 98 | self.init_dataset() 99 | 100 | def test(self): 101 | '''Should be implemented by the child class.''' 102 | raise NotImplementedError 103 | -------------------------------------------------------------------------------- /benchmark/suncg/pr_plots.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('Agg') 3 | import seaborn as sns 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import json 7 | import os 8 | import os.path as osp 9 | import platform 10 | 11 | eval_set = 'val' 12 | net_name = 'dwr_shape_ft' 13 | 14 | curr_path = osp.dirname(osp.abspath(__file__)) 15 | cache_path = osp.join(curr_path, '..', '..', 'cachedir') 16 | plots_dir = os.path.join(cache_path, 'evaluation', 'icp', eval_set, 'plots') 17 | 18 | def subplots(plt, Y_X, sz_y_sz_x=(10,10)): 19 | Y,X = Y_X 20 | sz_y,sz_x = sz_y_sz_x 21 | plt.rcParams['figure.figsize'] = (X*sz_x, Y*sz_y) 22 | fig, axes = plt.subplots(Y, X) 23 | plt.subplots_adjust(wspace=0.1, hspace=0.1) 24 | return fig, axes 25 | 26 | def pr_plots(net_name, iter_number, set_number): 27 | dir_name = os.path.join(cache_path, 'evaluation', 'dwr') 28 | json_file = os.path.join(dir_name, set_number, net_name, 'eval_set{}_0.json'.format(set_number)) 29 | 30 | with open(json_file, 'rt') as f: 31 | a = json.load(f) 32 | imset = a['eval_params']['set'].title() 33 | 34 | plot_file = os.path.join(dir_name, set_number, net_name, 'eval_set{}_0_back.pdf'.format(set_number)) 35 | print('Saving plot to {}'.format(osp.abspath(plot_file))) 36 | # Plot 1 with AP for all, and minus other things one at a time. 
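    # The evaluation json read above is assumed (illustrative sketch only; the
    # field values are made up) to look roughly like:
    #   {"eval_params": {"set": "val", "ap_str": ["all", "-pose", ...]},
    #    "bench_summary": [{"ap": 0.57, "prec": [...], "rec": [...]}, ...]}
    # Each bench_summary entry yields one precision-recall curve; the full metric
    # (index 0 here, index 6 in the second plot) is drawn with a solid line and
    # the ablated variants with dashed lines.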
37 | #with sns.axes_style("darkgrid"): 38 | with plt.style.context('fivethirtyeight'): 39 | fig, axes = subplots(plt, (1,1), (7,7)) 40 | ax = axes 41 | legs = [] 42 | i_order = [0, 1, 2, 3, 5, 4] 43 | # for i in np.arange(6, 12): 44 | for jx in range(6): 45 | i = i_order[jx] 46 | prec = np.array(a['bench_summary'][i]['prec']) 47 | rec = np.array(a['bench_summary'][i]['rec']) 48 | if i == 0: 49 | ax.plot(rec, prec, '-') 50 | legs.append('{:4.1f} {:s}'.format(100*a['bench_summary'][i]['ap'], a['eval_params']['ap_str'][i])) 51 | else: 52 | ax.plot(rec, prec, '--') 53 | legs.append('{:4.1f} {:s}'.format(100*a['bench_summary'][i]['ap'], a['eval_params']['ap_str'][i])) 54 | ax.set_xlim([0, 1]); ax.set_ylim([0, 1]); 55 | ax.set_xlabel('Recall', fontsize=20) 56 | ax.set_ylabel('Precision', fontsize=20) 57 | ax.set_title('Precision Recall Plots on {:s} Set'.format(imset), fontsize=20) 58 | 59 | l = ax.legend(legs, fontsize=18, bbox_to_anchor=(0,0), loc='lower left', framealpha=0.5, frameon=True) 60 | 61 | ax.plot([0,1], [0,0], 'k-') 62 | ax.plot([0,0], [0,1], 'k-') 63 | plt.tick_params(axis='both', which='major', labelsize=20) 64 | extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted()) 65 | plt.savefig(plot_file, bbox_inches='tight') 66 | plt.close(fig) 67 | 68 | plot_file = os.path.join(dir_name, set_number, net_name, 'eval_set{}_0_frwd.pdf'.format(set_number)) 69 | print('Saving plot to {}'.format(osp.abspath(plot_file))) 70 | 71 | with plt.style.context('fivethirtyeight'): 72 | fig, axes = subplots(plt, (1,1), (7,7)) 73 | ax = axes 74 | legs = [] 75 | i_order = [6, 9, 7, 8, 10, 11] 76 | # for i in np.arange(6, 12): 77 | for jx in range(6): 78 | i = i_order[jx] 79 | prec = np.array(a['bench_summary'][i]['prec']) 80 | rec = np.array(a['bench_summary'][i]['rec']) 81 | if i == 6: 82 | ax.plot(rec, prec, '-') 83 | legs.append('{:4.1f} {:s}'.format(100*a['bench_summary'][i]['ap'], a['eval_params']['ap_str'][i])) 84 | else: 85 | ax.plot(rec, prec, '--') 86 | str_ = '+'+'+'.join(a['eval_params']['ap_str'][i].split('+')[1:]) 87 | legs.append('{:4.1f} {:s}'.format(100*a['bench_summary'][i]['ap'], str_)) 88 | ax.set_xlim([0, 1]); ax.set_ylim([0, 1]); 89 | ax.set_xlabel('Recall', fontsize=20) 90 | ax.set_ylabel('Precision', fontsize=20) 91 | ax.set_title('Precision Recall Plots on {:s} Set'.format(imset), fontsize=20) 92 | 93 | l = ax.legend(legs, fontsize=18, bbox_to_anchor=(0,0), loc='lower left', framealpha=0.5, frameon=True) 94 | ax.plot([0,1], [0,0], 'k-') 95 | ax.plot([0,0], [0,1], 'k-') 96 | plt.tick_params(axis='both', which='major', labelsize=20) 97 | extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted()) 98 | plt.savefig(plot_file, bbox_inches='tight') 99 | plt.close(fig) 100 | 101 | if __name__ == '__main__': 102 | pr_plots(net_name, 0, eval_set) 103 | -------------------------------------------------------------------------------- /benchmark/suncg/sc_plots.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import matplotlib 6 | matplotlib.use('Agg') 7 | import seaborn as sns 8 | import matplotlib.pyplot as plt 9 | 10 | import numpy as np 11 | import json 12 | import os 13 | import os.path as osp 14 | import scipy.io 15 | 16 | eval_set = 'val' 17 | netName = 'dwr_shape_ft' 18 | 19 | curr_path = osp.dirname(osp.abspath(__file__)) 20 | cache_path = osp.join(curr_path, '..', '..', 'cachedir') 21 | 
plots_dir = os.path.join(cache_path, 'evaluation', 'icp', eval_set, 'plots') 22 | 23 | if not os.path.exists(plots_dir): 24 | os.makedirs(plots_dir) 25 | 26 | def subplots(plt, Y_X, sz_y_sz_x=(10,10)): 27 | Y,X = Y_X 28 | sz_y,sz_x = sz_y_sz_x 29 | plt.rcParams['figure.figsize'] = (X*sz_x, Y*sz_y) 30 | fig, axes = plt.subplots(Y, X) 31 | plt.subplots_adjust(wspace=0.1, hspace=0.1) 32 | return fig, axes 33 | 34 | def toNpArray(matVar): 35 | out = np.zeros(len(matVar)) 36 | for i in range(len(matVar)): 37 | out[i] = matVar[i][0] 38 | return out 39 | 40 | def plotExperiment(expName, errors, representationNames, xLeg, varName, maxRange=1): 41 | with plt.style.context('fivethirtyeight'): 42 | fig, axes = subplots(plt, (1,1), (6,6)) 43 | ax = axes 44 | 45 | legs = [] 46 | for i in range(len(representationNames)): 47 | repName = representationNames[i] 48 | perf = np.sort(errors[varName][i, :]) 49 | perf = perf[~np.isnan(perf)] 50 | perf = perf[perf < 1e6] 51 | medVal = np.median(perf) 52 | percentile = np.linspace(0,1,np.size(perf,0)) 53 | ax.plot(percentile, perf, '-') 54 | legs.append('{:s}'.format(repName)) 55 | ax.set_ylim([0, maxRange]); ax.set_xlim([0, 1]); 56 | ax.set_ylabel(xLeg, fontsize=20) 57 | ax.set_xlabel('Fraction of Data', fontsize=20) 58 | ax.set_title(expName, fontsize=20) 59 | 60 | l = ax.legend(legs, title="Scene Representations:", fontsize=14, bbox_to_anchor=(0,1), loc='upper left', framealpha=0.5, frameon=True) 61 | 62 | ax.plot([0,0], [0,maxRange], 'k-') 63 | ax.plot([0,1], [0,0], 'k-') 64 | plt.tick_params(axis='both', which='major', labelsize=20) 65 | extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted()) 66 | plot_file = os.path.join(plots_dir, varName + '.pdf') 67 | plt.savefig(plot_file, bbox_inches='tight') 68 | plt.close(fig) 69 | 70 | resultsDir = os.path.join(cache_path, 'evaluation', 'icp', eval_set, netName) 71 | matFile = os.path.join(resultsDir, 'results.mat') 72 | results = scipy.io.loadmat(matFile) 73 | 74 | ######################################## 75 | ############## Objects ############### 76 | expName = 'Object Representation Ability' 77 | representationNames = ['Factored (ours)', 'Depth', 'Voxels'] 78 | xLeg = 'Scale-normalized Mean Squared Error' 79 | varName = 'object_eval' 80 | plotExperiment(expName, results, representationNames, xLeg, varName, maxRange=1e-2) 81 | 82 | ######################################## 83 | ############### Depth ################ 84 | expName = 'Depth Representation Ability' 85 | representationNames = ['Factored (ours)', 'Depth', 'Voxels'] 86 | xLeg = 'Mean Squared Error (in $m^2$)' 87 | varName = 'depth_eval' 88 | plotExperiment(expName, results, representationNames, xLeg, varName, maxRange=8e-1) 89 | 90 | ######################################## 91 | ############## Voxels ################ 92 | expName = 'Volume Representation Ability' 93 | representationNames = ['Factored (ours)', 'Depth', 'Voxels'] 94 | xLeg = 'IoU (Higher is better)' 95 | varName = 'volume_overlap_eval' 96 | plotExperiment(expName, results, representationNames, xLeg, varName, maxRange=1) 97 | 98 | ######################################## 99 | ########## Visible Layout ############ 100 | expName = 'Visible Layout Representation Ability' 101 | representationNames = ['Factored (ours)', 'Depth', 'Voxels'] 102 | xLeg = 'Mean Squared Error (in $m^2$)' 103 | varName = 'layout_eval' 104 | plotExperiment(expName, results, representationNames, xLeg, varName, maxRange=5e-1) 105 | 106 | ######################################## 107 | ########## 
Amodal Layout ############# 108 | expName = 'Amodal Layout Representation Ability' 109 | representationNames = ['Factored (ours)', 'Depth', 'Voxels'] 110 | xLeg = 'Mean Squared Error (in $m^2$)' 111 | varName = 'layout_amodal_eval' 112 | plotExperiment(expName, results, representationNames, xLeg, varName, maxRange=8e-1) 113 | 114 | print('Plots saved in {}'.format(osp.abspath(plots_dir))) -------------------------------------------------------------------------------- /experiments/suncg/layout.py: -------------------------------------------------------------------------------- 1 | """Script for layout prediction predictor experiment. 2 | """ 3 | # example usage (depth baseline) : python -m factored3d.experiments.suncg.layout --plot_scalars --display_visuals --save_epoch_freq=1 --batch_size=8 --name=depth_baseline --display_freq=2000 --suncg_dl_out_layout=false --suncg_dl_out_depth=true --display_id=20 4 | 5 | # example usage (layout prediction) : python -m factored3d.experiments.suncg.layout --plot_scalars --display_visuals --save_epoch_freq=1 --batch_size=8 --name=layout_pred --display_freq=2000 --suncg_dl_out_layout=true --suncg_dl_out_depth=false --display_id=40 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | from absl import app 11 | from absl import flags 12 | import os 13 | import os.path as osp 14 | import numpy as np 15 | import torch 16 | from torch.autograd import Variable 17 | import time 18 | import pdb 19 | 20 | from ...data import suncg as suncg_data 21 | from ...utils import suncg_parse 22 | from ...nnutils import train_utils 23 | from ...nnutils import disp_net 24 | from ...utils import visutil 25 | from ...renderer import utils as render_utils 26 | 27 | FLAGS = flags.FLAGS 28 | 29 | class LayoutTrainer(train_utils.Trainer): 30 | def define_model(self): 31 | self.model = disp_net.dispnet().cuda(device_id=self.opts.gpu_id) 32 | if self.opts.num_pretrain_epochs > 0: 33 | self.load_network(self.model, 'pred', self.opts.num_pretrain_epochs-1) 34 | return 35 | 36 | def init_dataset(self): 37 | opts = self.opts 38 | split_dir = osp.join(opts.suncg_dir, 'splits') 39 | self.split = suncg_parse.get_split(split_dir, house_names=os.listdir(osp.join(opts.suncg_dir, 'camera'))) 40 | self.dataloader = suncg_data.suncg_data_loader(self.split['train'], opts) 41 | 42 | def define_criterion(self): 43 | self.criterion = torch.nn.L1Loss().cuda(device_id=self.opts.gpu_id) 44 | 45 | def set_input(self, batch): 46 | opts = self.opts 47 | img_tensor = batch['img'].type(torch.FloatTensor) 48 | 49 | # batch_size=1 messes with batch norm 50 | self.invalid_batch = (img_tensor.size(0) == 1) 51 | 52 | if self.invalid_batch: 53 | return 54 | else: 55 | self.input_imgs = Variable( 56 | img_tensor.cuda(device=self.opts.gpu_id), requires_grad=False) 57 | 58 | if opts.suncg_dl_out_layout: 59 | trg_tensor = batch['layout'] 60 | else: 61 | assert(opts.suncg_dl_out_depth) 62 | trg_tensor = batch['depth'] 63 | 64 | self.trg_layout = Variable( 65 | trg_tensor.type(torch.FloatTensor).cuda(device=self.opts.gpu_id), requires_grad=False) 66 | 67 | def forward(self): 68 | self.pred_layout = self.model.forward(self.input_imgs) 69 | self.total_loss = self.criterion.forward(self.pred_layout, self.trg_layout) 70 | 71 | def get_current_points(self): 72 | pts_dict = {} 73 | #for b in range(self.opts.batch_size): 74 | for b in range(1): 75 | dmap_gt = self.trg_layout.data[b].cpu().numpy().transpose((1,2,0)) 76 | dmap_pred = 
self.pred_layout.data[b].cpu().numpy().transpose((1,2,0)) 77 | keys = ['gt_layout_' + str(b), 'pred_layout_' + str(b)] 78 | dmaps = [dmap_gt, dmap_pred] 79 | min_disp = 1e-2 80 | for kx in range(2): 81 | dmap_points = render_utils.dispmap_to_points( 82 | dmaps[kx], 83 | suncg_parse.cam_intrinsic(), 84 | scale_x=self.opts.layout_width/640, 85 | scale_y=self.opts.layout_height/480, 86 | min_disp = min_disp 87 | ) 88 | pts_dict[keys[kx]] = dmap_points 89 | if kx == 0: 90 | min_disp = 0.8/np.max(dmap_points[:, 2]) 91 | 92 | return pts_dict 93 | 94 | def get_current_visuals(self): 95 | return { 96 | 'img':visutil.tensor2im(self.input_imgs.data), 97 | 'gt_layout':visutil.tensor2im(self.trg_layout.data), 98 | 'pred_layout':visutil.tensor2im(self.pred_layout.data) 99 | } 100 | 101 | def get_current_scalars(self): 102 | return {'total_loss': self.smoothed_total_loss, 'total_loss_repeat': self.smoothed_total_loss} 103 | 104 | def main(_): 105 | FLAGS.suncg_dl_out_codes = False 106 | FLAGS.suncg_dl_out_fine_img = False 107 | FLAGS.suncg_dl_out_voxels = False 108 | torch.manual_seed(0) 109 | trainer = LayoutTrainer(FLAGS) 110 | trainer.init_training() 111 | trainer.train() 112 | 113 | if __name__ == '__main__': 114 | app.run(main) -------------------------------------------------------------------------------- /benchmark/suncg/evaluate_detection.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------- 2 | # Copyright (c) 2015, Saurabh Gupta 3 | # 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # --------------------------------------------------------- 6 | from ...utils import bbox_utils 7 | import numpy as np 8 | 9 | def inst_bench_image(dt, gt, bOpts, overlap = None): 10 | 11 | nDt = len(dt['sc']) 12 | nGt = len(gt['diff']) 13 | numInst = np.sum(gt['diff'] == False) 14 | 15 | if overlap is None: 16 | overlap = bbox_utils.bbox_overlaps(dt['boxInfo'].astype(np.float), gt['boxInfo'].astype(np.float)) 17 | # assert(issorted(-dt.sc), 'Scores are not sorted.\n'); 18 | sc = dt['sc']; 19 | 20 | det = np.zeros((nGt,1)).astype(np.bool) 21 | tp = np.zeros((nDt,1)).astype(np.bool) 22 | fp = np.zeros((nDt,1)).astype(np.bool) 23 | dupDet = np.zeros((nDt,1)).astype(np.bool) 24 | instId = np.zeros((nDt,1)).astype(np.int32) 25 | ov = np.zeros((nDt,1)).astype(np.float32) 26 | 27 | # Walk through the detections in decreasing score 28 | # and assign tp, fp, fn, tn labels 29 | for i in xrange(nDt): 30 | # assign detection to ground truth object if any 31 | if nGt > 0: 32 | maxOverlap = overlap[i,:].max(); maxInd = overlap[i,:].argmax(); 33 | instId[i] = maxInd; ov[i] = maxOverlap; 34 | else: 35 | maxOverlap = 0; instId[i] = -1; maxInd = -1; 36 | # assign detection as true positive/don't care/false positive 37 | if maxOverlap >= bOpts['minoverlap']: 38 | if gt['diff'][maxInd] == False: 39 | if det[maxInd] == False: 40 | # true positive 41 | tp[i] = True; 42 | det[maxInd] = True; 43 | else: 44 | # false positive (multiple detection) 45 | fp[i] = True; 46 | dupDet[i] = True; 47 | else: 48 | # false positive 49 | fp[i] = True; 50 | return tp, fp, sc, numInst, dupDet, instId, ov 51 | 52 | 53 | def inst_bench(dt, gt, bOpts, tp=None, fp=None, score=None, numInst=None): 54 | """ 55 | ap, rec, prec, npos, details = inst_bench(dt, gt, bOpts, tp = None, fp = None, sc = None, numInst = None) 56 | dt - a list with a dict for each image and with following fields 57 | .boxInfo - info that will be used to cpmpute the overlap 
with ground truths, a list 58 | .sc - score 59 | gt 60 | .boxInfo - info used to compute the overlap, a list 61 | .diff - a logical array of size nGtx1, saying if the instance is hard or not 62 | bOpt 63 | .minoverlap - the minimum overlap to call it a true positive 64 | [tp], [fp], [sc], [numInst] 65 | Optional arguments, in case the inst_bench_image is being called outside of this function 66 | """ 67 | details = None 68 | if tp is None: 69 | # We do not have the tp, fp, sc, and numInst, so compute them from the structures gt, and out 70 | tp = []; fp = []; numInst = []; score = []; dupDet = []; instId = []; ov = []; 71 | for i in range(len(gt)): 72 | # Sort dt by the score 73 | sc = dt[i]['sc'] 74 | bb = dt[i]['boxInfo'] 75 | ind = np.argsort(sc, axis = 0); 76 | ind = ind[::-1] 77 | if len(ind) > 0: 78 | sc = np.vstack((sc[i,:] for i in ind)) 79 | bb = np.vstack((bb[i,:] for i in ind)) 80 | else: 81 | sc = np.zeros((0,1)).astype(np.float) 82 | bb = np.zeros((0,4)).astype(np.float) 83 | 84 | dtI = dict({'boxInfo': bb, 'sc': sc}) 85 | tp_i, fp_i, sc_i, numInst_i, dupDet_i, instId_i, ov_i = inst_bench_image(dtI, gt[i], bOpts) 86 | tp.append(tp_i); fp.append(fp_i); score.append(sc_i); numInst.append(numInst_i); 87 | dupDet.append(dupDet_i); instId.append(instId_i); ov.append(ov_i); 88 | details = {'tp': list(tp), 'fp': list(fp), 'score': list(score), 'dupDet': list(dupDet), 89 | 'numInst': list(numInst), 'instId': list(instId), 'ov': list(ov)} 90 | 91 | tp = np.vstack(tp[:]) 92 | fp = np.vstack(fp[:]) 93 | sc = np.vstack(score[:]) 94 | 95 | cat_all = np.hstack((tp,fp,sc)) 96 | ind = np.argsort(cat_all[:,2]) 97 | cat_all = cat_all[ind[::-1],:] 98 | tp = np.cumsum(cat_all[:,0], axis = 0); 99 | fp = np.cumsum(cat_all[:,1], axis = 0); 100 | thresh = cat_all[:,2]; 101 | npos = np.sum(numInst, axis = 0); 102 | 103 | # Compute precision/recall 104 | rec = tp / npos; 105 | prec = np.divide(tp, (fp+tp)); 106 | ap = VOCap(rec, prec); 107 | return ap, rec, prec, npos, details 108 | 109 | def VOCap(rec, prec): 110 | rec = rec.reshape(rec.size,1); prec = prec.reshape(prec.size,1) 111 | z = np.zeros((1,1)); o = np.ones((1,1)); 112 | mrec = np.vstack((z, rec, o)) 113 | mpre = np.vstack((z, prec, z)) 114 | for i in range(len(mpre)-2, -1, -1): 115 | mpre[i] = max(mpre[i], mpre[i+1]) 116 | 117 | I = np.where(mrec[1:] != mrec[0:-1])[0]+1; 118 | ap = 0; 119 | for i in I: 120 | ap = ap + (mrec[i] - mrec[i-1])*mpre[i]; 121 | return ap 122 | -------------------------------------------------------------------------------- /experiments/suncg/voxels.py: -------------------------------------------------------------------------------- 1 | """Script for scene level voxels prediction experiment. 
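A rough sketch of the per-batch objective optimized by the trainer below
(names and shapes are illustrative; the output grid size is controlled by the
voxels_* flags):

    logits = model(imgs)                        # one raw score per output voxel
    loss = nn.BCEWithLogitsLoss()(logits, gt)   # gt is a {0, 1} occupancy grid
    occ = torch.sigmoid(logits) > 0.5           # binarized grid, e.g. for rendering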
2 | """ 3 | # example usage : python -m factored3d.experiments.suncg.voxels --plot_scalars --display_visuals --save_epoch_freq=1 --batch_size=8 --name=voxels_baseline --display_freq=2000 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | from absl import app 8 | from absl import flags 9 | import os 10 | import os.path as osp 11 | import numpy as np 12 | import scipy.misc 13 | import torch 14 | from torch.autograd import Variable 15 | import time 16 | import pdb 17 | 18 | from ...data import suncg as suncg_data 19 | from ...nnutils import train_utils 20 | from ...nnutils import voxel_net 21 | from ...utils import visutil 22 | from ...utils import suncg_parse 23 | from ...renderer import utils as render_utils 24 | 25 | curr_path = osp.dirname(osp.abspath(__file__)) 26 | cache_path = osp.join(curr_path, '..', '..', 'cachedir') 27 | flags.DEFINE_string('rendering_dir', osp.join(cache_path, 'rendering'), 'Directory where intermittent renderings are saved') 28 | 29 | FLAGS = flags.FLAGS 30 | 31 | class VoxelTrainer(train_utils.Trainer): 32 | def define_model(self): 33 | opts = self.opts 34 | self.model = voxel_net.VoxelNet( 35 | [opts.img_height, opts.img_width], 36 | [opts.voxels_width, opts.voxels_height, opts.voxels_depth], 37 | nz_voxels=opts.nz_voxels, 38 | n_voxels_upconv=opts.n_voxels_upconv 39 | ) 40 | if self.opts.num_pretrain_epochs > 0: 41 | self.load_network(self.model, 'pred', self.opts.num_pretrain_epochs) 42 | self.model = self.model.cuda(device_id=self.opts.gpu_id) 43 | return 44 | 45 | def init_dataset(self): 46 | opts = self.opts 47 | split_dir = osp.join(opts.suncg_dir, 'splits') 48 | self.split = suncg_parse.get_split(split_dir, house_names=os.listdir(osp.join(opts.suncg_dir, 'camera'))) 49 | self.dataloader = suncg_data.suncg_data_loader(self.split['train'], opts) 50 | 51 | def define_criterion(self): 52 | self.criterion = torch.nn.BCEWithLogitsLoss().cuda(device_id=self.opts.gpu_id) 53 | 54 | def set_input(self, batch): 55 | opts = self.opts 56 | img_tensor = batch['img'].type(torch.FloatTensor) 57 | 58 | # batch_size=1 messes with batch norm 59 | self.invalid_batch = (img_tensor.size(0) == 1) 60 | 61 | if self.invalid_batch: 62 | return 63 | else: 64 | self.input_imgs = Variable( 65 | img_tensor.cuda(device=self.opts.gpu_id), requires_grad=False) 66 | 67 | trg_tensor = batch['voxels'].unsqueeze(1) 68 | self.trg_voxels = Variable( 69 | trg_tensor.type(torch.FloatTensor).cuda(device=self.opts.gpu_id), requires_grad=False) 70 | 71 | def forward(self): 72 | self.pred_voxels = self.model.forward(self.input_imgs) 73 | self.total_loss = self.criterion.forward(self.pred_voxels, self.trg_voxels) 74 | 75 | def render_voxels(self, voxels, prefix='mesh'): 76 | opts = self.opts 77 | voxels = voxels.data.cpu()[0,0].numpy() 78 | 79 | mesh_dir = osp.join(opts.rendering_dir, opts.name) 80 | if not os.path.exists(mesh_dir): 81 | os.makedirs(mesh_dir) 82 | 83 | mesh_file = osp.join(mesh_dir, prefix + '.obj') 84 | vs, fs = render_utils.voxels_to_mesh(voxels.astype(np.float32)) 85 | vs[:,0] -= voxels.shape[0]/2.0 86 | vs[:,1] -= voxels.shape[1]/2.0 87 | vs *= 0.04*(64//opts.voxels_height) 88 | fout = open(mesh_file, 'w') 89 | render_utils.append_obj(fout, vs, fs) 90 | fout.close() 91 | 92 | png_dir = mesh_file.replace('.obj', '/') 93 | render_utils.render_mesh(mesh_file, png_dir) 94 | 95 | return scipy.misc.imread(osp.join(png_dir, prefix + '_render_000.png')) 96 | 97 | def get_current_visuals(self): 98 | visuals = { 99 
| 'img':visutil.tensor2im(self.input_imgs.data) 100 | } 101 | visuals['voxels_gt'] = self.render_voxels(self.trg_voxels, prefix='gt') 102 | visuals['voxels_pred'] = self.render_voxels( 103 | torch.nn.functional.sigmoid(self.pred_voxels), prefix='pred') 104 | return visuals 105 | 106 | def get_current_points(self): 107 | return {} 108 | 109 | def get_current_scalars(self): 110 | return {'total_loss': self.smoothed_total_loss, 'total_loss_repeat': self.smoothed_total_loss} 111 | 112 | def main(_): 113 | FLAGS.suncg_dl_out_codes = False 114 | FLAGS.suncg_dl_out_fine_img = False 115 | FLAGS.suncg_dl_out_voxels = True 116 | FLAGS.suncg_dl_out_layout = False 117 | FLAGS.suncg_dl_out_depth = False 118 | torch.manual_seed(0) 119 | trainer = VoxelTrainer(FLAGS) 120 | trainer.init_training() 121 | trainer.train() 122 | 123 | if __name__ == '__main__': 124 | app.run() -------------------------------------------------------------------------------- /nnutils/disp_net.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Inverse depth prediction net. 3 | Code based on https://github.com/ClementPinard/dispNetPytorch/ 4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import math 8 | from . import net_blocks as nb 9 | 10 | def predict_disp(in_planes): 11 | return nn.Conv2d(in_planes,1,kernel_size=3,stride=1,padding=1,bias=False) 12 | 13 | class DispNet(nn.Module): 14 | expansion = 1 15 | 16 | def __init__(self, batch_norm=True): 17 | super(DispNet, self).__init__() 18 | 19 | self.batch_norm = batch_norm 20 | self.conv1 = nb.conv2d(self.batch_norm, 3, 64, kernel_size=7, stride=2) 21 | self.conv2 = nb.conv2d(self.batch_norm, 64, 128, kernel_size=5, stride=2) 22 | self.conv3 = nb.conv2d(self.batch_norm, 128, 256, kernel_size=5, stride=2) 23 | self.conv3_1 = nb.conv2d(self.batch_norm, 256, 256) 24 | self.conv4 = nb.conv2d(self.batch_norm, 256, 512, stride=2) 25 | self.conv4_1 = nb.conv2d(self.batch_norm, 512, 512) 26 | self.conv5 = nb.conv2d(self.batch_norm, 512, 512, stride=2) 27 | self.conv5_1 = nb.conv2d(self.batch_norm, 512, 512) 28 | self.conv6 = nb.conv2d(self.batch_norm, 512, 1024, stride=2) 29 | self.conv6_1 = nb.conv2d(self.batch_norm,1024, 1024) 30 | 31 | self.deconv5 = nb.deconv2d(1024,512) 32 | self.deconv4 = nb.deconv2d(1025,256) 33 | self.deconv3 = nb.deconv2d(769,128) 34 | self.deconv2 = nb.deconv2d(385,64) 35 | self.deconv1 = nb.deconv2d(193,64) 36 | 37 | self.predict_disp6 = predict_disp(1024) 38 | self.predict_disp5 = predict_disp(1025) 39 | self.predict_disp4 = predict_disp(769) 40 | self.predict_disp3 = predict_disp(385) 41 | self.predict_disp2 = predict_disp(193) 42 | self.predict_disp1 = predict_disp(129) 43 | 44 | self.upsampled_disp6_to_5 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 45 | self.upsampled_disp5_to_4 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 46 | self.upsampled_disp4_to_3 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 47 | self.upsampled_disp3_to_2 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 48 | self.upsampled_disp2_to_1 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 49 | 50 | for m in self.modules(): 51 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 52 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 53 | m.weight.data.normal_(0, 0.02 / n) #this modified initialization seems to work better, but it's very hacky 54 | if m.bias is not None: 55 | m.bias.data.zero_() 56 | elif isinstance(m, nn.BatchNorm2d): 57 | m.weight.data.fill_(1) 58 | m.bias.data.zero_() 59 | 60 | def 
forward(self, x): 61 | out_conv1 = self.conv1(x) 62 | out_conv2 = self.conv2(out_conv1) 63 | out_conv3 = self.conv3_1(self.conv3(out_conv2)) 64 | out_conv4 = self.conv4_1(self.conv4(out_conv3)) 65 | out_conv5 = self.conv5_1(self.conv5(out_conv4)) 66 | out_conv6 = self.conv6_1(self.conv6(out_conv5)) 67 | 68 | disp6 = self.predict_disp6(out_conv6) 69 | disp6_up = self.upsampled_disp6_to_5(disp6) 70 | out_deconv5 = self.deconv5(out_conv6) 71 | 72 | concat5 = torch.cat((out_conv5,out_deconv5,disp6_up),1) 73 | disp5 = self.predict_disp5(concat5) 74 | disp5_up = self.upsampled_disp5_to_4(disp5) 75 | out_deconv4 = self.deconv4(concat5) 76 | 77 | concat4 = torch.cat((out_conv4,out_deconv4,disp5_up),1) 78 | disp4 = self.predict_disp4(concat4) 79 | disp4_up = self.upsampled_disp4_to_3(disp4) 80 | out_deconv3 = self.deconv3(concat4) 81 | 82 | concat3 = torch.cat((out_conv3,out_deconv3,disp4_up),1) 83 | disp3 = self.predict_disp3(concat3) 84 | disp3_up = self.upsampled_disp3_to_2(disp3) 85 | out_deconv2 = self.deconv2(concat3) 86 | 87 | concat2 = torch.cat((out_conv2,out_deconv2,disp3_up),1) 88 | disp2 = self.predict_disp2(concat2) 89 | disp2_up = self.upsampled_disp2_to_1(disp2) 90 | out_deconv1 = self.deconv1(concat2) 91 | 92 | concat1 = torch.cat((out_conv1,out_deconv1,disp2_up),1) 93 | disp1 = self.predict_disp1(concat1) 94 | 95 | if self.training: 96 | #return disp1,disp2,disp3,disp4,disp5,disp6 97 | return disp1 98 | else: 99 | return disp1 100 | 101 | 102 | def dispnet(path=None, batch_norm=True): 103 | """dispNet model architecture. 104 | 105 | Args: 106 | path : where to load pretrained network. will create a new one if not set 107 | """ 108 | model = DispNet(batch_norm=batch_norm) 109 | if path is not None: 110 | data = torch.load(path) 111 | if 'state_dict' in data.keys(): 112 | model.load_state_dict(data['state_dict']) 113 | else: 114 | model.load_state_dict(data) 115 | return model -------------------------------------------------------------------------------- /preprocess/suncg/matUtils/get_scene_vox.m: -------------------------------------------------------------------------------- 1 | function [sceneVox] = get_scene_vox(pathToData,sceneId,floorId,roomId,extCam2World,objcategory) 2 | % Notes: grid is Z up while the The loaded houses are Y up 3 | % Adapted from the sscnet codebase - https://github.com/shurans/sscnet 4 | 5 | volume_params; 6 | ignore_classes = {'people', 'plants'}; 7 | % Compute voxel range in cam coordinates 8 | voxOriginCam = - [voxSize(1)/2*voxUnit;voxSize(2)/2*voxUnit;0]; 9 | [gridPtsCamX,gridPtsCamY,gridPtsCamZ] = ndgrid(voxOriginCam(1):voxUnit:(voxOriginCam(1)+(voxSize(1)-1)*voxUnit), ... 10 | voxOriginCam(2):voxUnit:(voxOriginCam(2)+(voxSize(2)-1)*voxUnit), ... 
11 | voxOriginCam(3):voxUnit:(voxOriginCam(3)+(voxSize(3)-1)*voxUnit)); 12 | gridPtsCam_init = [gridPtsCamX(:),gridPtsCamY(:),gridPtsCamZ(:)]'; %' 13 | 14 | % Compute voxel grid centres in world coordinates 15 | gridPtsWorld = bsxfun(@plus,extCam2World(1:3,1:3)*gridPtsCam_init, extCam2World(1:3,4)); 16 | gridPtsWorldX = gridPtsWorld(1,:); 17 | gridPtsWorldY = gridPtsWorld(2,:); 18 | gridPtsWorldZ = gridPtsWorld(3,:); 19 | gridPtsLabel = zeros(1,size(gridPtsWorld,2)); 20 | 21 | house = loadjson(fullfile(pathToData,'house', sceneId,'house.json')); 22 | roomStruct = house.levels{floorId}.nodes{roomId}; 23 | floorStruct = house.levels{floorId}; 24 | 25 | % find all grid in the room 26 | floorObj = read_wobj_safe([fullfile(pathToData,'room',sceneId,roomStruct.modelId) 'f.obj']); 27 | inRoom = zeros(size(gridPtsWorldX)); 28 | for i = 1:length(floorObj.objects(3).data.vertices) 29 | faceId = floorObj.objects(3).data.vertices(i,:); 30 | floorP = floorObj.vertices(faceId,[1,3])'; 31 | inRoom = inRoom|inpolygon(gridPtsWorldX,gridPtsWorldY,floorP(1,:),floorP(2,:)); %' 32 | end 33 | 34 | % find floor 35 | floorZ = mean(floorObj.vertices(:,2)); 36 | gridPtsObjWorldInd = inRoom(:)'&(abs(gridPtsWorld(3,:)-floorZ) <= voxUnit/2); %' 37 | [~,classRootId] = getobjclassSUNCG('floor',objcategory); 38 | gridPtsLabel(gridPtsObjWorldInd) = classRootId; 39 | 40 | % find ceiling 41 | ceilObj = read_wobj_safe([fullfile(pathToData,'room',sceneId,roomStruct.modelId) 'c.obj']); 42 | ceilZ = mean(ceilObj.vertices(:,2)); 43 | gridPtsObjWorldInd = inRoom(:)'&abs(gridPtsWorld(3,:)-ceilZ) <= voxUnit/2; %' 44 | [~,classRootId] = getobjclassSUNCG('ceiling',objcategory); 45 | gridPtsLabel(gridPtsObjWorldInd) = classRootId; 46 | 47 | % Load walls 48 | WallObj = read_wobj_safe([fullfile(pathToData,'room',sceneId,roomStruct.modelId) 'w.obj']); 49 | inWall = zeros(size(gridPtsWorldX)); 50 | for oi = 1:length(WallObj.objects) 51 | if WallObj.objects(oi).type == 'f' 52 | for i = 1:length(WallObj.objects(oi).data.vertices) 53 | faceId = WallObj.objects(oi).data.vertices(i,:); 54 | floorP = WallObj.vertices(faceId,[1,3])'; %' 55 | inWall = inWall|inpolygon(gridPtsWorldX,gridPtsWorldY,floorP(1,:),floorP(2,:)); 56 | end 57 | end 58 | end 59 | gridPtsObjWorldInd = inWall(:)'&(gridPtsWorld(3,:)floorZ+voxUnit/2); %' 60 | [~,classRootId] = getobjclassSUNCG('wall',objcategory); 61 | gridPtsLabel(gridPtsObjWorldInd) = classRootId; 62 | 63 | % Loop through each object and set voxels to class ID 64 | for objId = roomStruct.nodeIndices 65 | object_struct = floorStruct.nodes{objId+1}; 66 | if isfield(object_struct, 'modelId') && isfield(object_struct, 'valid') && (object_struct.valid) 67 | % Set segmentation class ID 68 | [classRootName,classRootId,className] = getobjclassSUNCG(strrep(object_struct.modelId,'/','__'),objcategory); 69 | if ismember(className, ignore_classes) 70 | continue 71 | end 72 | 73 | % Compute object bbox in world coordinates 74 | objBbox = [object_struct.bbox.min([1,3,2])',object_struct.bbox.max([1,3,2])']; 75 | 76 | % Load segmentation of object in object coordinates 77 | filename= fullfile(pathToData,'object_vox/object_vox_data/',strrep(object_struct.modelId,'/','__'), [strrep(object_struct.modelId,'/','__'), '.binvox']); 78 | [voxels,scale,translate] = read_binvox(filename); 79 | [x,y,z] = ind2sub(size(voxels),find(voxels(:)>0)); 80 | objSegPts = bsxfun(@plus,[x,y,z]*scale,translate'); %' 81 | 82 | % Convert object to world coordinates 83 | extObj2World_yup = reshape(object_struct.transform,[4,4]); 84 | objSegPts = 
extObj2World_yup*[objSegPts(:,[1,3,2])';ones(1,size(x,1))]; %' 85 | objSegPts = objSegPts([1,3,2],:); 86 | 87 | % Get all grid points within the object bbox in world coordinates 88 | gridPtsObjWorldInd = gridPtsWorld(1,:) >= objBbox(1,1) - voxUnit & gridPtsWorld(1,:) <= objBbox(1,2) + voxUnit & ... 89 | gridPtsWorld(2,:) >= objBbox(2,1) - voxUnit & gridPtsWorld(2,:) <= objBbox(2,2) + voxUnit & ... 90 | gridPtsWorld(3,:) >= objBbox(3,1) - voxUnit & gridPtsWorld(3,:) <= objBbox(3,2) + voxUnit; 91 | gridPtsObjWorld = gridPtsWorld(:,find(gridPtsObjWorldInd)); 92 | 93 | 94 | % If object is a window or door, clear voxels in object bbox 95 | [~,wallId] = getobjclassSUNCG('wall',objcategory); 96 | if classRootId == 4 || classRootId == 5 97 | gridPtsObjClearInd = gridPtsObjWorldInd&gridPtsLabel==wallId; 98 | gridPtsLabel(gridPtsObjClearInd) = 0; 99 | end 100 | 101 | % Apply segmentation to grid points of object 102 | if numel(gridPtsObjWorld) > 0 103 | [indices, dists] = multiQueryKNNSearchImpl(pointCloud(objSegPts'), gridPtsObjWorld',1); 104 | objOccInd = find(sqrt(dists) <= (sqrt(3)/2)*scale); 105 | gridPtsObjWorldLinearIdx = find(gridPtsObjWorldInd); 106 | gridPtsLabel(gridPtsObjWorldLinearIdx(objOccInd)) = classRootId; 107 | end 108 | end 109 | end 110 | 111 | % Remove grid points not in field of view 112 | extWorld2Cam = inv([extCam2World;[0,0,0,1]]); 113 | gridPtsCam = extWorld2Cam(1:3,1:3)*gridPtsWorld + repmat(extWorld2Cam(1:3,4),1,size(gridPtsWorld,2)); 114 | gridPtsPixX = gridPtsCam(1,:).*(camK(1,1))./gridPtsCam(3,:)+camK(1,3); 115 | gridPtsPixY = gridPtsCam(2,:).*(camK(2,2))./gridPtsCam(3,:)+camK(2,3); 116 | invalidPixInd = (gridPtsPixX < 0 | gridPtsPixX >= im_w | gridPtsPixY < 0 | gridPtsPixY >= im_h | gridPtsCam(3,:) < 0); 117 | gridPtsLabel(find(invalidPixInd)) = 0; 118 | 119 | % Remove grid points not in the room 120 | gridPtsLabel(~inRoom(:)&gridPtsLabel(:)==0) = 255; 121 | 122 | % Save the volume 123 | sceneVox = reshape(gridPtsLabel,voxSize'); %' 124 | 125 | end -------------------------------------------------------------------------------- /utils/visualizer.py: -------------------------------------------------------------------------------- 1 | '''Code adapted from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix''' 2 | import numpy as np 3 | import os 4 | import ntpath 5 | import time 6 | import visdom 7 | from . import visutil as util 8 | from . import html 9 | 10 | class Visualizer(): 11 | def __init__(self, opt): 12 | # self.opt = opt 13 | self.display_id = opt.display_id 14 | self.use_html = opt.is_train and opt.use_html 15 | self.win_size = opt.display_winsize 16 | self.name = opt.name 17 | if self.display_id > 0: 18 | self.vis = visdom.Visdom(port = opt.display_port) 19 | self.display_single_pane_ncols = opt.display_single_pane_ncols 20 | 21 | if self.use_html: 22 | self.web_dir = os.path.join(opt.checkpoint_dir, opt.name, 'web') 23 | self.img_dir = os.path.join(self.web_dir, 'images') 24 | print('create web directory %s...' 
% self.web_dir) 25 | util.mkdirs([self.web_dir, self.img_dir]) 26 | self.log_name = os.path.join(opt.checkpoint_dir, opt.name, 'loss_log.txt') 27 | with open(self.log_name, "a") as log_file: 28 | now = time.strftime("%c") 29 | log_file.write('================ Training Loss (%s) ================\n' % now) 30 | 31 | # |visuals|: dictionary of images to display or save 32 | def display_current_results(self, visuals, epoch): 33 | if self.display_id > 0: # show images in the browser 34 | if self.display_single_pane_ncols > 0: 35 | h, w = next(iter(visuals.values())).shape[:2] 36 | table_css = """""" % (w, h) 40 | ncols = self.display_single_pane_ncols 41 | title = self.name 42 | label_html = '' 43 | label_html_row = '' 44 | nrows = int(np.ceil(len(visuals.items()) / ncols)) 45 | images = [] 46 | idx = 0 47 | # for label, image_numpy in visuals.items(): 48 | img_keys = visuals.keys() 49 | list.sort(img_keys) 50 | for label in img_keys: 51 | image_numpy = visuals[label] 52 | label_html_row += '%s' % label 53 | images.append(image_numpy.transpose([2, 0, 1])) 54 | idx += 1 55 | if idx % ncols == 0: 56 | label_html += '%s' % label_html_row 57 | label_html_row = '' 58 | white_image = np.ones_like(image_numpy.transpose([2, 0, 1]))*255 59 | while idx % ncols != 0: 60 | images.append(white_image) 61 | label_html_row += '' 62 | idx += 1 63 | if label_html_row != '': 64 | label_html += '%s' % label_html_row 65 | # pane col = image row 66 | self.vis.images(images, nrow=ncols, win=self.display_id + 1, 67 | padding=2, opts=dict(title=title + ' images')) 68 | label_html = '%s
' % label_html 69 | self.vis.text(table_css + label_html, win = self.display_id + 2, 70 | opts=dict(title=title + ' labels')) 71 | else: 72 | idx = 1 73 | for label, image_numpy in visuals.items(): 74 | self.vis.image( 75 | image_numpy.transpose([2,0,1]), opts=dict(title=label), 76 | win=self.display_id + idx) 77 | idx += 1 78 | 79 | if self.use_html: # save images to a html file 80 | for label, image_numpy in visuals.items(): 81 | img_path = os.path.join(self.img_dir, 'epoch%.3d_%s.png' % (epoch, label)) 82 | util.save_image(image_numpy, img_path) 83 | # update website 84 | webpage = html.HTML(self.web_dir, 'Experiment name = %s' % self.name, reflesh=1) 85 | for n in range(epoch, 0, -1): 86 | webpage.add_header('epoch [%d]' % n) 87 | ims = [] 88 | txts = [] 89 | links = [] 90 | 91 | for label, image_numpy in visuals.items(): 92 | img_path = 'epoch%.3d_%s.png' % (n, label) 93 | ims.append(img_path) 94 | txts.append(label) 95 | links.append(img_path) 96 | webpage.add_images(ims, txts, links, width=self.win_size) 97 | webpage.save() 98 | 99 | # scalars: dictionary of scalar labels and values 100 | def plot_current_scalars(self, epoch, counter_ratio, opt, scalars): 101 | if not hasattr(self, 'plot_data'): 102 | self.plot_data = {'X':[],'Y':[], 'legend':list(scalars.keys())} 103 | self.plot_data['X'].append(epoch + counter_ratio) 104 | self.plot_data['Y'].append([scalars[k] for k in self.plot_data['legend']]) 105 | self.vis.line( 106 | X=np.stack([np.array(self.plot_data['X'])]*len(self.plot_data['legend']),1), 107 | Y=np.array(self.plot_data['Y']), 108 | opts={ 109 | 'title': self.name + ' loss over time', 110 | 'legend': self.plot_data['legend'], 111 | 'xlabel': 'epoch', 112 | 'ylabel': 'loss'}, 113 | win=self.display_id) 114 | 115 | # scatter plots 116 | def plot_current_points(self, points, disp_offset=10): 117 | idx = disp_offset 118 | for label, pts in points.items(): 119 | #image_numpy = np.flipud(image_numpy) 120 | self.vis.scatter( 121 | pts, opts=dict(title=label, markersize=1), win=self.display_id + idx) 122 | idx += 1 123 | 124 | # scalars: same format as |scalars| of plot_current_scalars 125 | def print_current_scalars(self, epoch, i, scalars): 126 | message = '(epoch: %d, iters: %d) ' % (epoch, i) 127 | for k, v in scalars.items(): 128 | message += '%s: %.3f ' % (k, v) 129 | 130 | print(message) 131 | with open(self.log_name, "a") as log_file: 132 | log_file.write('%s\n' % message) 133 | 134 | # save image to the disk 135 | def save_images(self, webpage, visuals, image_path): 136 | image_dir = webpage.get_image_dir() 137 | short_path = ntpath.basename(image_path[0]) 138 | name = os.path.splitext(short_path)[0] 139 | 140 | webpage.add_header(name) 141 | ims = [] 142 | txts = [] 143 | links = [] 144 | 145 | for label, image_numpy in visuals.items(): 146 | image_name = '%s_%s.png' % (name, label) 147 | save_path = os.path.join(image_dir, image_name) 148 | util.save_image(image_numpy, save_path) 149 | 150 | ims.append(image_name) 151 | txts.append(label) 152 | links.append(image_name) 153 | webpage.add_images(ims, txts, links, width=self.win_size) 154 | -------------------------------------------------------------------------------- /nnutils/net_blocks.py: -------------------------------------------------------------------------------- 1 | ''' 2 | CNN building blocks. 
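The helpers below each return a small nn.Sequential stage that can be chained
into larger models. A minimal illustrative sketch (layer sizes are arbitrary,
and the import path assumes the repository root is importable as factored3d):

    import torch
    from factored3d.nnutils import net_blocks as nb

    conv = nb.conv2d(True, 3, 8, stride=2)       # 3 -> 8 channels, halves H and W
    head = torch.nn.Sequential(
        nb.Flatten(),
        nb.fc_stack(8 * 16 * 16, 64, 2))         # flattened 16x16 maps -> 64-d feature
    x = torch.randn(2, 3, 32, 32)                # wrap in Variable on older PyTorch
    out = head(conv(x))                          # -> (2, 64)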
3 | ''' 4 | from __future__ import division 5 | from __future__ import print_function 6 | import torch 7 | import torch.nn as nn 8 | import math 9 | 10 | class Flatten(nn.Module): 11 | def forward(self, x): 12 | return x.view(x.size()[0], -1) 13 | 14 | class Unsqueeze(nn.Module): 15 | def __init__(self, dim): 16 | super(Unsqueeze, self).__init__() 17 | self.dim = dim 18 | 19 | def forward(self, x): 20 | return x.unsqueeze(self.dim) 21 | 22 | ## fc layers 23 | def fc(batch_norm, nc_inp, nc_out): 24 | if batch_norm: 25 | return nn.Sequential( 26 | nn.Linear(nc_inp, nc_out, bias=True), 27 | nn.BatchNorm1d(nc_out), 28 | nn.LeakyReLU(0.2,inplace=True) 29 | ) 30 | else: 31 | return nn.Sequential( 32 | nn.Linear(nc_inp, nc_out), 33 | nn.LeakyReLU(0.1,inplace=True) 34 | ) 35 | 36 | def fc_stack(nc_inp, nc_out, nlayers, use_bn=True): 37 | modules = [] 38 | for l in range(nlayers): 39 | modules.append(fc(use_bn, nc_inp, nc_out)) 40 | nc_inp = nc_out 41 | encoder = nn.Sequential(*modules) 42 | net_init(encoder) 43 | return encoder 44 | 45 | ## 2D convolution layers 46 | def conv2d(batch_norm, in_planes, out_planes, kernel_size=3, stride=1): 47 | if batch_norm: 48 | return nn.Sequential( 49 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=True), 50 | nn.BatchNorm2d(out_planes), 51 | nn.LeakyReLU(0.2,inplace=True) 52 | ) 53 | else: 54 | return nn.Sequential( 55 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=True), 56 | nn.LeakyReLU(0.2,inplace=True) 57 | ) 58 | 59 | 60 | def deconv2d(in_planes, out_planes): 61 | return nn.Sequential( 62 | nn.ConvTranspose2d(in_planes, out_planes, kernel_size=4, stride=2, padding=1, bias=True), 63 | nn.LeakyReLU(0.2,inplace=True) 64 | ) 65 | 66 | ## 3D convolution layers 67 | def conv3d(batch_norm, in_planes, out_planes, kernel_size=3, stride=1): 68 | if batch_norm: 69 | return nn.Sequential( 70 | nn.Conv3d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=True), 71 | nn.BatchNorm3d(out_planes), 72 | nn.LeakyReLU(0.2,inplace=True) 73 | ) 74 | else: 75 | return nn.Sequential( 76 | nn.Conv3d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=True), 77 | nn.LeakyReLU(0.2,inplace=True) 78 | ) 79 | 80 | 81 | def deconv3d(batch_norm, in_planes, out_planes): 82 | if batch_norm: 83 | return nn.Sequential( 84 | nn.ConvTranspose3d(in_planes, out_planes, kernel_size=4, stride=2, padding=1, bias=True), 85 | nn.BatchNorm3d(out_planes), 86 | nn.LeakyReLU(0.2,inplace=True) 87 | ) 88 | else: 89 | return nn.Sequential( 90 | nn.ConvTranspose3d(in_planes, out_planes, kernel_size=4, stride=2, padding=1, bias=True), 91 | nn.LeakyReLU(0.2,inplace=True) 92 | ) 93 | 94 | 95 | ## 3D Network Modules 96 | def encoder3d(nlayers, use_bn=True, nc_input=1, nc_max=128, nc_l1=8, nc_step=1, nz_shape=20): 97 | ''' Simple 3D encoder with nlayers. 
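    Returns a (module, nc_output) pair, where nc_output is the channel count of
    the last conv stage. Because the fully-connected head is sized from that
    channel count alone, the flatten only lines up when the spatial extent has
    collapsed to 1 (for example a cubic input with side length 2**nlayers). A
    small illustrative sketch (sizes are arbitrary):

        enc, nc = encoder3d(nlayers=5, nc_input=1, nz_shape=20)
        z = enc(vox)    # vox: (B, 1, 32, 32, 32) -> z: (B, 20); here nc == 128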
98 | 99 | Args: 100 | nlayers: number of encoder layers 101 | use_bn: whether to use batch_norm 102 | nc_input: number of input channels 103 | nc_max: number of max channels 104 | nc_l1: number of channels in layer 1 105 | nc_step: double number of channels every nc_step layers 106 | nz_shape: size of bottleneck layer 107 | ''' 108 | modules = [] 109 | nc_output = nc_l1 110 | for nl in range(nlayers): 111 | if (nl>=1) and (nl%nc_step==0) and (nc_output <= nc_max*2): 112 | nc_output *= 2 113 | 114 | modules.append(conv3d(use_bn, nc_input, nc_output, stride=1)) 115 | nc_input = nc_output 116 | modules.append(conv3d(use_bn, nc_input, nc_output, stride=1)) 117 | modules.append(torch.nn.MaxPool3d(kernel_size=2, stride=2)) 118 | 119 | modules.append(Flatten()) 120 | modules.append(fc_stack(nc_output, nz_shape, 2, use_bn=True)) 121 | encoder = nn.Sequential(*modules) 122 | net_init(encoder) 123 | return encoder, nc_output 124 | 125 | 126 | def decoder3d(nlayers, nz_shape, nc_input, use_bn=True, nc_final=1, nc_min=8, nc_step=1, init_fc=True): 127 | ''' Simple 3D encoder with nlayers. 128 | 129 | Args: 130 | nlayers: number of decoder layers 131 | nz_shape: number of bottleneck 132 | nc_input: number of channels to start upconvolution from 133 | use_bn: whether to use batch_norm 134 | nc_final: number of output channels 135 | nc_min: number of min channels 136 | nc_step: double number of channels every nc_step layers 137 | init_fc: initial features are not spatial, use an fc & unsqueezing to make them 3D 138 | ''' 139 | modules = [] 140 | if init_fc: 141 | modules.append(fc(use_bn, nz_shape, nc_input)) 142 | for d in range(3): 143 | modules.append(Unsqueeze(2)) 144 | nc_output = nc_input 145 | for nl in range(nlayers): 146 | if (nl%nc_step==0) and (nc_output//2 >= nc_min): 147 | nc_output = nc_output//2 148 | 149 | modules.append(deconv3d(use_bn, nc_input, nc_output)) 150 | nc_input = nc_output 151 | modules.append(conv3d(use_bn, nc_input, nc_output)) 152 | 153 | modules.append(nn.Conv3d(nc_output, nc_final, kernel_size=3, stride=1, padding=1, bias=True)) 154 | decoder = nn.Sequential(*modules) 155 | net_init(decoder) 156 | return decoder 157 | 158 | 159 | def net_init(net): 160 | for m in net.modules(): 161 | if isinstance(m, nn.Linear): 162 | #n = m.out_features 163 | #m.weight.data.normal_(0, 0.02 / n) #this modified initialization seems to work better, but it's very hacky 164 | #n = m.in_features 165 | #m.weight.data.normal_(0, math.sqrt(2. / n)) #xavier 166 | m.weight.data.normal_(0, 0.02) 167 | if m.bias is not None: 168 | m.bias.data.zero_() 169 | 170 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 171 | #n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels 172 | #m.weight.data.normal_(0, math.sqrt(2. / n)) #this modified initialization seems to work better, but it's very hacky 173 | m.weight.data.normal_(0, 0.02) 174 | if m.bias is not None: 175 | m.bias.data.zero_() 176 | 177 | if isinstance(m, nn.Conv3d) or isinstance(m, nn.ConvTranspose3d): 178 | #n = m.kernel_size[0] * m.kernel_size[1] * m.kernel_size[2] * m.in_channels 179 | #m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 180 | m.weight.data.normal_(0, 0.02) 181 | if m.bias is not None: 182 | m.bias.data.zero_() 183 | 184 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm3d): 185 | m.weight.data.fill_(1) 186 | m.bias.data.zero_() 187 | -------------------------------------------------------------------------------- /nnutils/train_utils.py: -------------------------------------------------------------------------------- 1 | """Generic Training Utils. 2 | """ 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | import torch 8 | import os 9 | import os.path as osp 10 | import time 11 | import pdb 12 | from absl import flags 13 | 14 | from ..utils.visualizer import Visualizer 15 | 16 | #-------------- flags -------------# 17 | #----------------------------------# 18 | ## Flags for training 19 | curr_path = osp.dirname(osp.abspath(__file__)) 20 | cache_path = osp.join(curr_path, '..', 'cachedir') 21 | 22 | flags.DEFINE_string('name', 'exp_name', 'Experiment Name') 23 | flags.DEFINE_string('cache_dir', cache_path, 'Cachedir') 24 | flags.DEFINE_integer('gpu_id', 0, 'Which gpu to use') 25 | flags.DEFINE_integer('num_epochs', 10, 'Number of epochs to train') 26 | flags.DEFINE_integer('num_pretrain_epochs', 0, 'If >0, we will pretain from an existing saved model.') 27 | flags.DEFINE_float('learning_rate', 0.0001, 'learning rate') 28 | flags.DEFINE_float('beta1', 0.9, 'Momentum term of adam') 29 | 30 | flags.DEFINE_integer('batch_size', 4, 'Size of minibatches') 31 | flags.DEFINE_integer('num_iter', 0, 'Number of training iterations. 0 -> Use epoch_iter') 32 | flags.DEFINE_integer('n_data_workers', 4, 'Number of data loading workers') 33 | 34 | ## Flags for logging and snapshotting 35 | flags.DEFINE_string('checkpoint_dir', osp.join(cache_path, 'snapshots'), 36 | 'Root directory for output files') 37 | flags.DEFINE_integer('print_freq', 20, 'scalar logging frequency') 38 | flags.DEFINE_integer('save_latest_freq', 10000, 'save latest model every x iterations') 39 | flags.DEFINE_integer('save_epoch_freq', 2, 'save model every k epochs') 40 | 41 | ## Flags for visualization 42 | flags.DEFINE_integer('display_freq', 100, 'visuals logging frequency') 43 | flags.DEFINE_boolean('display_visuals', False, 'whether to display images') 44 | flags.DEFINE_boolean('print_scalars', True, 'whether to print scalars') 45 | flags.DEFINE_boolean('plot_scalars', False, 'whether to plot scalars') 46 | flags.DEFINE_boolean('is_train', True, 'Are we training ?') 47 | flags.DEFINE_boolean('use_html', False, 'Save html visualizations') 48 | flags.DEFINE_integer('display_id', 1, 'Display Id') 49 | flags.DEFINE_integer('display_winsize', 256, 'Display Size') 50 | flags.DEFINE_integer('display_port', 8097, 'Display port') 51 | flags.DEFINE_integer('display_single_pane_ncols', 0, 'if positive, display all images in a single visdom web panel with certain number of images per row.') 52 | 53 | #-------- tranining class ---------# 54 | #----------------------------------# 55 | class Trainer(): 56 | def __init__(self, opts): 57 | self.opts = opts 58 | self.gpu_id = opts.gpu_id 59 | self.Tensor = torch.cuda.FloatTensor if (self.gpu_id is not None) else torch.Tensor 60 | self.invalid_batch = False #the trainer can optionally reset this every iteration during set_input call 61 | self.save_dir = os.path.join(opts.checkpoint_dir, opts.name) 62 | if not os.path.exists(self.save_dir): 63 | os.makedirs(self.save_dir) 64 | log_file = os.path.join(self.save_dir, 
'opts.log') 65 | with open(log_file, 'w') as f: 66 | for k in dir(opts): 67 | f.write('{}: {}\n'.format(k, opts.__getattr__(k))) 68 | 69 | 70 | # helper saving function that can be used by subclasses 71 | def save_network(self, network, network_label, epoch_label, gpu_id=None): 72 | save_filename = '{}_net_{}.pth'.format(network_label, epoch_label) 73 | save_path = os.path.join(self.save_dir, save_filename) 74 | torch.save(network.cpu().state_dict(), save_path) 75 | if gpu_id is not None and torch.cuda.is_available(): 76 | network.cuda(device_id=gpu_id) 77 | return 78 | 79 | # helper loading function that can be used by subclasses 80 | def load_network(self, network, network_label, epoch_label, network_dir=None): 81 | save_filename = '{}_net_{}.pth'.format(network_label, epoch_label) 82 | if network_dir is None: 83 | network_dir = self.save_dir 84 | save_path = os.path.join(network_dir, save_filename) 85 | network.load_state_dict(torch.load(save_path)) 86 | return 87 | 88 | def define_model(self): 89 | '''Should be implemented by the child class.''' 90 | raise NotImplementedError 91 | 92 | def init_dataset(self): 93 | '''Should be implemented by the child class.''' 94 | raise NotImplementedError 95 | 96 | def define_criterion(self): 97 | '''Should be implemented by the child class.''' 98 | raise NotImplementedError 99 | 100 | def set_input(self, batch): 101 | '''Should be implemented by the child class.''' 102 | raise NotImplementedError 103 | 104 | def forward(self): 105 | '''Should compute self.total_loss. To be implemented by the child class.''' 106 | raise NotImplementedError 107 | 108 | def save(self, epoch_prefix): 109 | '''Saves the model.''' 110 | self.save_network(self.model, 'pred', epoch_prefix, gpu_id=self.opts.gpu_id) 111 | return 112 | 113 | def get_current_visuals(self): 114 | '''Should be implemented by the child class.''' 115 | raise NotImplementedError 116 | 117 | def get_current_scalars(self): 118 | '''Should be implemented by the child class.''' 119 | raise NotImplementedError 120 | 121 | def get_current_points(self): 122 | '''Should be implemented by the child class.''' 123 | raise NotImplementedError 124 | 125 | def init_training(self): 126 | opts = self.opts 127 | self.define_model() 128 | self.init_dataset() 129 | self.define_criterion() 130 | self.optimizer = torch.optim.Adam( 131 | self.model.parameters(), lr=opts.learning_rate, betas=(opts.beta1, 0.999)) 132 | 133 | def train(self): 134 | opts = self.opts 135 | self.smoothed_total_loss = 0 136 | self.visualizer = Visualizer(opts) 137 | visualizer = self.visualizer 138 | total_steps = 0 139 | dataset_size = len(self.dataloader) 140 | 141 | for epoch in range(opts.num_pretrain_epochs, opts.num_epochs): 142 | epoch_iter = 0 143 | for i, batch in enumerate(self.dataloader): 144 | iter_start_time = time.time() 145 | self.set_input(batch) 146 | if not self.invalid_batch: 147 | self.optimizer.zero_grad() 148 | self.forward() 149 | self.smoothed_total_loss = self.smoothed_total_loss*0.99 + 0.01*self.total_loss.data[0] 150 | self.total_loss.backward() 151 | # pdb.set_trace() 152 | self.optimizer.step() 153 | 154 | total_steps += 1 155 | epoch_iter += 1 156 | 157 | if opts.display_visuals and (total_steps % opts.display_freq == 0): 158 | visualizer.display_current_results(self.get_current_visuals(), epoch) 159 | visualizer.plot_current_points(self.get_current_points()) 160 | 161 | if opts.print_scalars and (total_steps % opts.print_freq == 0): 162 | scalars = self.get_current_scalars() 163 | 
visualizer.print_current_scalars(epoch, epoch_iter, scalars) 164 | if opts.plot_scalars: 165 | visualizer.plot_current_scalars(epoch, float(epoch_iter)/dataset_size, opts, scalars) 166 | 167 | if total_steps % opts.save_latest_freq == 0: 168 | print('saving the model at the end of epoch {:d}, iters {:d}'.format(epoch, total_steps)) 169 | self.save('latest') 170 | 171 | if total_steps == opts.num_iter: 172 | return 173 | 174 | if (epoch+1) % opts.save_epoch_freq == 0: 175 | print('saving the model at the end of epoch {:d}, iters {:d}'.format(epoch, total_steps)) 176 | self.save('latest') 177 | self.save(epoch+1) -------------------------------------------------------------------------------- /nnutils/oc_net.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Object-centric prediction net. 3 | ''' 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | from absl import flags 8 | import torch 9 | import torch.nn as nn 10 | import torchvision 11 | from . import net_blocks as nb 12 | from . import roi_pool_py as roi_pool 13 | #from oc3d.nnutils import roi_pooling 14 | import pdb 15 | 16 | #-------------- flags -------------# 17 | #----------------------------------# 18 | flags.DEFINE_integer('roi_size', 4, 'RoI feat spatial size.') 19 | flags.DEFINE_integer('nz_shape', 20, 'Number of latent feat dimension for shape prediction') 20 | flags.DEFINE_integer('nz_feat', 300, 'RoI encoded feature size') 21 | flags.DEFINE_boolean('use_context', True, 'Should we use bbox + full image features') 22 | flags.DEFINE_boolean('pred_voxels', True, 'Predict voxels, or code instead') 23 | flags.DEFINE_boolean('classify_rot', False, 'Classify rotation, or regress quaternion instead') 24 | flags.DEFINE_integer('nz_rot', 4, 'Number of outputs for rot prediction. 
Value overriden in code.') 25 | 26 | 27 | #------------- Modules ------------# 28 | #----------------------------------# 29 | class ResNetConv(nn.Module): 30 | def __init__(self, n_blocks=4): 31 | super(ResNetConv, self).__init__() 32 | self.resnet = torchvision.models.resnet18(pretrained=True) 33 | self.n_blocks=n_blocks 34 | 35 | def forward(self, x): 36 | n_blocks = self.n_blocks 37 | x = self.resnet.conv1(x) 38 | x = self.resnet.bn1(x) 39 | x = self.resnet.relu(x) 40 | x = self.resnet.maxpool(x) 41 | 42 | if n_blocks >= 1: 43 | x = self.resnet.layer1(x) 44 | if n_blocks >= 2: 45 | x = self.resnet.layer2(x) 46 | if n_blocks >= 3: 47 | x = self.resnet.layer3(x) 48 | if n_blocks >= 4: 49 | x = self.resnet.layer4(x) 50 | return x 51 | 52 | 53 | class ShapePredictor(nn.Module): 54 | def __init__(self, nz_feat, nz_shape, pred_voxels=True): 55 | super(ShapePredictor, self).__init__() 56 | self.pred_layer = nb.fc(True, nz_feat, nz_shape) 57 | self.pred_voxels = pred_voxels 58 | 59 | def forward(self, feat): 60 | # pdb.set_trace() 61 | shape = self.pred_layer.forward(feat) 62 | # print('shape: ( Mean = {}, Var = {} )'.format(shape.mean().data[0], shape.var().data[0])) 63 | if self.pred_voxels: 64 | shape = torch.nn.functional.sigmoid(self.decoder.forward(shape)) 65 | return shape 66 | 67 | def add_voxel_decoder(self, voxel_decoder=None): 68 | # if self.pred_voxels: 69 | self.decoder = voxel_decoder 70 | 71 | 72 | class QuatPredictor(nn.Module): 73 | def __init__(self, nz_feat, nz_rot, classify_rot=True): 74 | super(QuatPredictor, self).__init__() 75 | self.pred_layer = nn.Linear(nz_feat, nz_rot) 76 | self.classify_rot = classify_rot 77 | 78 | def forward(self, feat): 79 | quat = self.pred_layer.forward(feat) 80 | if self.classify_rot: 81 | quat = torch.nn.functional.log_softmax(quat) 82 | else: 83 | quat = torch.nn.functional.normalize(quat) 84 | return quat 85 | 86 | 87 | class ScalePredictor(nn.Module): 88 | def __init__(self, nz): 89 | super(ScalePredictor, self).__init__() 90 | self.pred_layer = nn.Linear(nz, 3) 91 | 92 | def forward(self, feat): 93 | scale = self.pred_layer.forward(feat) + 1 #biasing the scale to 1 94 | scale = torch.nn.functional.relu(scale) + 1e-12 95 | # print('scale: ( Mean = {}, Var = {} )'.format(scale.mean().data[0], scale.var().data[0])) 96 | return scale 97 | 98 | 99 | class TransPredictor(nn.Module): 100 | def __init__(self, nz): 101 | super(TransPredictor, self).__init__() 102 | self.pred_layer = nn.Linear(nz, 3) 103 | 104 | def forward(self, feat): 105 | #pdb.set_trace() 106 | trans = self.pred_layer.forward(feat) 107 | # print('trans: ( Mean = {}, Var = {} )'.format(trans.mean().data[0], trans.var().data[0])) 108 | return trans 109 | 110 | 111 | class LabelPredictor(nn.Module): 112 | def __init__(self, nz_feat, classify_rot=True): 113 | super(LabelPredictor, self).__init__() 114 | self.pred_layer = nn.Linear(nz_feat, 1) 115 | 116 | def forward(self, feat): 117 | pred = self.pred_layer.forward(feat) 118 | pred = torch.nn.functional.sigmoid(pred) 119 | return pred 120 | 121 | 122 | class CodePredictor(nn.Module): 123 | def __init__( 124 | self, nz_feat=200, 125 | pred_voxels=True, nz_shape=100, 126 | classify_rot=True, nz_rot=4 127 | ): 128 | super(CodePredictor, self).__init__() 129 | self.quat_predictor = QuatPredictor(nz_feat, classify_rot=classify_rot, nz_rot=nz_rot) 130 | self.shape_predictor = ShapePredictor(nz_feat, nz_shape=nz_shape, pred_voxels=pred_voxels) 131 | self.scale_predictor = ScalePredictor(nz_feat) 132 | self.trans_predictor = 
TransPredictor(nz_feat) 133 | 134 | def forward(self, feat): 135 | shape_pred = self.shape_predictor.forward(feat) 136 | scale_pred = self.scale_predictor.forward(feat) 137 | quat_pred = self.quat_predictor.forward(feat) 138 | trans_pred = self.trans_predictor.forward(feat) 139 | return shape_pred, scale_pred, quat_pred, trans_pred 140 | 141 | 142 | class RoiEncoder(nn.Module): 143 | def __init__(self, nc_inp_fine, nc_inp_coarse, use_context=True, nz_joint=300, nz_roi=300, nz_coarse=300, nz_box=50): 144 | super(RoiEncoder, self).__init__() 145 | 146 | self.encoder_fine = nb.fc_stack(nc_inp_fine, nz_roi, 2) 147 | self.encoder_coarse = nb.fc_stack(nc_inp_coarse, nz_coarse, 2) 148 | self.encoder_bbox = nb.fc_stack(4, nz_box, 3) 149 | 150 | self.encoder_joint = nb.fc_stack(nz_roi+nz_coarse+nz_box, nz_joint, 2) 151 | self.use_context = use_context 152 | 153 | def forward(self, feats): 154 | roi_img_feat, img_feat_coarse, rois_inp = feats 155 | feat_fine = self.encoder_fine.forward(roi_img_feat) 156 | feat_coarse = self.encoder_coarse.forward(img_feat_coarse) 157 | 158 | #dividing by img_height that the inputs are not too high 159 | feat_bbox = self.encoder_bbox.forward(rois_inp[:, 1:5]/480.0) 160 | if not self.use_context: 161 | feat_bbox = feat_bbox*0 162 | feat_coarse = feat_coarse*0 163 | feat_coarse_rep = torch.index_select(feat_coarse, 0, rois_inp[:, 0].type(torch.LongTensor).cuda()) 164 | 165 | # print(feat_fine.size(), feat_coarse_rep.size(), feat_bbox.size()) 166 | feat_roi = self.encoder_joint.forward(torch.cat((feat_fine, feat_coarse_rep, feat_bbox), dim=1)) 167 | return feat_roi 168 | 169 | 170 | #------------- OC Net -------------# 171 | #----------------------------------# 172 | class OCNet(nn.Module): 173 | def __init__( 174 | self, img_size_coarse, 175 | roi_size=4, 176 | use_context=True, nz_feat=1000, 177 | pred_voxels=True, nz_shape=100, 178 | classify_rot=False, nz_rot=4, 179 | pred_labels=False, filter_positives=False 180 | ): 181 | super(OCNet, self).__init__() 182 | self.pred_labels = pred_labels 183 | self.filter_positives = filter_positives 184 | self.nz_feat = nz_feat 185 | 186 | self.resnet_conv_fine = ResNetConv(n_blocks=3) 187 | self.resnet_conv_coarse = ResNetConv(n_blocks=4) 188 | self.roi_size = roi_size 189 | self.roi_pool = roi_pool.RoIPool(roi_size, roi_size, 1/16) 190 | nc_inp_fine = 256*roi_size*roi_size 191 | nc_inp_coarse = 512*(img_size_coarse[0]//32)*(img_size_coarse[1]//32) 192 | 193 | self.roi_encoder = RoiEncoder(nc_inp_fine, nc_inp_coarse, use_context=use_context, nz_joint=nz_feat) 194 | 195 | self.code_predictor = CodePredictor( 196 | nz_feat=nz_feat, 197 | pred_voxels=pred_voxels, nz_shape=nz_shape, 198 | classify_rot=classify_rot, nz_rot=nz_rot) 199 | nb.net_init(self.roi_encoder) 200 | nb.net_init(self.code_predictor) 201 | 202 | def add_label_predictor(self): 203 | self.label_predictor = LabelPredictor(self.nz_feat) 204 | nb.net_init(self.label_predictor) 205 | 206 | def forward(self, imgs_rois): 207 | imgs_inp_fine = imgs_rois[0] 208 | imgs_inp_coarse = imgs_rois[1] 209 | rois_inp = imgs_rois[2] 210 | 211 | img_feat_coarse = self.resnet_conv_coarse.forward(imgs_inp_coarse) 212 | img_feat_coarse = img_feat_coarse.view(img_feat_coarse.size(0), -1) 213 | 214 | img_feat_fine = self.resnet_conv_fine.forward(imgs_inp_fine) 215 | 216 | roi_img_feat = self.roi_pool.forward(img_feat_fine, rois_inp) 217 | roi_img_feat = roi_img_feat.view(roi_img_feat.size(0), -1) 218 | 219 | roi_feat = self.roi_encoder.forward((roi_img_feat, img_feat_coarse, rois_inp)) 
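#
# Illustrative usage sketch (added for exposition; not part of the original file). It shows,
# under the default flags used elsewhere in this repo (coarse 128x256 and fine 480x640 images,
# nz_feat=300, nz_shape=20, nz_rot=24 when classifying rotation) and assuming a CUDA device
# (RoiEncoder indexes with a cuda LongTensor), how OCNet might be instantiated and called.
# Each row of the rois tensor is [batch_index, x1, y1, x2, y2] in fine-image pixel coordinates
# (see suncg_parse.bboxes_to_rois).
#
#   net = OCNet((128, 256), roi_size=4, use_context=True, nz_feat=300,
#               pred_voxels=False, nz_shape=20, classify_rot=True, nz_rot=24).cuda()
#   imgs_fine = Variable(torch.randn(2, 3, 480, 640).cuda())
#   imgs_coarse = Variable(torch.randn(2, 3, 128, 256).cuda())
#   rois = Variable(torch.Tensor([[0, 40, 60, 300, 350], [1, 80, 100, 500, 420]]).cuda())
#   shape_pred, scale_pred, quat_pred, trans_pred = net.forward((imgs_fine, imgs_coarse, rois))
#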
220 | 221 | if self.pred_labels: 222 | labels_pred = self.label_predictor.forward(roi_feat) 223 | 224 | if self.filter_positives: 225 | pos_inds = imgs_rois[3].squeeze().data.nonzero().squeeze() 226 | pos_inds = torch.autograd.Variable( 227 | pos_inds.type(torch.LongTensor).cuda(), requires_grad=False) 228 | roi_feat = torch.index_select(roi_feat, 0, pos_inds) 229 | 230 | codes_pred = self.code_predictor.forward(roi_feat) 231 | 232 | if self.pred_labels: 233 | return codes_pred, labels_pred 234 | else: 235 | return codes_pred -------------------------------------------------------------------------------- /experiments/suncg/box3d.py: -------------------------------------------------------------------------------- 1 | """Script for box3d prediction experiment. 2 | """ 3 | # Sample usage: python -m factored3d.experiments.suncg.box3d --plot_scalars --display_visuals --display_freq=2000 --save_epoch_freq=1 --batch_size=8 --name=box3d_base --use_context --pred_voxels=False --classify_rot --shape_loss_wt=10 4 | 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | from absl import app 9 | from absl import flags 10 | import os 11 | import os.path as osp 12 | import numpy as np 13 | import torch 14 | import torchvision 15 | from torch.autograd import Variable 16 | import time 17 | import scipy.misc 18 | import pdb 19 | import copy 20 | 21 | from ...data import suncg as suncg_data 22 | from ...utils import suncg_parse 23 | from ...nnutils import train_utils 24 | from ...nnutils import net_blocks 25 | from ...nnutils import loss_utils 26 | from ...nnutils import oc_net 27 | from ...nnutils import disp_net 28 | from ...utils import visutil 29 | from ...renderer import utils as render_utils 30 | 31 | curr_path = osp.dirname(osp.abspath(__file__)) 32 | cache_path = osp.join(curr_path, '..', '..', 'cachedir') 33 | flags.DEFINE_string('rendering_dir', osp.join(cache_path, 'rendering'), 'Directory where intermittent renderings are saved') 34 | 35 | flags.DEFINE_integer('voxel_size', 32, 'Spatial dimension of shape voxels') 36 | flags.DEFINE_integer('n_voxel_layers', 5, 'Number of layers ') 37 | flags.DEFINE_integer('voxel_nc_max', 128, 'Max 3D channels') 38 | flags.DEFINE_integer('voxel_nc_l1', 8, 'Initial shape encder/decoder layer dimension') 39 | 40 | flags.DEFINE_string('shape_pretrain_name', 'object_autoenc_32', 'Experiment name for pretrained shape encoder-decoder') 41 | flags.DEFINE_integer('shape_pretrain_epoch', 800, 'Experiment name for shape decoder') 42 | flags.DEFINE_boolean('shape_dec_ft', False, 'If predicting voxels, should we pretrain from an existing deocder') 43 | 44 | flags.DEFINE_string('ft_pretrain_name', 'box3d_base', 'Experiment name from which we will pretrain the OCNet') 45 | flags.DEFINE_integer('ft_pretrain_epoch', 0, 'Network epoch from which we will finetune') 46 | 47 | flags.DEFINE_integer('max_rois', 5, 'If we have more objects than this per image, we will subsample.') 48 | flags.DEFINE_integer('max_total_rois', 40, 'If we have more objects than this per batch, we will reject the batch.') 49 | 50 | FLAGS = flags.FLAGS 51 | 52 | 53 | class Box3dTrainer(train_utils.Trainer): 54 | def define_model(self): 55 | ''' 56 | Define the pytorch net 'model' whose weights will be updated during training. 
57 | ''' 58 | opts = self.opts 59 | assert(not (opts.ft_pretrain_epoch > 0 and opts.num_pretrain_epochs > 0)) 60 | 61 | self.voxel_encoder, nc_enc_voxel = net_blocks.encoder3d( 62 | opts.n_voxel_layers, nc_max=opts.voxel_nc_max, nc_l1=opts.voxel_nc_l1, nz_shape=opts.nz_shape) 63 | 64 | self.voxel_decoder = net_blocks.decoder3d( 65 | opts.n_voxel_layers, opts.nz_shape, nc_enc_voxel, nc_min=opts.voxel_nc_l1) 66 | 67 | self.model = oc_net.OCNet( 68 | (opts.img_height, opts.img_width), 69 | roi_size=opts.roi_size, 70 | use_context=opts.use_context, nz_feat=opts.nz_feat, 71 | pred_voxels=opts.pred_voxels, nz_shape=opts.nz_shape, 72 | classify_rot=opts.classify_rot, nz_rot=opts.nz_rot) 73 | 74 | if opts.ft_pretrain_epoch > 0: 75 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.ft_pretrain_name) 76 | self.load_network( 77 | self.model, 'pred', opts.ft_pretrain_epoch, network_dir=network_dir) 78 | 79 | if opts.pred_voxels: 80 | self.model.code_predictor.shape_predictor.add_voxel_decoder( 81 | copy.deepcopy(self.voxel_decoder)) 82 | 83 | if opts.pred_voxels and opts.shape_dec_ft: 84 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.shape_pretrain_name) 85 | self.load_network( 86 | self.model.code_predictor.shape_predictor.decoder, 87 | 'decoder', opts.shape_pretrain_epoch, network_dir=network_dir) 88 | 89 | if self.opts.num_pretrain_epochs > 0: 90 | self.load_network(self.model, 'pred', self.opts.num_pretrain_epochs-1) 91 | self.model = self.model.cuda(device_id=self.opts.gpu_id) 92 | return 93 | 94 | def init_dataset(self): 95 | opts = self.opts 96 | self.real_iter = 1 # number of iterations we actually updated the net for 97 | self.data_iter = 1 # number of iterations we called the data loader 98 | self.resnet_transform = torchvision.transforms.Normalize( 99 | mean=[0.485, 0.456, 0.406], 100 | std=[0.229, 0.224, 0.225]) 101 | split_dir = osp.join(opts.suncg_dir, 'splits') 102 | self.split = suncg_parse.get_split(split_dir, house_names=os.listdir(osp.join(opts.suncg_dir, 'camera'))) 103 | self.dataloader = suncg_data.suncg_data_loader(self.split['train'], opts) 104 | 105 | if not opts.pred_voxels: 106 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.shape_pretrain_name) 107 | self.load_network( 108 | self.voxel_encoder, 109 | 'encoder', opts.shape_pretrain_epoch, network_dir=network_dir) 110 | self.load_network( 111 | self.voxel_decoder, 112 | 'decoder', opts.shape_pretrain_epoch, network_dir=network_dir) 113 | self.voxel_encoder.eval() 114 | self.voxel_encoder = self.voxel_encoder.cuda(device_id=self.opts.gpu_id) 115 | self.voxel_decoder.eval() 116 | self.voxel_decoder = self.voxel_decoder.cuda(device_id=self.opts.gpu_id) 117 | 118 | if opts.voxel_size < 64: 119 | self.downsample_voxels = True 120 | self.downsampler = render_utils.Downsample( 121 | 64//opts.voxel_size, use_max=True, batch_mode=True 122 | ).cuda(device_id=self.opts.gpu_id) 123 | 124 | if opts.classify_rot: 125 | self.quat_medoids = torch.from_numpy( 126 | scipy.io.loadmat(osp.join(opts.cache_dir, 'quat_medoids.mat'))['medoids']).type(torch.FloatTensor) 127 | 128 | 129 | def define_criterion(self): 130 | self.smoothed_factor_losses = { 131 | 'shape': 0, 'scale': 0, 'quat': 0, 'trans': 0 132 | } 133 | 134 | def set_input(self, batch): 135 | opts = self.opts 136 | rois = suncg_parse.bboxes_to_rois(batch['bboxes']) 137 | self.data_iter += 1 138 | if rois.numel() <= 5 or rois.numel() >= 5*opts.max_total_rois: #with just one element, batch_norm will screw up 139 | self.invalid_batch = True 140 | return 141 | 
else: 142 | self.invalid_batch = False 143 | self.real_iter += 1 144 | 145 | input_imgs_fine = batch['img_fine'].type(torch.FloatTensor) 146 | input_imgs = batch['img'].type(torch.FloatTensor) 147 | for b in range(input_imgs_fine.size(0)): 148 | input_imgs_fine[b] = self.resnet_transform(input_imgs_fine[b]) 149 | input_imgs[b] = self.resnet_transform(input_imgs[b]) 150 | 151 | self.input_imgs = Variable( 152 | input_imgs.cuda(device=opts.gpu_id), requires_grad=False) 153 | 154 | self.input_imgs_fine = Variable( 155 | input_imgs_fine.cuda(device=opts.gpu_id), requires_grad=False) 156 | 157 | self.rois = Variable( 158 | rois.type(torch.FloatTensor).cuda(device=opts.gpu_id), requires_grad=False) 159 | 160 | code_tensors = suncg_parse.collate_codes(batch['codes']) 161 | code_tensors[0] = code_tensors[0].unsqueeze(1) #unsqueeze voxels 162 | 163 | if opts.classify_rot: 164 | quats_gt = code_tensors[2].clone() 165 | code_tensors[2] = suncg_parse.quats_to_bininds(code_tensors[2], self.quat_medoids) 166 | quats_binned = suncg_parse.bininds_to_quats(code_tensors[2], self.quat_medoids) 167 | # q_diff_loss = (quats_gt-quats_binned).pow(2).sum(1) 168 | # q_sum_loss = (quats_gt+quats_binned).pow(2).sum(1) 169 | # q_loss, _ = torch.stack((q_diff_loss, q_sum_loss), dim=1).min(1) 170 | # print(quats_gt, quats_binned) 171 | # print(q_loss) 172 | 173 | 174 | self.codes_gt = [ 175 | Variable(t.cuda(device=opts.gpu_id), requires_grad=False) for t in code_tensors] 176 | 177 | if self.downsample_voxels: 178 | self.codes_gt[0] = self.downsampler.forward(self.codes_gt[0]) 179 | 180 | if not opts.pred_voxels: 181 | self.codes_gt[0] = self.voxel_encoder.forward(self.codes_gt[0]) 182 | 183 | def get_current_scalars(self): 184 | loss_dict = {'total_loss': self.smoothed_total_loss, 'iter_frac': self.real_iter/self.data_iter} 185 | for k in self.smoothed_factor_losses.keys(): 186 | loss_dict['loss_' + k] = self.smoothed_factor_losses[k] 187 | return loss_dict 188 | 189 | def render_codes(self, code_vars, prefix='mesh'): 190 | opts = self.opts 191 | code_list = suncg_parse.uncollate_codes(code_vars, self.input_imgs.data.size(0), self.rois.data.cpu()[:,0]) 192 | 193 | mesh_dir = osp.join(opts.rendering_dir, opts.name) 194 | if not os.path.exists(mesh_dir): 195 | os.makedirs(mesh_dir) 196 | mesh_file = osp.join(mesh_dir, prefix + '.obj') 197 | render_utils.save_parse(mesh_file, code_list[0], save_objectwise=False) 198 | 199 | png_dir = mesh_file.replace('.obj', '/') 200 | render_utils.render_mesh(mesh_file, png_dir) 201 | 202 | return scipy.misc.imread(osp.join(png_dir, prefix + '_render_000.png')) 203 | 204 | 205 | def get_current_visuals(self): 206 | visuals = {} 207 | opts = self.opts 208 | visuals['img'] = visutil.tensor2im(visutil.undo_resnet_preprocess( 209 | self.input_imgs_fine.data)) 210 | 211 | codes_gt_vis = [t for t in self.codes_gt] 212 | if not opts.pred_voxels: 213 | codes_gt_vis[0] = torch.nn.functional.sigmoid( 214 | self.voxel_decoder.forward(self.codes_gt[0]) 215 | ) 216 | 217 | if opts.classify_rot: 218 | codes_gt_vis[2] = Variable(suncg_parse.bininds_to_quats( 219 | codes_gt_vis[2].cpu().data, self.quat_medoids), requires_grad=False) 220 | 221 | visuals['codes_gt'] = self.render_codes(codes_gt_vis, prefix='gt') 222 | 223 | codes_pred_vis = [t for t in self.codes_pred] 224 | if not opts.pred_voxels: 225 | codes_pred_vis[0] = torch.nn.functional.sigmoid( 226 | self.voxel_decoder.forward(self.codes_pred[0]) 227 | ) 228 | 229 | if opts.classify_rot: 230 | _, bin_inds = 
torch.max(codes_pred_vis[2].data.cpu(), 1) 231 | codes_pred_vis[2] = Variable(suncg_parse.bininds_to_quats( 232 | bin_inds, self.quat_medoids), requires_grad=False) 233 | 234 | visuals['codes_pred'] = self.render_codes(codes_pred_vis, prefix='pred') 235 | 236 | return visuals 237 | 238 | 239 | def get_current_points(self): 240 | pts_dict = {} 241 | return pts_dict 242 | 243 | def forward(self): 244 | opts = self.opts 245 | 246 | self.codes_pred = self.model.forward((self.input_imgs_fine, self.input_imgs, self.rois)) 247 | self.total_loss, self.loss_factors = loss_utils.code_loss( 248 | self.codes_pred, self.codes_gt, 249 | pred_voxels=opts.pred_voxels, 250 | classify_rot=opts.classify_rot, 251 | shape_wt=opts.shape_loss_wt, 252 | scale_wt=opts.scale_loss_wt, 253 | quat_wt=opts.quat_loss_wt, 254 | trans_wt=opts.trans_loss_wt 255 | ) 256 | for k in self.smoothed_factor_losses.keys(): 257 | self.smoothed_factor_losses[k] = 0.99*self.smoothed_factor_losses[k] + 0.01*self.loss_factors[k].data[0] 258 | 259 | 260 | def main(_): 261 | torch.manual_seed(0) 262 | if FLAGS.classify_rot: 263 | FLAGS.nz_rot = 24 264 | else: 265 | FLAGS.nz_rot = 4 266 | FLAGS.n_data_workers = 0 # code crashes otherwise due to json not liking parallelization 267 | trainer = Box3dTrainer(FLAGS) 268 | trainer.init_training() 269 | trainer.train() 270 | 271 | 272 | if __name__ == '__main__': 273 | app.run() -------------------------------------------------------------------------------- /experiments/suncg/dwr.py: -------------------------------------------------------------------------------- 1 | """Script for dwr experiment. 2 | """ 3 | # Sample usage: 4 | 5 | # (init) : python -m factored3d.experiments.suncg.dwr --name=dwr_base --classify_rot --pred_voxels=False --use_context --plot_scalars --display_visuals --save_epoch_freq=1 --display_freq=1000 --display_id=100 --box3d_ft --shape_loss_wt=10 --label_loss_wt=10 --batch_size=8 6 | 7 | # shape_ft : python -m factored3d.experiments.suncg.dwr --name=dwr_shape_ft --classify_rot --pred_voxels=True --shape_dec_ft --use_context --plot_scalars --display_visuals --save_epoch_freq=1 --display_freq=1000 --display_id=100 --shape_loss_wt=10 --label_loss_wt=10 --batch_size=8 --ft_pretrain_epoch=1 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | from absl import app 13 | from absl import flags 14 | import os 15 | import os.path as osp 16 | import numpy as np 17 | import torch 18 | import torchvision 19 | from torch.autograd import Variable 20 | import time 21 | import scipy.misc 22 | import pdb 23 | import copy 24 | 25 | from ...data import suncg as suncg_data 26 | from ...utils import suncg_parse 27 | from ...nnutils import train_utils 28 | from ...nnutils import net_blocks 29 | from ...nnutils import loss_utils 30 | from ...nnutils import oc_net 31 | from ...nnutils import disp_net 32 | from ...utils import visutil 33 | from ...renderer import utils as render_utils 34 | 35 | curr_path = osp.dirname(osp.abspath(__file__)) 36 | cache_path = osp.join(curr_path, '..', '..', 'cachedir') 37 | flags.DEFINE_string('rendering_dir', osp.join(cache_path, 'rendering'), 'Directory where intermittent renderings are saved') 38 | 39 | flags.DEFINE_integer('voxel_size', 32, 'Spatial dimension of shape voxels') 40 | flags.DEFINE_integer('n_voxel_layers', 5, 'Number of layers ') 41 | flags.DEFINE_integer('voxel_nc_max', 128, 'Max 3D channels') 42 | flags.DEFINE_integer('voxel_nc_l1', 8, 'Initial shape encder/decoder layer 
dimension') 43 | 44 | flags.DEFINE_string('shape_pretrain_name', 'object_autoenc_32', 'Experiment name for pretrained shape encoder-decoder') 45 | flags.DEFINE_integer('shape_pretrain_epoch', 800, 'Experiment name for shape decoder') 46 | flags.DEFINE_boolean('shape_dec_ft', False, 'If predicting voxels, should we pretrain from an existing deocder') 47 | 48 | flags.DEFINE_string('box3d_pretrain_name', 'box3d_base', 'Experiment name for pretrained box3d experiment') 49 | flags.DEFINE_integer('box3d_pretrain_epoch', 8, 'Experiment name for shape decoder') 50 | flags.DEFINE_boolean('box3d_ft', False, 'Finetune from existing net trained with gt boxes') 51 | 52 | flags.DEFINE_string('ft_pretrain_name', 'dwr_base', 'Experiment name from which we will pretrain the OCNet') 53 | flags.DEFINE_integer('ft_pretrain_epoch', 0, 'Network epoch from which we will finetune') 54 | 55 | flags.DEFINE_float('label_loss_wt', 1, 'Label loss weight') 56 | 57 | flags.DEFINE_integer('max_rois', 100, 'If we have more objects than this per image, we will subsample. Set to very large value') 58 | flags.DEFINE_integer('max_total_rois', 100, 'If we have more objects than this per batch, we will reject the batch.') 59 | 60 | FLAGS = flags.FLAGS 61 | 62 | 63 | class DWRTrainer(train_utils.Trainer): 64 | def define_model(self): 65 | ''' 66 | Define the pytorch net 'model' whose weights will be updated during training. 67 | ''' 68 | 69 | opts = self.opts 70 | 71 | assert(not (opts.ft_pretrain_epoch > 0 and opts.num_pretrain_epochs > 0)) 72 | assert(not (opts.ft_pretrain_epoch > 0 and opts.box3d_ft)) 73 | assert(not (opts.num_pretrain_epochs > 0 and opts.box3d_ft)) 74 | 75 | self.voxel_encoder, nc_enc_voxel = net_blocks.encoder3d( 76 | opts.n_voxel_layers, nc_max=opts.voxel_nc_max, nc_l1=opts.voxel_nc_l1, nz_shape=opts.nz_shape) 77 | 78 | self.voxel_decoder = net_blocks.decoder3d( 79 | opts.n_voxel_layers, opts.nz_shape, nc_enc_voxel, nc_min=opts.voxel_nc_l1) 80 | 81 | self.model = oc_net.OCNet( 82 | (opts.img_height, opts.img_width), 83 | roi_size=opts.roi_size, 84 | use_context=opts.use_context, nz_feat=opts.nz_feat, 85 | pred_voxels=opts.pred_voxels, nz_shape=opts.nz_shape, 86 | classify_rot=opts.classify_rot, nz_rot=opts.nz_rot, 87 | pred_labels=True, filter_positives=True) 88 | 89 | if opts.box3d_ft: 90 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.box3d_pretrain_name) 91 | self.load_network( 92 | self.model, 93 | 'pred', opts.box3d_pretrain_epoch, network_dir=network_dir) 94 | 95 | # need to add label pred separately to allow finetuning from existing box3d net 96 | self.model.add_label_predictor() 97 | 98 | if opts.ft_pretrain_epoch > 0: 99 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.ft_pretrain_name) 100 | self.load_network( 101 | self.model, 'pred', opts.ft_pretrain_epoch, network_dir=network_dir) 102 | 103 | if opts.pred_voxels: 104 | self.model.code_predictor.shape_predictor.add_voxel_decoder( 105 | copy.deepcopy(self.voxel_decoder)) 106 | 107 | if opts.pred_voxels and opts.shape_dec_ft: 108 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.shape_pretrain_name) 109 | self.load_network( 110 | self.model.code_predictor.shape_predictor.decoder, 111 | 'decoder', opts.shape_pretrain_epoch, network_dir=network_dir) 112 | 113 | if self.opts.num_pretrain_epochs > 0: 114 | self.load_network(self.model, 'pred', self.opts.num_pretrain_epochs-1) 115 | self.model = self.model.cuda(device_id=self.opts.gpu_id) 116 | return 117 | 118 | def init_dataset(self): 119 | opts = self.opts 120 | 
self.real_iter = 1 # number of iterations we actually updated the net for 121 | self.data_iter = 1 # number of iterations we called the data loader 122 | self.resnet_transform = torchvision.transforms.Normalize( 123 | mean=[0.485, 0.456, 0.406], 124 | std=[0.229, 0.224, 0.225]) 125 | split_dir = osp.join(opts.suncg_dir, 'splits') 126 | self.split = suncg_parse.get_split(split_dir, house_names=os.listdir(osp.join(opts.suncg_dir, 'camera'))) 127 | self.dataloader = suncg_data.suncg_data_loader(self.split['train'], opts) 128 | 129 | if not opts.pred_voxels: 130 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.shape_pretrain_name) 131 | self.load_network( 132 | self.voxel_encoder, 133 | 'encoder', opts.shape_pretrain_epoch, network_dir=network_dir) 134 | self.load_network( 135 | self.voxel_decoder, 136 | 'decoder', opts.shape_pretrain_epoch, network_dir=network_dir) 137 | self.voxel_encoder.eval() 138 | self.voxel_encoder = self.voxel_encoder.cuda(device_id=self.opts.gpu_id) 139 | self.voxel_decoder.eval() 140 | self.voxel_decoder = self.voxel_decoder.cuda(device_id=self.opts.gpu_id) 141 | 142 | if opts.voxel_size < 64: 143 | self.downsample_voxels = True 144 | self.downsampler = render_utils.Downsample( 145 | 64//opts.voxel_size, use_max=True, batch_mode=True 146 | ).cuda(device_id=self.opts.gpu_id) 147 | 148 | if opts.classify_rot: 149 | self.quat_medoids = torch.from_numpy( 150 | scipy.io.loadmat(osp.join(opts.cache_dir, 'quat_medoids.mat'))['medoids']).type(torch.FloatTensor) 151 | 152 | 153 | def define_criterion(self): 154 | self.smoothed_factor_losses = { 155 | 'shape': 0, 'scale': 0, 'quat': 0, 'trans': 0 156 | } 157 | self.labels_criterion = torch.nn.BCELoss() 158 | self.smoothed_label_loss = 0 159 | 160 | def set_input(self, batch): 161 | opts = self.opts 162 | rois = suncg_parse.bboxes_to_rois(batch['bboxes_proposals']) 163 | roi_labels = batch['labels_proposals'] 164 | self.data_iter += 1 165 | if roi_labels.sum() <= 1 or roi_labels.sum() >= opts.max_total_rois: #with just one element, batch_norm will screw up 166 | self.invalid_batch = True 167 | return 168 | else: 169 | self.invalid_batch = False 170 | self.real_iter += 1 171 | 172 | input_imgs_fine = batch['img_fine'].type(torch.FloatTensor) 173 | input_imgs = batch['img'].type(torch.FloatTensor) 174 | for b in range(input_imgs_fine.size(0)): 175 | input_imgs_fine[b] = self.resnet_transform(input_imgs_fine[b]) 176 | input_imgs[b] = self.resnet_transform(input_imgs[b]) 177 | 178 | self.input_imgs = Variable( 179 | input_imgs.cuda(device=opts.gpu_id), requires_grad=False) 180 | 181 | self.input_imgs_fine = Variable( 182 | input_imgs_fine.cuda(device=opts.gpu_id), requires_grad=False) 183 | 184 | self.rois = Variable( 185 | rois.type(torch.FloatTensor).cuda(device=opts.gpu_id), requires_grad=False) 186 | 187 | self.roi_labels = Variable( 188 | roi_labels.type(torch.FloatTensor).cuda(device=opts.gpu_id), requires_grad=False) 189 | 190 | code_tensors = suncg_parse.collate_codes(batch['codes_proposals']) 191 | code_tensors[0] = code_tensors[0].unsqueeze(1) #unsqueeze voxels 192 | 193 | if opts.classify_rot: 194 | quats_gt = code_tensors[2].clone() 195 | code_tensors[2] = suncg_parse.quats_to_bininds(code_tensors[2], self.quat_medoids) 196 | quats_binned = suncg_parse.bininds_to_quats(code_tensors[2], self.quat_medoids) 197 | 198 | self.codes_gt = [ 199 | Variable(t.cuda(device=opts.gpu_id), requires_grad=False) for t in code_tensors] 200 | 201 | if self.downsample_voxels: 202 | self.codes_gt[0] = 
self.downsampler.forward(self.codes_gt[0]) 203 | 204 | if not opts.pred_voxels: 205 | self.codes_gt[0] = self.voxel_encoder.forward(self.codes_gt[0]) 206 | 207 | def get_current_scalars(self): 208 | loss_dict = {'total_loss': self.smoothed_total_loss, 'iter_frac': self.real_iter/self.data_iter} 209 | loss_dict['label_loss'] = self.smoothed_label_loss 210 | for k in self.smoothed_factor_losses.keys(): 211 | loss_dict['loss_' + k] = self.smoothed_factor_losses[k] 212 | return loss_dict 213 | 214 | def get_current_visuals(self): 215 | visuals = {} 216 | opts = self.opts 217 | visuals['img'] = visutil.tensor2im(visutil.undo_resnet_preprocess( 218 | self.input_imgs_fine.data)) 219 | return visuals 220 | 221 | def get_current_points(self): 222 | pts_dict = {} 223 | return pts_dict 224 | 225 | def forward(self): 226 | opts = self.opts 227 | 228 | self.codes_pred, self.labels_pred = self.model.forward(( 229 | self.input_imgs_fine, self.input_imgs, self.rois, self.roi_labels)) 230 | self.total_loss, self.loss_factors = loss_utils.code_loss( 231 | self.codes_pred, self.codes_gt, 232 | pred_voxels=opts.pred_voxels, 233 | classify_rot=opts.classify_rot, 234 | shape_wt=opts.shape_loss_wt, 235 | scale_wt=opts.scale_loss_wt, 236 | quat_wt=opts.quat_loss_wt, 237 | trans_wt=opts.trans_loss_wt 238 | ) 239 | labels_loss = self.labels_criterion.forward(self.labels_pred, self.roi_labels.unsqueeze(1)) 240 | self.total_loss += opts.label_loss_wt*labels_loss 241 | 242 | for k in self.smoothed_factor_losses.keys(): 243 | self.smoothed_factor_losses[k] = 0.99*self.smoothed_factor_losses[k] + 0.01*self.loss_factors[k].data[0] 244 | self.smoothed_label_loss = 0.99*self.smoothed_label_loss + 0.01*labels_loss.data[0] 245 | 246 | 247 | def main(_): 248 | torch.manual_seed(0) 249 | FLAGS.suncg_dl_out_codes = True 250 | FLAGS.suncg_dl_out_fine_img = True 251 | FLAGS.suncg_dl_out_proposals = True 252 | FLAGS.suncg_dl_out_voxels = False 253 | FLAGS.suncg_dl_out_layout = False 254 | FLAGS.suncg_dl_out_depth = False 255 | FLAGS.n_data_workers = 0 # code crashes otherwise due to json not liking parallelization 256 | torch.manual_seed(0) 257 | 258 | if FLAGS.classify_rot: 259 | FLAGS.nz_rot = 24 260 | else: 261 | FLAGS.nz_rot = 4 262 | trainer = DWRTrainer(FLAGS) 263 | trainer.init_training() 264 | trainer.train() 265 | 266 | 267 | if __name__ == '__main__': 268 | app.run() -------------------------------------------------------------------------------- /demo/demo_utils.py: -------------------------------------------------------------------------------- 1 | """Testing class for the demo. 
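Wraps the pretrained object, layout, depth and scene-voxel networks (DemoTester)
and simple mesh-rendering helpers (DemoRenderer).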
2 | """ 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | from absl import flags 9 | import os 10 | import os.path as osp 11 | import numpy as np 12 | import torch 13 | import torchvision 14 | from torch.autograd import Variable 15 | import scipy.misc 16 | import pdb 17 | import copy 18 | import scipy.io as sio 19 | 20 | from ..nnutils import test_utils 21 | from ..nnutils import net_blocks 22 | from ..nnutils import voxel_net 23 | from ..nnutils import oc_net 24 | from ..nnutils import disp_net 25 | 26 | from ..utils import suncg_parse 27 | from ..utils import metrics 28 | 29 | from ..renderer import utils as render_utils 30 | 31 | 32 | curr_path = osp.dirname(osp.abspath(__file__)) 33 | cache_path = osp.join(curr_path, '..', 'cachedir') 34 | flags.DEFINE_string('rendering_dir', osp.join(cache_path, 'rendering'), 'Directory where intermittent renderings are saved') 35 | 36 | flags.DEFINE_integer('voxel_size', 32, 'Spatial dimension of shape voxels') 37 | flags.DEFINE_integer('n_voxel_layers', 5, 'Number of layers ') 38 | flags.DEFINE_integer('voxel_nc_max', 128, 'Max 3D channels') 39 | flags.DEFINE_integer('voxel_nc_l1', 8, 'Initial shape encder/decoder layer dimension') 40 | flags.DEFINE_float('voxel_eval_thresh', 0.25, 'Voxel evaluation threshold') 41 | 42 | flags.DEFINE_string('shape_pretrain_name', 'object_autoenc_32', 'Experiment name for pretrained shape encoder-decoder') 43 | flags.DEFINE_integer('shape_pretrain_epoch', 800, 'Experiment name for shape decoder') 44 | 45 | flags.DEFINE_string('layout_name', 'layout_pred', 'Experiment name for layout predictor') 46 | flags.DEFINE_integer('layout_train_epoch', 8, 'Experiment name for layout predictor') 47 | 48 | flags.DEFINE_string('depth_name', 'depth_baseline', 'Experiment name for layout predictor') 49 | flags.DEFINE_integer('depth_train_epoch', 8, 'Experiment name for layout predictor') 50 | 51 | flags.DEFINE_string('scene_voxels_name', 'voxels_baseline', 'Experiment name for layout predictor') 52 | flags.DEFINE_integer('scene_voxels_train_epoch', 8, 'Experiment name for layout predictor') 53 | flags.DEFINE_float('scene_voxels_thresh', 0.25, 'Threshold for scene voxels prediction') 54 | 55 | flags.DEFINE_integer('img_height', 128, 'image height') 56 | flags.DEFINE_integer('img_width', 256, 'image width') 57 | 58 | flags.DEFINE_integer('img_height_fine', 480, 'image height') 59 | flags.DEFINE_integer('img_width_fine', 640, 'image width') 60 | 61 | flags.DEFINE_integer('layout_height', 64, 'amodal depth height : should be half image height') 62 | flags.DEFINE_integer('layout_width', 128, 'amodal depth width : should be half image width') 63 | 64 | flags.DEFINE_integer('voxels_height', 32, 'scene voxels height. 
Should be half of width and depth.') 65 | flags.DEFINE_integer('voxels_width', 64, 'scene voxels width') 66 | flags.DEFINE_integer('voxels_depth', 64, 'scene voxels depth') 67 | 68 | class DemoTester(test_utils.Tester): 69 | def load_oc3d_model(self): 70 | opts = self.opts 71 | self.voxel_encoder, nc_enc_voxel = net_blocks.encoder3d( 72 | opts.n_voxel_layers, nc_max=opts.voxel_nc_max, nc_l1=opts.voxel_nc_l1, nz_shape=opts.nz_shape) 73 | 74 | self.voxel_decoder = net_blocks.decoder3d( 75 | opts.n_voxel_layers, opts.nz_shape, nc_enc_voxel, nc_min=opts.voxel_nc_l1) 76 | 77 | self.oc3d_model = oc_net.OCNet( 78 | (opts.img_height, opts.img_width), 79 | roi_size=opts.roi_size, 80 | use_context=opts.use_context, nz_feat=opts.nz_feat, 81 | pred_voxels=False, nz_shape=opts.nz_shape, pred_labels=True, 82 | classify_rot=opts.classify_rot, nz_rot=opts.nz_rot) 83 | self.oc3d_model.add_label_predictor() 84 | 85 | if opts.pred_voxels: 86 | self.oc3d_model.code_predictor.shape_predictor.add_voxel_decoder( 87 | copy.deepcopy(self.voxel_decoder)) 88 | 89 | self.load_network(self.oc3d_model, 'pred', self.opts.num_train_epoch) 90 | self.oc3d_model.eval() 91 | self.oc3d_model = self.oc3d_model.cuda(device_id=self.opts.gpu_id) 92 | 93 | if opts.pred_voxels: 94 | self.voxel_decoder = copy.deepcopy(self.oc3d_model.code_predictor.shape_predictor.decoder) 95 | 96 | def load_depth_model(self): 97 | opts = self.opts 98 | ## Load depth prediction network 99 | self.depth_model = disp_net.dispnet() 100 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.depth_name) 101 | self.load_network( 102 | self.depth_model, 'pred', opts.depth_train_epoch, network_dir=network_dir) 103 | self.depth_model.eval() 104 | self.depth_model = self.depth_model.cuda(device_id=self.opts.gpu_id) 105 | 106 | def load_layout_model(self): 107 | opts = self.opts 108 | self.layout_model = disp_net.dispnet() 109 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.layout_name) 110 | self.load_network( 111 | self.layout_model, 'pred', opts.layout_train_epoch, network_dir=network_dir) 112 | self.layout_model.eval() 113 | self.layout_model = self.layout_model.cuda(device_id=self.opts.gpu_id) 114 | 115 | def load_scene_voxels_model(self): 116 | opts = self.opts 117 | self.scene_voxels_model = voxel_net.VoxelNet( 118 | [opts.img_height, opts.img_width], 119 | [opts.voxels_width, opts.voxels_height, opts.voxels_depth], 120 | nz_voxels=opts.nz_voxels, 121 | n_voxels_upconv=opts.n_voxels_upconv 122 | ) 123 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.scene_voxels_name) 124 | self.load_network( 125 | self.scene_voxels_model, 'pred', opts.layout_train_epoch, network_dir=network_dir) 126 | self.scene_voxels_model.eval() 127 | self.scene_voxels_model = self.scene_voxels_model.cuda(device_id=self.opts.gpu_id) 128 | 129 | def define_model(self): 130 | self.load_oc3d_model() 131 | self.load_layout_model() 132 | self.load_depth_model() 133 | self.load_scene_voxels_model() 134 | return 135 | 136 | def init_dataset(self): 137 | opts = self.opts 138 | self.resnet_transform = torchvision.transforms.Normalize( 139 | mean=[0.485, 0.456, 0.406], 140 | std=[0.229, 0.224, 0.225]) 141 | 142 | if opts.voxel_size < 64: 143 | self.downsample_voxels = True 144 | self.downsampler = render_utils.Downsample( 145 | 64//opts.voxel_size, use_max=True, batch_mode=True 146 | ).cuda(device_id=self.opts.gpu_id) 147 | else: 148 | self.downsampler = None 149 | 150 | if opts.classify_rot: 151 | self.quat_medoids = torch.from_numpy( 152 | 
scipy.io.loadmat(osp.join(opts.cache_dir, 'quat_medoids.mat'))['medoids']).type(torch.FloatTensor) 153 | 154 | if not opts.pred_voxels: 155 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.shape_pretrain_name) 156 | self.load_network( 157 | self.voxel_decoder, 158 | 'decoder', opts.shape_pretrain_epoch, network_dir=network_dir) 159 | self.voxel_decoder.eval() 160 | self.voxel_decoder = self.voxel_decoder.cuda(device_id=self.opts.gpu_id) 161 | 162 | def decode_shape(self, pred_shape): 163 | opts = self.opts 164 | pred_shape = torch.nn.functional.sigmoid( 165 | self.voxel_decoder.forward(pred_shape) 166 | ) 167 | return pred_shape 168 | 169 | def decode_rotation(self, pred_rot): 170 | opts = self.opts 171 | if opts.classify_rot: 172 | _, bin_inds = torch.max(pred_rot.data.cpu(), 1) 173 | pred_rot = Variable(suncg_parse.bininds_to_quats( 174 | bin_inds, self.quat_medoids), requires_grad=False) 175 | return pred_rot 176 | 177 | def set_input(self, batch): 178 | opts = self.opts 179 | rois = suncg_parse.bboxes_to_rois(batch['bboxes_test_proposals']) 180 | 181 | # Inputs for prediction 182 | input_imgs_fine = batch['img_fine'].type(torch.FloatTensor) 183 | input_imgs = batch['img'].type(torch.FloatTensor) 184 | 185 | self.input_imgs_orig = Variable( 186 | input_imgs.cuda(device=opts.gpu_id), requires_grad=False) 187 | 188 | for b in range(input_imgs_fine.size(0)): 189 | input_imgs_fine[b] = self.resnet_transform(input_imgs_fine[b]) 190 | input_imgs[b] = self.resnet_transform(input_imgs[b]) 191 | 192 | self.input_imgs = Variable( 193 | input_imgs.cuda(device=opts.gpu_id), requires_grad=False) 194 | 195 | self.input_imgs_fine = Variable( 196 | input_imgs_fine.cuda(device=opts.gpu_id), requires_grad=False) 197 | 198 | self.rois = Variable( 199 | rois.type(torch.FloatTensor).cuda(device=opts.gpu_id), requires_grad=False) 200 | 201 | def filter_pos(self, codes, pos_inds): 202 | pos_inds = torch.from_numpy(np.array(pos_inds)).squeeze() 203 | pos_inds = torch.autograd.Variable( 204 | pos_inds.type(torch.LongTensor).cuda(), requires_grad=False) 205 | filtered_codes = [torch.index_select(code, 0, pos_inds) for code in codes] 206 | return filtered_codes 207 | 208 | def predict_factored3d(self): 209 | codes_pred_all, labels_pred = self.oc3d_model.forward( 210 | (self.input_imgs_fine, self.input_imgs, self.rois)) 211 | scores_pred = labels_pred.cpu().data.numpy() 212 | bboxes_pred = self.rois.data.cpu().numpy()[:, 1:] 213 | min_score_vis = np.minimum(0.7, np.max(scores_pred)) 214 | pos_inds_vis = metrics.nms( 215 | np.concatenate((bboxes_pred, scores_pred), axis=1), 216 | 0.3, min_score=min_score_vis) 217 | 218 | codes_pred_vis = self.filter_pos(codes_pred_all, pos_inds_vis) 219 | rois_pos_vis = self.filter_pos([self.rois], pos_inds_vis)[0] 220 | codes_pred_vis[0] = self.decode_shape(codes_pred_vis[0]) 221 | codes_pred_vis[2] = self.decode_rotation(codes_pred_vis[2]) 222 | 223 | layout_pred = self.layout_model.forward(self.input_imgs_orig) 224 | return codes_pred_vis, layout_pred 225 | 226 | def predict_depth(self): 227 | depth_pred = self.depth_model.forward(self.input_imgs_orig) 228 | return depth_pred 229 | 230 | def predict_scene_voxels(self): 231 | scene_voxels_pred = self.scene_voxels_model.forward(self.input_imgs_orig) 232 | return scene_voxels_pred 233 | 234 | 235 | class DemoRenderer(): 236 | def __init__(self, opts): 237 | self.opts = opts 238 | self.mesh_dir = osp.join(opts.rendering_dir, opts.name) 239 | if not os.path.exists(self.mesh_dir): 240 | os.makedirs(self.mesh_dir) 241 | 242 | 
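#
# Rough sketch of how the two classes in this file fit together (added for exposition; the
# actual driver is demo/cli_demo.py, which may differ). Only methods defined in this file are
# used; the batch dict is assumed to carry 'img', 'img_fine' and 'bboxes_test_proposals'.
#
#   tester = DemoTester(opts)
#   tester.define_model()
#   tester.init_dataset()
#   tester.set_input(batch)
#   codes, layout = tester.predict_factored3d()
#   renderer = DemoRenderer(opts)
#   view1, view2 = renderer.render_factored3d(codes, layout)
#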
def save_layout_mesh(self, mesh_dir, layout, prefix='layout'): 243 | opts = self.opts 244 | layout_vis = layout.data[0].cpu().numpy().transpose((1,2,0)) 245 | vs, fs = render_utils.dispmap_to_mesh( 246 | layout_vis, 247 | suncg_parse.cam_intrinsic(), 248 | scale_x=self.opts.layout_width/640, 249 | scale_y=self.opts.layout_height/480 250 | ) 251 | mesh_file = osp.join(self.mesh_dir, prefix + '.obj') 252 | fout = open(mesh_file, 'w') 253 | render_utils.append_obj(fout, vs, fs) 254 | fout.close() 255 | 256 | def save_codes_mesh(self, mesh_dir, code_vars, prefix='codes'): 257 | n_rois = code_vars[0].size()[0] 258 | code_list = suncg_parse.uncollate_codes(code_vars, 1, torch.Tensor(n_rois).fill_(0)) 259 | mesh_file = osp.join(mesh_dir, prefix + '.obj') 260 | render_utils.save_parse(mesh_file, code_list[0], save_objectwise=False, thresh=0.1) 261 | 262 | def render_visuals(self, mesh_dir, obj_name=None): 263 | png_dir = osp.join(mesh_dir, 'rendering') 264 | if obj_name is not None: 265 | render_utils.render_mesh(osp.join(mesh_dir, obj_name + '.obj'), png_dir) 266 | im_view1 = scipy.misc.imread(osp.join(png_dir, '{}_render_000.png'.format(obj_name))) 267 | im_view2 = scipy.misc.imread(osp.join(png_dir, '{}_render_003.png'.format(obj_name))) 268 | else: 269 | render_utils.render_directory(mesh_dir, png_dir) 270 | im_view1 = scipy.misc.imread(osp.join(png_dir, 'render_000.png')) 271 | im_view2 = scipy.misc.imread(osp.join(png_dir, 'render_003.png')) 272 | return im_view1, im_view2 273 | 274 | def render_factored3d(self, codes, layout): 275 | os.system('rm {}/*.obj'.format(self.mesh_dir)) 276 | self.save_codes_mesh(self.mesh_dir, codes) 277 | self.save_layout_mesh(self.mesh_dir, layout) 278 | return self.render_visuals(self.mesh_dir) 279 | 280 | def render_scene_vox(self, scene_vox): 281 | opts = self.opts 282 | os.system('rm {}/*.obj'.format(self.mesh_dir)) 283 | voxels = scene_vox.data.cpu()[0,0].numpy() 284 | 285 | mesh_file = osp.join(self.mesh_dir, 'scene_vox.obj') 286 | vs, fs = render_utils.voxels_to_mesh(voxels.astype(np.float32), thresh=0.25) 287 | vs[:,0] -= voxels.shape[0]/2.0 288 | vs[:,1] -= voxels.shape[1]/2.0 289 | vs *= 0.04*(64//opts.voxels_height) 290 | fout = open(mesh_file, 'w') 291 | render_utils.append_obj(fout, vs, fs) 292 | fout.close() 293 | return self.render_visuals(self.mesh_dir, obj_name='scene_vox') 294 | 295 | def render_depth(self, dmap): 296 | opts = self.opts 297 | os.system('rm {}/*.obj'.format(self.mesh_dir)) 298 | dmap_pred = dmap.data[0].cpu().numpy().transpose((1,2,0)) 299 | mesh_file = osp.join(self.mesh_dir, 'depth.obj') 300 | dmap_points = render_utils.dispmap_to_points( 301 | dmap_pred, 302 | suncg_parse.cam_intrinsic(), 303 | scale_x=self.opts.layout_width/640, 304 | scale_y=self.opts.layout_height/480 305 | ) 306 | 307 | vs, fs = render_utils.points_to_cubes(dmap_points) 308 | fout = open(mesh_file, 'w') 309 | render_utils.append_obj(fout, vs, fs) 310 | fout.close() 311 | 312 | return self.render_visuals(self.mesh_dir, obj_name='depth') 313 | -------------------------------------------------------------------------------- /data/suncg.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | 4 | import os 5 | import os.path as osp 6 | import numpy as np 7 | import collections 8 | 9 | import scipy.misc 10 | import scipy.linalg 11 | import scipy.io as sio 12 | import scipy.ndimage.interpolation 13 | from absl import flags 14 | 15 | import torch 16 | 
from torch.utils.data import Dataset 17 | from torch.utils.data import DataLoader 18 | from torch.utils.data.dataloader import default_collate 19 | 20 | from ..utils import suncg_parse 21 | 22 | from ..renderer import utils as render_utils 23 | 24 | #-------------- flags -------------# 25 | #----------------------------------# 26 | flags.DEFINE_string('suncg_dir', '/data0/shubhtuls/datasets/suncg_pbrs_release', 'Suncg Data Directory') 27 | flags.DEFINE_boolean('filter_objects', True, 'Restrict object classes to main semantic classes.') 28 | flags.DEFINE_integer('max_views_per_house', 0, '0->use all views. Else we randomly select up to the specified number.') 29 | 30 | flags.DEFINE_boolean('suncg_dl_out_codes', True, 'Should the data loader load codes') 31 | flags.DEFINE_boolean('suncg_dl_out_layout', False, 'Should the data loader load layout') 32 | flags.DEFINE_boolean('suncg_dl_out_depth', False, 'Should the data loader load modal depth') 33 | flags.DEFINE_boolean('suncg_dl_out_fine_img', True, 'We should output fine images') 34 | flags.DEFINE_boolean('suncg_dl_out_voxels', False, 'We should output scene voxels') 35 | flags.DEFINE_boolean('suncg_dl_out_proposals', False, 'We should output edgebox proposals for training') 36 | flags.DEFINE_boolean('suncg_dl_out_test_proposals', False, 'We should output edgebox proposals for testing') 37 | flags.DEFINE_integer('suncg_dl_max_proposals', 40, 'Max number of proposals per image') 38 | 39 | flags.DEFINE_integer('img_height', 128, 'image height') 40 | flags.DEFINE_integer('img_width', 256, 'image width') 41 | 42 | flags.DEFINE_integer('img_height_fine', 480, 'fine image height') 43 | flags.DEFINE_integer('img_width_fine', 640, 'fine image width') 44 | 45 | flags.DEFINE_integer('layout_height', 64, 'amodal depth height : should be half image height') 46 | flags.DEFINE_integer('layout_width', 128, 'amodal depth width : should be half image width') 47 | 48 | flags.DEFINE_integer('voxels_height', 32, 'scene voxels height. 
Should be half of width and depth.') 49 | flags.DEFINE_integer('voxels_width', 64, 'scene voxels width') 50 | flags.DEFINE_integer('voxels_depth', 64, 'scene voxels depth') 51 | flags.DEFINE_boolean('suncg_dl_debug_mode', False, 'Just running for debugging, should not preload ojects') 52 | 53 | #------------- Dataset ------------# 54 | #----------------------------------# 55 | class SuncgDataset(Dataset): 56 | '''SUNCG data loader''' 57 | def __init__(self, house_names, opts): 58 | self._suncg_dir = opts.suncg_dir 59 | 60 | self._house_names = house_names 61 | self.img_size = (opts.img_height, opts.img_width) 62 | self.output_fine_img = opts.suncg_dl_out_fine_img 63 | if self.output_fine_img: 64 | self.img_size_fine = (opts.img_height_fine, opts.img_width_fine) 65 | self.output_codes = opts.suncg_dl_out_codes 66 | self.output_layout = opts.suncg_dl_out_layout 67 | self.output_modal_depth = opts.suncg_dl_out_depth 68 | self.output_voxels = opts.suncg_dl_out_voxels 69 | self.output_proposals = opts.suncg_dl_out_proposals 70 | self.output_test_proposals = opts.suncg_dl_out_test_proposals 71 | 72 | if self.output_layout or self.output_modal_depth: 73 | self.layout_size = (opts.layout_height, opts.layout_width) 74 | if self.output_voxels: 75 | self.voxels_size = (opts.voxels_width, opts.voxels_height, opts.voxels_depth) 76 | 77 | if self.output_proposals: 78 | self.max_proposals = opts.suncg_dl_max_proposals 79 | if self.output_codes: 80 | self.max_rois = opts.max_rois 81 | self._obj_loader = suncg_parse.ObjectLoader(osp.join(opts.suncg_dir, 'object')) 82 | if not opts.suncg_dl_debug_mode: 83 | self._obj_loader.preload() 84 | if opts.filter_objects: 85 | self._meta_loader = suncg_parse.MetaLoader(osp.join(opts.suncg_dir, 'ModelCategoryMappingEdited.csv')) 86 | else: 87 | self._meta_loader = None 88 | 89 | data_tuples = [] 90 | for hx, house in enumerate(house_names): 91 | if (hx % 1000) == 0: 92 | print('Reading image names from house {}/{}'.format(hx, len(house_names))) 93 | imgs_dir = osp.join(opts.suncg_dir, 'renderings_ldr', house) 94 | view_ids = [f[0:6] for f in os.listdir(imgs_dir)] 95 | 96 | rng = np.random.RandomState([ord(c) for c in house]) 97 | rng.shuffle(view_ids) 98 | 99 | if (opts.max_views_per_house > 0) and (opts.max_views_per_house < len(view_ids)): 100 | view_ids = view_ids[0:opts.max_views_per_house] 101 | for view_id in view_ids: 102 | data_tuples.append((house, view_id)) 103 | self.n_imgs = len(data_tuples) 104 | self._data_tuples = data_tuples 105 | self._preload_cameras(house_names) 106 | 107 | def forward_img(self, index): 108 | house, view_id = self._data_tuples[index] 109 | img = scipy.misc.imread(osp.join(self._suncg_dir, 'renderings_ldr', house, view_id + '_mlt.png')) 110 | if self.output_fine_img: 111 | img_fine = scipy.misc.imresize(img, self.img_size_fine) 112 | img_fine = np.transpose(img_fine, (2,0,1)) 113 | 114 | img = scipy.misc.imresize(img, self.img_size) 115 | img = np.transpose(img, (2,0,1)) 116 | if self.output_fine_img: 117 | return img/255, img_fine/255, house, view_id 118 | else: 119 | return img/255, house, view_id 120 | 121 | def _preload_cameras(self, house_names): 122 | self._house_cameras = {} 123 | for hx, house in enumerate(house_names): 124 | if (hx % 200) == 0: 125 | print('Pre-loading cameras from house {}/{}'.format(hx, len(house_names))) 126 | cam_file = osp.join(self._suncg_dir, 'camera', house, 'room_camera.txt') 127 | camera_poses = suncg_parse.read_camera_pose(cam_file) 128 | self._house_cameras[house] = camera_poses 129 | 130 | def 
forward_codes(self, house_name, view_id): 131 | #print('Loading Codes for {}_{}'.format(house_name, view_id)) 132 | campose = self._house_cameras[house_name][int(view_id)] 133 | cam2world = suncg_parse.campose_to_extrinsic(campose) 134 | world2cam = scipy.linalg.inv(cam2world) 135 | 136 | house_data = suncg_parse.load_json( 137 | osp.join(self._suncg_dir, 'house', house_name, 'house.json')) 138 | bbox_data = sio.loadmat( 139 | osp.join(self._suncg_dir, 'bboxes_node', house_name, view_id + '_bboxes.mat')) 140 | objects_data, objects_bboxes = suncg_parse.select_ids( 141 | house_data, bbox_data, meta_loader=self._meta_loader, min_pixels=500) 142 | objects_codes = suncg_parse.codify_room_data( 143 | objects_data, world2cam, self._obj_loader) 144 | objects_bboxes -= 1 #convert from 1-indexing to 0-indexing 145 | if len(objects_codes) > self.max_rois: 146 | select_inds = np.random.permutation(len(objects_codes))[0:self.max_rois] 147 | objects_bboxes = objects_bboxes[select_inds, :] 148 | objects_codes = [objects_codes[ix] for ix in select_inds] 149 | return objects_codes, objects_bboxes 150 | 151 | def forward_proposals(self, house_name, view_id, codes_gt, bboxes_gt): 152 | proposals_data = sio.loadmat( 153 | osp.join(self._suncg_dir, 'edgebox_proposals', house_name, view_id + '_proposals.mat')) 154 | bboxes_proposals = proposals_data['proposals'][:,0:4] 155 | bboxes_proposals -= 1 #zero indexed 156 | codes, bboxes, labels = suncg_parse.extract_proposal_codes( 157 | codes_gt, bboxes_gt, bboxes_proposals, self.max_proposals) 158 | return codes, bboxes, labels 159 | 160 | def forward_test_proposals(self, house_name, view_id): 161 | proposals_data = sio.loadmat( 162 | osp.join(self._suncg_dir, 'edgebox_proposals', house_name, view_id + '_proposals.mat')) 163 | bboxes_proposals = proposals_data['proposals'][:,0:4] 164 | bboxes_proposals -= 1 #zero indexed 165 | return bboxes_proposals 166 | 167 | def forward_layout(self, house_name, view_id, bg_depth=1e4): 168 | depth_im = scipy.misc.imread(osp.join( 169 | self._suncg_dir, 'renderings_layout', house_name, view_id + '_depth.png')) 170 | depth_im = depth_im.astype(np.float)/1000.0 # depth was saved in mm 171 | depth_im += bg_depth*np.equal(depth_im,0).astype(np.float) 172 | disp_im = 1./depth_im 173 | amodal_depth = scipy.ndimage.interpolation.zoom( 174 | disp_im, (self.layout_size[0]/disp_im.shape[0], self.layout_size[1]/disp_im.shape[1]), order=0) 175 | amodal_depth = np.reshape(amodal_depth, (1, self.layout_size[0], self.layout_size[1])) 176 | return amodal_depth 177 | 178 | def forward_depth(self, house_name, view_id, bg_depth=1e4): 179 | depth_im = scipy.misc.imread(osp.join( 180 | self._suncg_dir, 'renderings_depth', house_name, view_id + '_depth.png')) 181 | depth_im = depth_im.astype(np.float)/1000.0 # depth was saved in mm 182 | depth_im += bg_depth*np.equal(depth_im,0).astype(np.float) 183 | disp_im = 1./depth_im 184 | modal_depth = scipy.ndimage.interpolation.zoom( 185 | disp_im, (self.layout_size[0]/disp_im.shape[0], self.layout_size[1]/disp_im.shape[1]), order=0) 186 | modal_depth = np.reshape(modal_depth, (1, self.layout_size[0], self.layout_size[1])) 187 | return modal_depth 188 | 189 | def forward_voxels(self, house_name, view_id): 190 | scene_voxels = sio.loadmat(osp.join( 191 | self._suncg_dir, 'scene_voxels', house_name, view_id + '_voxels.mat')) 192 | scene_voxels = render_utils.downsample( 193 | scene_voxels['sceneVox'].astype(np.float32), 194 | 64//self.voxels_size[1], use_max=True) 195 | return scene_voxels 196 | 197 | def __len__(self): 198 
| return self.n_imgs 199 | 200 | def __getitem__(self, index): 201 | if self.output_fine_img: 202 | img, img_fine, house_name, view_id = self.forward_img(index) 203 | else: 204 | img, house_name, view_id = self.forward_img(index) 205 | 206 | elem = { 207 | 'img': img, 208 | 'house_name': house_name, 209 | 'view_id': view_id, 210 | } 211 | if self.output_layout: 212 | layout = self.forward_layout(house_name, view_id) 213 | elem['layout'] = layout 214 | 215 | if self.output_voxels: 216 | voxels = self.forward_voxels(house_name, view_id) 217 | elem['voxels'] = voxels 218 | 219 | if self.output_modal_depth: 220 | depth = self.forward_depth(house_name, view_id) 221 | elem['depth'] = depth 222 | 223 | if self.output_codes: 224 | codes_gt, bboxes_gt = self.forward_codes(house_name, view_id) 225 | elem['codes'] = codes_gt 226 | elem['bboxes'] = bboxes_gt 227 | 228 | if self.output_proposals: 229 | codes_proposals, bboxes_proposals, labels_proposals = self.forward_proposals( 230 | house_name, view_id, codes_gt, bboxes_gt) 231 | if labels_proposals.size == 0: 232 | print('No proposal found: ', house_name, view_id) 233 | elem['codes_proposals'] = codes_proposals 234 | elem['bboxes_proposals'] = bboxes_proposals 235 | elem['labels_proposals'] = labels_proposals 236 | 237 | if self.output_test_proposals: 238 | bboxes_proposals = self.forward_test_proposals(house_name, view_id) 239 | if bboxes_proposals.size == 0: 240 | print('No proposal found: ', house_name, view_id) 241 | elem['bboxes_test_proposals'] = bboxes_proposals 242 | 243 | if self.output_fine_img: 244 | elem['img_fine'] = img_fine 245 | 246 | #print('House : {}, View : {}, Code Length : {}'.format(house_name, view_id, len(code))) 247 | return elem 248 | 249 | #-------- Collate Function --------# 250 | #----------------------------------# 251 | def recursive_convert_to_torch(elem): 252 | if torch.is_tensor(elem): 253 | return elem 254 | elif type(elem).__module__ == 'numpy': 255 | if elem.size == 0: 256 | return torch.zeros(elem.shape).type(torch.DoubleTensor) 257 | else: 258 | return torch.from_numpy(elem) 259 | elif isinstance(elem, int): 260 | return torch.LongTensor([elem]) 261 | elif isinstance(elem, float): 262 | return torch.DoubleTensor([elem]) 263 | elif isinstance(elem, collections.Mapping): 264 | return {key: recursive_convert_to_torch(d[key]) for key in elem} 265 | elif isinstance(elem, collections.Sequence): 266 | return [recursive_convert_to_torch(samples) for samples in elem] 267 | else: 268 | return elem 269 | 270 | def collate_fn(batch): 271 | '''SUNCG data collater. 272 | 273 | Assumes each instance is a dict. 274 | Applies different collation rules for each field. 
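For example, the code and bounding-box fields ('codes', 'bboxes', 'codes_proposals',
'bboxes_proposals', 'bboxes_test_proposals') are kept as per-example lists of tensors,
'labels_proposals' are concatenated across the batch, and every remaining field goes
through default_collate.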
275 | 276 | Args: 277 | batch: List of loaded elements via Dataset.__getitem__ 278 | ''' 279 | collated_batch = {} 280 | # iterate over keys 281 | for key in batch[0]: 282 | if key =='codes' or key=='bboxes' or key=='codes_proposals' or key=='bboxes_proposals' or key=='bboxes_test_proposals': 283 | collated_batch[key] = [recursive_convert_to_torch(elem[key]) for elem in batch] 284 | elif key == 'labels_proposals': 285 | collated_batch[key] = torch.cat([default_collate(elem[key]) for elem in batch if elem[key].size > 0]) 286 | else: 287 | collated_batch[key] = default_collate([elem[key] for elem in batch]) 288 | return collated_batch 289 | 290 | #----------- Data Loader ----------# 291 | #----------------------------------# 292 | def suncg_data_loader(house_names, opts): 293 | dset = SuncgDataset(house_names, opts) 294 | return DataLoader( 295 | dset, batch_size=opts.batch_size, 296 | shuffle=True, num_workers=opts.n_data_workers, 297 | collate_fn=collate_fn) 298 | 299 | 300 | def suncg_data_loader_benchmark(house_names, opts): 301 | dset = SuncgDataset(house_names, opts) 302 | return DataLoader( 303 | dset, batch_size=opts.batch_size, 304 | shuffle=False, num_workers=opts.n_data_workers, 305 | collate_fn=collate_fn) -------------------------------------------------------------------------------- /preprocess/suncg/matUtils/read_wobj_safe.m: -------------------------------------------------------------------------------- 1 | function OBJ=read_wobj(fullfilename) 2 | % Read the objects from a Wavefront OBJ file 3 | % 4 | % OBJ=read_wobj(filename); 5 | % 6 | % OBJ struct containing: 7 | % 8 | % OBJ.vertices : Vertices coordinates 9 | % OBJ.vertices_texture: Texture coordinates 10 | % OBJ.vertices_normal : Normal vectors 11 | % OBJ.vertices_point : Vertice data used for points and lines 12 | % OBJ.material : Parameters from external .MTL file, will contain parameters like 13 | % newmtl, Ka, Kd, Ks, illum, Ns, map_Ka, map_Kd, map_Ks, 14 | % example of an entry from the material object: 15 | % OBJ.material(i).type = newmtl 16 | % OBJ.material(i).data = 'vase_tex' 17 | % OBJ.objects : Cell object with all objects in the OBJ file, 18 | % example of a mesh object: 19 | % OBJ.objects(i).type='f' 20 | % OBJ.objects(i).data.vertices: [n x 3 double] 21 | % OBJ.objects(i).data.texture: [n x 3 double] 22 | % OBJ.objects(i).data.normal: [n x 3 double] 23 | % 24 | % Example, 25 | % OBJ=read_wobj('examples\example10.obj'); 26 | % FV.vertices=OBJ.vertices; 27 | % FV.faces=OBJ.objects(3).data.vertices; 28 | % figure, patch(FV,'facecolor',[1 0 0]); camlight 29 | % 30 | % Function is written by D.Kroon University of Twente (June 2010) 31 | 32 | verbose=false; 33 | 34 | if(exist('fullfilename','var')==0) 35 | [filename, filefolder] = uigetfile('*.obj', 'Read obj-file'); 36 | fullfilename = [filefolder filename]; 37 | end 38 | filefolder = fileparts( fullfilename); 39 | if(verbose),disp(['Reading Object file : ' fullfilename]); end 40 | 41 | 42 | % Read the DI3D OBJ textfile to a cell array 43 | file_words = file2cellarray( fullfilename); 44 | % Remove empty cells, merge lines split by "\" and convert strings with values to double 45 | [ftype fdata]= fixlines(file_words); 46 | 47 | % Vertex data 48 | vertices=[]; nv=0; 49 | vertices_texture=[]; nvt=0; 50 | vertices_point=[]; nvp=0; 51 | vertices_normal=[]; nvn=0; 52 | material=[]; 53 | 54 | % Surface data 55 | no=0; 56 | 57 | % Loop through the Wavefront object file 58 | for iline=1:length(ftype) 59 | if(mod(iline,10000)==0), 60 | if(verbose),disp(['Lines processed 
: ' num2str(iline)]); end 61 | end 62 | 63 | type=ftype{iline}; data=fdata{iline}; 64 | 65 | % Switch on data type line 66 | switch(type) 67 | case{'mtllib'} 68 | if(iscell(data)) 69 | datanew=[]; 70 | for i=1:length(data) 71 | datanew=[datanew data{i}]; 72 | if(i1), 156 | val=tvals(2); 157 | if(val<0), val=val+1+nvt; end 158 | array_texture(i)=val; 159 | end 160 | end 161 | objects(no).type='l'; 162 | objects(no).data.vertices=array_vertices; 163 | objects(no).data.texture=array_texture; 164 | case('f') 165 | no=no+1; if(mod(no,10000)==1), objects(no+10001).data=0; end 166 | array_vertices=[]; 167 | array_texture=[]; 168 | array_normal=[]; 169 | for i=1:length(data); 170 | switch class(data) 171 | case 'cell' 172 | tvals=str2double(stringsplit(data{i},'/')); 173 | case 'string' 174 | tvals=str2double(stringsplit(data,'/')); 175 | otherwise 176 | tvals=data(i); 177 | end 178 | val=tvals(1); 179 | 180 | if(val<0), val=val+1+nv; end 181 | array_vertices(i)=val; 182 | if(length(tvals)>1), 183 | if(isfinite(tvals(2))) 184 | val=tvals(2); 185 | if(val<0), val=val+1+nvt; end 186 | array_texture(i)=val; 187 | end 188 | end 189 | if(length(tvals)>2), 190 | val=tvals(3); 191 | if(val<0), val=val+1+nvn; end 192 | array_normal(i)=val; 193 | end 194 | end 195 | 196 | % A face of more than 3 indices is always split into 197 | % multiple faces of only 3 indices. 198 | objects(no).type='f'; 199 | findex=1:min (3,length(array_vertices)); 200 | 201 | objects(no).data.vertices=array_vertices(findex); 202 | if(~isempty(array_texture)),objects(no).data.texture=array_texture(findex); end 203 | if(~isempty(array_normal)),objects(no).data.normal=array_normal(findex); end 204 | for i=1:length(array_vertices)-3; 205 | no=no+1; if(mod(no,10000)==1), objects(no+10001).data=0; end 206 | findex=[1 2+i 3+i]; 207 | findex(findex>length(array_vertices))=findex(findex>length(array_vertices))-length(array_vertices); 208 | objects(no).type='f'; 209 | objects(no).data.vertices=array_vertices(findex); 210 | if(~isempty(array_texture)),objects(no).data.texture=array_texture(findex); end 211 | if(~isempty(array_normal)),objects(no).data.normal=array_normal(findex); end 212 | end 213 | case{'#','$'} 214 | % Comment 215 | tline=' %'; 216 | if(iscell(data)) 217 | for i=1:length(data), tline=[tline ' ' data{i}]; end 218 | else 219 | tline=[tline data]; 220 | end 221 | if(verbose), disp(tline); end 222 | case{''} 223 | otherwise 224 | no=no+1; 225 | if(mod(no,10000)==1), objects(no+10001).data=0; end 226 | objects(no).type=type; 227 | objects(no).data=data; 228 | end 229 | end 230 | 231 | % Initialize new object list, which will contain the "collapsed" objects 232 | objects2(no).data=0; 233 | 234 | index=0; 235 | 236 | i=0; 237 | while (ichar')'; 302 | fclose(fid); 303 | file_lines = regexp(file_text, '\n+', 'split'); 304 | file_words = regexp(file_lines, '\s+', 'split'); 305 | 306 | function [ftype fdata]=fixlines(file_words) 307 | ftype=cell(size(file_words)); 308 | fdata=cell(size(file_words)); 309 | 310 | iline=0; jline=0; 311 | while(iline thresh: 29 | radius = pred_vol[x,y,z] 30 | v_all[v_counter:v_counter+8,:] *= radius 31 | v_all[v_counter:v_counter+8,:] += (np.array([[x, y, z]]) + 0.5) 32 | v_counter += 8 33 | 34 | return v_all, f_all 35 | 36 | 37 | def voxels_to_points(pred_vol, thresh=0.5): 38 | v_counter = 0 39 | tot_points = np.greater(pred_vol, thresh).sum() 40 | v_all = np.zeros([tot_points, 3]) 41 | for x in range(pred_vol.shape[0]): 42 | for y in range(pred_vol.shape[1]): 43 | for z in range(pred_vol.shape[2]): 44 | 
if pred_vol[x,y,z] > thresh: 45 | v_all[v_counter,:] = (np.array([[x, y, z]]) + 0.5) 46 | v_counter += 1 47 | return v_all 48 | 49 | 50 | def append_obj(mf_handle, vertices, faces): 51 | for vx in range(vertices.shape[0]): 52 | mf_handle.write('v {:f} {:f} {:f}\n'.format(vertices[vx, 0], vertices[vx, 1], vertices[vx, 2])) 53 | for fx in range(faces.shape[0]): 54 | mf_handle.write('f {:d} {:d} {:d}\n'.format(faces[fx, 0], faces[fx, 1], faces[fx, 2])) 55 | return 56 | 57 | 58 | def append_mtl_obj(mf_handle, vertices, faces, mtl_ids): 59 | for vx in range(vertices.shape[0]): 60 | mf_handle.write('v {:f} {:f} {:f}\n'.format(vertices[vx, 0], vertices[vx, 1], vertices[vx, 2])) 61 | for fx in range(faces.shape[0]): 62 | mf_handle.write('usemtl m{}\n'.format(mtl_ids[fx])) 63 | mf_handle.write('f {:d} {:d} {:d}\n'.format(faces[fx, 0], faces[fx, 1], faces[fx, 2])) 64 | return 65 | 66 | 67 | def append_mtl(mtl_handle, mtl_ids, colors): 68 | for mx in range(len(mtl_ids)): 69 | mtl_handle.write('newmtl m{}\n'.format(mtl_ids[mx])) 70 | mtl_handle.write('Kd {:f} {:f} {:f}\n'.format(colors[mx, 0], colors[mx, 1], colors[mx, 2])) 71 | mtl_handle.write('Ka 0 0 0\n') 72 | return 73 | 74 | def render_mesh(mesh_file, png_dir, scale=0.5): 75 | cmd = 'python3.4 {:s}/render_script.py --obj_file {:s} --out_dir {:s} --r 2 --delta_theta 30 --sz_x {} --sz_y {} >> /dev/null 2>&1'.format(blender_dir, mesh_file, png_dir, int(640*scale), int(480*scale)) 76 | os.system(cmd) 77 | return 78 | 79 | def render_directory(mesh_dir, png_dir, scale=0.5): 80 | cmd = 'python3.4 {:s}/render_dir_script.py --obj_dir {:s} --out_dir {:s} --r 2 --delta_theta 30 --sz_x {} --sz_y {} >> /dev/null 2>&1'.format(blender_dir, mesh_dir, png_dir, int(640*scale), int(480*scale)) 81 | os.system(cmd) 82 | return 83 | 84 | class Downsample(torch.nn.Module): 85 | def __init__(self, s, use_max=False, batch_mode=False): 86 | super(Downsample, self).__init__() 87 | self.batch_mode = batch_mode 88 | if(use_max): 89 | layer = torch.nn.MaxPool3d(s, stride=s) 90 | else: 91 | layer = torch.nn.Conv3d(1, 1, s, stride=s) 92 | layer.weight.data.fill_(1./layer.weight.data.nelement()) 93 | layer.bias.data.fill_(0) 94 | self.layer = layer 95 | 96 | def forward(self, vol): 97 | if self.batch_mode: 98 | out_vol = self.layer.forward(vol) 99 | else: 100 | out_vol = self.layer.forward(torch.unsqueeze(torch.unsqueeze(vol, 0), 0))[0,0] 101 | return out_vol 102 | 103 | def downsample(vol, s, use_max=False): 104 | module = Downsample(s, use_max=use_max).type(torch.FloatTensor) 105 | vol_var = Variable(torch.from_numpy(vol), requires_grad=False).type(torch.FloatTensor) 106 | return module.forward(vol_var).data.numpy() 107 | 108 | def prediction_to_entity(pred): 109 | if torch.is_tensor(pred[0]): 110 | pred = [p.numpy() for p in pred] 111 | volume = pred[0].astype(np.float) 112 | transform = pred[1].astype(np.float) 113 | if transform.shape[0] == 4 and transform.shape[1]==4: 114 | return volume, transform 115 | else: 116 | scale_mat = np.diag(pred[1].astype(np.float)) 117 | rot_mat = transformations.quaternion_matrix(pred[2].astype(np.float))[0:3, 0:3] 118 | transform = np.eye(4) 119 | transform[0:3, 0:3] = np.matmul(rot_mat, scale_mat) 120 | transform[0:3, 3] = pred[3].astype(np.float) 121 | return volume, transform 122 | 123 | def save_parse(mesh_file, codes, thresh=0.5, use_soft_voxels=True, save_objectwise=False): 124 | mtl_file = mesh_file.replace('.obj','.mtl') 125 | fout_mtl = open(mtl_file, 'w') 126 | mtl_file = mtl_file.split('/')[-1] 127 | 128 | n_parts = len(codes) 
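# One material per object: color indices are spread over the colormap so each predicted object is rendered in a distinct color.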
129 | color_inds = np.linspace(0, 255, n_parts).astype(np.int).tolist() 130 | for p in range(n_parts): 131 | cmap = colormap[color_inds[p]] 132 | fout_mtl.write( 133 | 'newmtl m{:d}\nKd {:f} {:f} {:f}\nKa 0 0 0\n'.format(p, cmap[0], cmap[1], cmap[2])) 134 | fout_mtl.close() 135 | if not save_objectwise: 136 | fout = open(mesh_file, 'w') 137 | fout.write('mtllib {:s}\n'.format(mtl_file)) 138 | 139 | f_counter = 0 140 | for p in range(n_parts): 141 | volume, transform = prediction_to_entity(codes[p]) 142 | if save_objectwise: 143 | fout = open(mesh_file.replace('.obj', '_' + str(p) + '.obj'), 'w') 144 | fout.write('mtllib {:s}\n'.format(mtl_file)) 145 | volume = downsample(volume, volume.shape[0]//32) 146 | v, f = voxels_to_mesh(volume, thresh=thresh) 147 | v = v/32 - 0.5 148 | 149 | if v.size > 0: 150 | n_verts = v.shape[0] 151 | v_homographic = np.concatenate((v, np.ones((n_verts, 1))), axis=1).transpose() 152 | v_transformed = np.matmul(transform[0:3,:], v_homographic).transpose() 153 | fout.write('usemtl m{:d}\n'.format(p)) 154 | append_obj(fout, v_transformed, f + f_counter) 155 | 156 | if not save_objectwise: 157 | f_counter += n_verts 158 | 159 | if save_objectwise or p==(n_parts-1): 160 | fout.close() 161 | 162 | 163 | def codes_to_points(codes, thresh=0.5, objectwise=False): 164 | scene_verts = [] 165 | n_parts = len(codes) 166 | for p in range(n_parts): 167 | volume, transform = prediction_to_entity(codes[p]) 168 | volume = downsample(volume, volume.shape[0]//32) 169 | v = voxels_to_points(volume, thresh=thresh) 170 | v = v/32 - 0.5 171 | 172 | if v.size > 0: 173 | n_verts = v.shape[0] 174 | v_homographic = np.concatenate((v, np.ones((n_verts, 1))), axis=1).transpose() 175 | v_transformed = np.matmul(transform[0:3,:], v_homographic).transpose() 176 | scene_verts.append(v_transformed) 177 | 178 | if not objectwise: 179 | scene_verts = np.concatenate(scene_verts, axis=0) 180 | 181 | return scene_verts 182 | 183 | 184 | def dispmap_to_mesh(dmap, k_mat, scale_x=1, scale_y=1, min_disp=1e-2): 185 | ''' 186 | Converts an inverse depth map to a 3D mesh. 187 | 188 | Args: 189 | dmap: H X W inverse depth map 190 | k_mat : 3 X 3 intrinsic matrix 191 | scale_x: Scale the intrinsic matrix's x row by this factor e.g. scale_x=0.5 implies downsampling by a factor of 2 192 | scale_y: Scale the intrinsic matrix's y row by this factor e.g.
scale_y=0.5 implies downsampling by a factor of 2 193 | min_disp: Points with disp less than this are not rendered 194 | Returns: 195 | vs: n_pts X 3 [x,y,z] coordinates 196 | fs: mesh faces 197 | ''' 198 | H = np.shape(dmap)[0] 199 | W = np.shape(dmap)[1] 200 | dmap = dmap.reshape((H, W)) 201 | k_mat[0, :] = scale_x*k_mat[0, :] 202 | k_mat[1, :] = scale_y*k_mat[1, :] 203 | k_inv = np.linalg.inv(k_mat) 204 | num_pts = H*W 205 | pts = np.ones((3, num_pts)) 206 | ctr = 0 207 | for y in range(H): 208 | for x in range(W): 209 | pts[0, ctr] = x + 0.5 210 | pts[1, ctr] = y + 0.5 211 | pts[:, ctr] *= (1/dmap[y,x]) 212 | ctr += 1 213 | 214 | verts = np.transpose(np.matmul(k_inv, pts)) 215 | num_faces_max = H*W*2 216 | faces = np.zeros((num_faces_max, 3)) 217 | face_ctr = 0 218 | for y in range(H-1): 219 | for x in range(W-1): 220 | if (dmap[y,x] > min_disp) and (dmap[y,x+1] > min_disp) and (dmap[y+1,x+1] > min_disp): 221 | faces[face_ctr, 0] = y*W + x + 1 222 | faces[face_ctr, 1] = y*W + (x+1) + 1 223 | faces[face_ctr, 2] = (y+1)*W + (x+1) + 1 224 | face_ctr += 1 225 | 226 | if (dmap[y,x] > min_disp) and (dmap[y+1,x] > min_disp) and (dmap[y+1,x+1] > min_disp): 227 | faces[face_ctr, 0] = y*W + x + 1 228 | faces[face_ctr, 1] = (y+1)*W + x + 1 229 | faces[face_ctr, 2] = (y+1)*W + (x+1) + 1 230 | face_ctr += 1 231 | faces = faces[0:face_ctr, :] 232 | return verts, faces.astype(np.int) 233 | 234 | 235 | def dispmap_to_points(dmap, k_mat, scale_x=1, scale_y=1, min_disp=1e-2): 236 | ''' 237 | Converts an inverse depth map to a 3D point cloud. 238 | 239 | Args: 240 | dmap: H X W inverse depth map 241 | k_mat : 3 X 3 intrinsic matrix 242 | scale_x: Scale the intrinsic matrix's x row by this factor e.g. scale_x=0.5 implies downsampling by a factor of 2 243 | scale_y: Scale the intrinsic matrix's y row by this factor e.g. scale_y=0.5 implies downsampling by a factor of 2 244 | min_disp: Points with disp less than this are not rendered 245 | Returns: 246 | n_pts X 3 [x,y,z] coordinates 247 | ''' 248 | H = np.shape(dmap)[0] 249 | W = np.shape(dmap)[1] 250 | dmap = dmap.reshape((H, W)) 251 | k_mat[0, :] = scale_x*k_mat[0, :] 252 | k_mat[1, :] = scale_y*k_mat[1, :] 253 | k_inv = np.linalg.inv(k_mat) 254 | num_pts = np.sum(np.greater(dmap, min_disp)) 255 | pts = np.ones((3, num_pts)) 256 | ctr = 0 257 | for y in range(H): 258 | for x in range(W): 259 | if (dmap[y,x] > min_disp) and (ctr < num_pts): 260 | pts[0, ctr] = x + 0.5 261 | pts[1, ctr] = y + 0.5 262 | pts[:, ctr] *= (1/dmap[y,x]) 263 | ctr += 1 264 | return np.transpose(np.matmul(k_inv, pts)) 265 | 266 | 267 | def points_to_cubes(points, edge_size=0.05): 268 | ''' 269 | Converts an input point cloud to a set of cubes. 270 | 271 | Args: 272 | points: N X 3 array 273 | edge_size: cube edge size 274 | Returns: 275 | vs: vertices 276 | fs: faces 277 | ''' 278 | v_counter = 0 279 | tot_points = points.shape[0] 280 | v_all = np.tile(cube_v, [tot_points, 1]) 281 | f_all = np.tile(cube_f, [tot_points, 1]) 282 | f_offset = np.tile(np.linspace(0, 12*tot_points-1, 12*tot_points), 3).reshape(3, 12*tot_points).transpose() 283 | f_offset = (f_offset//12 * 8).astype(np.int) 284 | f_all += f_offset 285 | for px in range(points.shape[0]): 286 | v_all[v_counter:v_counter+8,:] *= edge_size 287 | v_all[v_counter:v_counter+8,:] += points[px, :] 288 | v_counter += 8 289 | 290 | return v_all, f_all 291 | --------------------------------------------------------------------------------
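A minimal usage sketch for the depth utilities above (not part of the repository source; the disparity map and intrinsics below are illustrative placeholders, and it assumes the repository root is importable as the factored3d package with its dependencies installed): dispmap_to_points back-projects an inverse depth map into camera-frame points, points_to_cubes expands each point into a small cube, and append_obj writes the result as a Wavefront OBJ file.

import numpy as np
from factored3d.renderer import utils as render_utils

# Hypothetical 24 X 32 inverse depth (disparity) map: constant disparity 0.5, i.e. depth 2.
disp = np.full((24, 32), 0.5)
# Hypothetical pinhole intrinsics for a 32 X 24 image; pass a copy since dispmap_to_points
# scales the intrinsic rows in place.
k_mat = np.array([[30., 0., 16.],
                  [0., 30., 12.],
                  [0., 0., 1.]])

points = render_utils.dispmap_to_points(disp, k_mat.copy())            # N X 3 camera-frame points
verts, faces = render_utils.points_to_cubes(points, edge_size=0.05)    # 8 verts and 12 faces per point
with open('depth_cubes.obj', 'w') as fout:
    render_utils.append_obj(fout, verts, faces)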