├── __init__.py ├── data ├── __init__.py └── suncg.py ├── demo ├── __init__.py ├── data │ ├── suncg_img.png │ └── suncg_proposals.mat ├── cli_demo.py └── demo_utils.py ├── utils ├── __init__.py ├── setup.py ├── bbox_utils.pyx ├── metrics.py ├── html.py ├── visutil.py ├── make_html.py └── visualizer.py ├── benchmark ├── __init__.py └── suncg │ ├── __init__.py │ ├── pr_plots.py │ ├── sc_plots.py │ └── evaluate_detection.py ├── experiments ├── __init__.py └── suncg │ ├── __init__.py │ ├── layout.py │ ├── voxels.py │ ├── box3d.py │ └── dwr.py ├── nnutils ├── __init__.py ├── loss_utils.py ├── roi_pool_py.py ├── voxel_net.py ├── test_utils.py ├── disp_net.py ├── net_blocks.py ├── train_utils.py └── oc_net.py ├── renderer ├── __init__.py └── utils.py ├── preprocess └── suncg │ ├── globals.m │ ├── matUtils │ ├── volume_params.m │ ├── mkdirOptional.m │ ├── quatDist.m │ ├── getFileNamesFromDirectory.m │ ├── bboxOverlap.m │ ├── get_scene_vox.m │ └── read_wobj_safe.m │ ├── precompute_gt_bboxes.m │ ├── render_node_indices.py │ ├── render_layout_depth.py │ ├── voxelize_objects.py │ ├── precompute_scene_voxels.m │ └── precompute_edge_boxes.m ├── docs ├── requirements.txt ├── evaluation.md ├── suncg_data.md ├── installation.md ├── preprocessing.md └── training.md ├── .gitignore └── README.md /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nnutils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /renderer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmark/suncg/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/suncg/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demo/data/suncg_img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shubhtuls/factored3d/HEAD/demo/data/suncg_img.png -------------------------------------------------------------------------------- /preprocess/suncg/globals.m: 
-------------------------------------------------------------------------------- 1 | global suncgDir; 2 | suncgDir = '/data0/shubhtuls/datasets/suncg_pbrs_release'; -------------------------------------------------------------------------------- /demo/data/suncg_proposals.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shubhtuls/factored3d/HEAD/demo/data/suncg_proposals.mat -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # python requirements. 2 | pip>=9.0 3 | jupyter 4 | scipy 5 | numpy 6 | matplotlib 7 | absl-py 8 | cython 9 | visdom 10 | -------------------------------------------------------------------------------- /preprocess/suncg/matUtils/volume_params.m: -------------------------------------------------------------------------------- 1 | voxSize = [128;64;128]; 2 | voxUnit = 0.04; 3 | camK = [517.97,0,320;0,517.97,240;0,0,1]; 4 | im_w = 640; 5 | im_h = 480; -------------------------------------------------------------------------------- /preprocess/suncg/matUtils/mkdirOptional.m: -------------------------------------------------------------------------------- 1 | function [] = mkdirOptional(dirName) 2 | %MKDIROPTIONAL Summary of this function goes here 3 | % Detailed explanation goes here 4 | 5 | if(~exist(dirName,'dir')) 6 | mkdir(dirName) 7 | end 8 | 9 | end 10 | 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *ipynb_checkpoints 2 | demo/data 3 | release_files 4 | external 5 | utils/bbox_utils.c 6 | utils/build 7 | utils/bbox_utils.so 8 | renderer/blender/bpy 9 | cachedir 10 | *.ipynb 11 | *.pyc 12 | ipyNb 13 | torchNb 14 | renderer/blender 15 | .timedBar 16 | *Debug* 17 | -------------------------------------------------------------------------------- /preprocess/suncg/matUtils/quatDist.m: -------------------------------------------------------------------------------- 1 | function d = quatDist(q, QJ) 2 | nj = size(QJ,1); 3 | d = zeros(nj,1); 4 | for n = 1:nj 5 | d(n) = qDist(q, QJ(n,:)); 6 | end 7 | end 8 | 9 | function d = qDist(q1,q2) 10 | %disp(q1); 11 | %disp(q2); 12 | r1 = quat2dcm(q1); 13 | r2 = quat2dcm(q2); 14 | r_rel = r1'*r2; 15 | d = norm(logm(r_rel),'fro')/sqrt(2); 16 | end -------------------------------------------------------------------------------- /utils/setup.py: -------------------------------------------------------------------------------- 1 | # Usage: 2 | ''' 3 | python setup.py build_ext --inplace 4 | rm -rf build/ 5 | mv factored3d/utils/bbox_utils.so ./ 6 | rm -rf factored3d/ 7 | ''' 8 | import numpy 9 | from distutils.core import setup 10 | from Cython.Build import cythonize 11 | 12 | setup( 13 | name = "Bbox utils", 14 | ext_modules = cythonize('bbox_utils.pyx'), # accepts a glob pattern 15 | include_dirs=[numpy.get_include()] 16 | ) 17 | -------------------------------------------------------------------------------- /docs/evaluation.md: -------------------------------------------------------------------------------- 1 | # Instructions to evaluate models and baselines 2 | 3 | ### Pre-requisites 4 | Install pcl and pcl-python. 
Instructions for Ubuntu: 5 | 6 | ``` 7 | # add pcl repo 8 | sudo add-apt-repository ppa:v-launchpad-jochen-sprickerhof-de/pcl -y 9 | sudo apt-get update -y 10 | 11 | # install pcl 12 | sudo apt-get install libpcl-all 13 | 14 | # install python wrapper 15 | cd CODE_ROOT/external 16 | touch __init__.py 17 | git clone git@github.com:s-gupta/python-pcl.git pythonpcl && cd pythonpcl && make 18 | ``` 19 | 20 | Note that the example scripts below are for the validation set. Please modify the arguments and plotting functions to use test set for the final evaluation. 21 | ### Comparing Scene Representations 22 | ``` 23 | # Launch comparison evaluation 24 | # launch jobs from one level above code directory 25 | python -m factored3d.benchmark.suncg.scene_comparison --num_train_epoch=1 --name=dwr_shape_ft --classify_rot --pred_voxels=True --use_context --eval_set=val 26 | 27 | # Plot comparisons 28 | cd CODE_ROOT/benchmark/suncg 29 | python sc_plots.py 30 | ``` 31 | 32 | ### Object Detection with Reconstruction Evaluation 33 | ``` 34 | # Launch detection setting object 3D prediction evaluation 35 | # launch jobs from one level above code directory 36 | python -m factored3d.benchmark.suncg.dwr --num_train_epoch=1 --name=dwr_shape_ft --classify_rot --pred_voxels=True --use_context --eval_set=val 37 | 38 | # Plot precision-recall curves 39 | cd CODE_ROOT/benchmark/suncg 40 | python pr_plots.py 41 | ``` -------------------------------------------------------------------------------- /preprocess/suncg/matUtils/getFileNamesFromDirectory.m: -------------------------------------------------------------------------------- 1 | function [nameStruct] = getFileNamesFromDirectory(dirPath,varargin) 2 | %Returns a cell array of names of all files of a specified format in a 3 | %given directory 4 | % dir is the directory from which image names are required 5 | % varargin can be used to specify mode (path/name) and filetypes to be 6 | % read 7 | % Example Usage - getFileNamesFromDirectory(dir,'mode','path','types',{'.png', '.jpg'}) 8 | % Default mode is 'name' (just returns filenames). Default 'types' is all 9 | % image types 10 | 11 | %% Initializing Variables 12 | nVarargs = length(varargin); 13 | mode = 'name'; % mode can be 'path' or 'name' 14 | types = {'.jpg','.png', '.bmp', '.tiff', '.jpeg'}; % types is a cell array 15 | nameStruct = {}; 16 | 17 | %% processing varargin 18 | if(nVarargs > 0) 19 | for i=1:(nVarargs/2) 20 | if(strcmp(varargin{2*i-1},'mode')) 21 | mode = varargin{2*i}; 22 | end 23 | 24 | if(strcmp(varargin{2*i-1},'types')) 25 | types = varargin{2*i}; 26 | end 27 | end 28 | end 29 | 30 | %% Getting the names of the files 31 | for i = 1:length(types) 32 | t = dir([dirPath,'/*',types{i}]); 33 | if(size(t,1) > 0) 34 | nameStruct = [nameStruct extractfield(t,'name')]; 35 | end 36 | end 37 | 38 | %% adding path if 'mode' == 'path' 39 | if (strcmp(mode,'path')) 40 | for i=1:length(nameStruct) 41 | nameStruct{i} = [dirPath,'/',nameStruct{i}]; 42 | end 43 | end 44 | 45 | end 46 | 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Factoring Shape, Pose, and Layout from the 2D Image of a 3D Scene 2 | 3 | Shubham Tulsiani, Saurabh Gupta, David Fouhey, Alexei A. Efros, Jitendra Malik. 
4 | 5 | [Project Page](https://shubhtuls.github.io/factored3d/) 6 | 7 | Note: Also see [3D-RelNet](https://github.com/nileshkulkarni/relative3d) that improves on this work by incorporating relationships among objects. 8 | ![Teaser Image](https://shubhtuls.github.io/factored3d/resources/images/overview.png) 9 | 10 | ## Demo and Pre-trained Models 11 | 12 | Please check out the [interactive notebook](demo/demo.ipynb) which shows reconstructions using the learned models. To run this, you'll first need to follow the [installation instructions](docs/installation.md) to download trained models and some pre-requisites. 13 | 14 | ## Training and Evaluating 15 | To train or evaluate the (trained/downloaded) models, you first need to [download the SUNCG dataset](docs/suncg_data.md) and [preprocess the data](docs/preprocessing.md). Please see the detailed README files for [Training](docs/training.md) or [Evaluation](docs/evaluation.md) of models for subsequent instructions. 16 | 17 | ### Citation 18 | If you use this code for your research, please consider citing: 19 | ``` 20 | @inProceedings{factored3dTulsiani17, 21 | title={Factoring Shape, Pose, and Layout from the 2D Image of a 3D Scene}, 22 | author = {Shubham Tulsiani 23 | and Saurabh Gupta 24 | and David Fouhey 25 | and Alexei A. Efros 26 | and Jitendra Malik}, 27 | booktitle={Computer Vision and Pattern Recognition (CVPR)}, 28 | year={2018} 29 | } 30 | ``` 31 | -------------------------------------------------------------------------------- /preprocess/suncg/matUtils/bboxOverlap.m: -------------------------------------------------------------------------------- 1 | function [iu i a1 a2] = bboxOverlap(B1, B2) 2 | % function [iu i a1 a2] = bboxOverlap(B1, B2) 3 | % B1 and B2 are N x 4 and M x 4 matrices with values [xmin ymin xmax ymax] quadruples 4 | % iu is N x M matrix with intersection over union, i is N x M matrix of the intersection 5 | % a1 is the area of boxes in B1 and a2 is the area of boxes in B2. 6 | 7 | % AUTORIGHTS 8 | % --------------------------------------------------------- 9 | % Copyright (c) 2014, Saurabh Gupta 10 | % 11 | % This file is part of the Utils code and is available 12 | % under the terms of the Simplified BSD License provided in 13 | % LICENSE. Please retain this notice and LICENSE if you use 14 | % this file (or any portion of it) in your project.
15 | % --------------------------------------------------------- 16 | if(numel(B1) == 0 && numel(B2) == 0) 17 | a1 = zeros(0,0); a2 = zeros(0,0); i = zeros(0,0); iu = zeros(0,0); 18 | elseif(numel(B1) == 0) 19 | a1 = zeros(0,0); i = zeros(0,size(B2,1)); iu = zeros(0,size(B2,1)); 20 | a2 = (B2(:,3)-B2(:,1)+1).*(B2(:,4)-B2(:,2)+1); 21 | 22 | elseif(numel(B2) == 0) 23 | a2 = zeros(0,0); i = zeros(size(B1,1), 0); iu = zeros(size(B1,1), 0); 24 | a1 = (B1(:,3)-B1(:,1)+1).*(B1(:,4)-B1(:,2)+1); 25 | 26 | else 27 | a1 = (B1(:,3)-B1(:,1)+1).*(B1(:,4)-B1(:,2)+1); 28 | a2 = (B2(:,3)-B2(:,1)+1).*(B2(:,4)-B2(:,2)+1); 29 | 30 | minX = bsxfun(@max, B1(:,1), B2(:,1)'); 31 | minY = bsxfun(@max, B1(:,2), B2(:,2)'); 32 | 33 | maxX = bsxfun(@min, B1(:,3), B2(:,3)'); 34 | maxY = bsxfun(@min, B1(:,4), B2(:,4)'); 35 | 36 | i = max(maxX-minX+1, 0).*max(maxY-minY+1, 0); 37 | iu = i./max(eps, bsxfun(@plus, a1, a2')-i); 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /docs/suncg_data.md: -------------------------------------------------------------------------------- 1 | # Instructions to download SUNCG 2 | 3 | ### SUNCG Dataset 4 | Download the [SUNCG dataset](http://suncg.cs.princeton.edu/) and extract the contents to SUNCG_DIR. There should be 5 folders named 'house', 'room', 'object', 'texture' and 'object_vox' in SUNCG_DIR. We now download additional meta-data. 5 | ``` 6 | cd SUNCG_DIR; 7 | 8 | # Download data splits 9 | mkdir splits 10 | cd splits 11 | wget https://people.eecs.berkeley.edu/~shubhtuls/cachedir/factored3d/suncg_split.pkl 12 | cd .. 13 | 14 | # Download layout data (suncg houses with objects removed) 15 | # we use this data to render the amodal depths 16 | wget https://people.eecs.berkeley.edu/~shubhtuls/cachedir/factored3d/layout.tar.gz 17 | tar -zxvf layout.tar.gz 18 | mv houseLayout layout 19 | 20 | # Download meta-data 21 | wget https://people.eecs.berkeley.edu/~shubhtuls/cachedir/factored3d/ModelCategoryMappingEdited.csv 22 | ``` 23 | 24 | ### Physically-based Renderings 25 | To use the [physically-based renderings](http://pbrs.cs.princeton.edu/) provided by Zhang et al., we need to download the images, associated camera viewpoints and depth images (for training the baseline).
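After the commands below have finished, it is worth sanity-checking the extracted data against data_goodlist_v2.txt (the list of valid sceneId/cameraId pairs used by the preprocessing scripts). The following is a minimal sketch of such a check; the suncg_dir path is a placeholder you should edit.

```
import os.path as osp

suncg_dir = '/path/to/SUNCG_DIR'  # placeholder: set this to your SUNCG_DIR

# each line of data_goodlist_v2.txt has the form '<sceneId>/<cameraId>'
with open(osp.join(suncg_dir, 'zipfiles', 'data_goodlist_v2.txt')) as f:
    good_list = [l.strip() for l in f if l.strip()]

missing = []
for entry in good_list[:100]:  # spot-check the first 100 entries
    scene_id, cam_id = entry.split('/')
    img_file = osp.join(suncg_dir, 'renderings_ldr', scene_id, '{}_mlt.png'.format(cam_id))
    cam_file = osp.join(suncg_dir, 'camera', scene_id, 'room_camera.txt')
    if not (osp.exists(img_file) and osp.exists(cam_file)):
        missing.append(entry)
print('{}/{} checked entries have missing files'.format(len(missing), len(good_list[:100])))
```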
26 | 27 | ``` 28 | cd SUNCG_DIR; 29 | 30 | mkdir zipfiles; cd zipfiles; 31 | 32 | # Download camera viewpoints 33 | wget http://pbrs.cs.princeton.edu/pbrs_release/data/camera_v2.zip 34 | unzip camera_v2.zip -d ../camera 35 | 36 | # Download LDR renderings 37 | wget http://pbrs.cs.princeton.edu/pbrs_release/data/mlt_v2.zip 38 | unzip mlt_v2.zip -d ../renderings_ldr 39 | 40 | # meta-data 41 | wget http://pbrs.cs.princeton.edu/pbrs_release/data/data_goodlist_v2.txt 42 | 43 | 44 | # Download depth images (needed to train the depth baseline) 45 | wget http://pbrs.cs.princeton.edu/pbrs_release/data/depth_v2.zip 46 | unzip depth_v2.zip -d ../renderings_depth 47 | ``` 48 | -------------------------------------------------------------------------------- /utils/bbox_utils.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | """Evaluation matric utils. 2 | """ 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | import math 7 | import numpy as np 8 | import torch 9 | from . 
import transformations 10 | 11 | def volume_iou(pred, gt, thresh): 12 | gt = gt.float().ge(0.5) 13 | pred = pred.float().ge(thresh) 14 | intersection = torch.mul(gt, pred).sum() 15 | union = gt.sum() + pred.sum() - intersection 16 | return intersection/union 17 | 18 | def quat_dist(pred, gt): 19 | rot_pred = transformations.quaternion_matrix(pred.numpy()) 20 | rot_gt = transformations.quaternion_matrix(gt.numpy()) 21 | rot_rel = np.matmul(rot_pred, np.transpose(rot_gt)) 22 | quat_rel = transformations.quaternion_from_matrix(rot_rel, isprecise=True) 23 | angle = math.acos(abs(quat_rel[0]))*360/math.pi 24 | return angle 25 | 26 | def nms(dets, thresh, min_score=0): 27 | ''' 28 | adapted from Fast R-CNN 29 | Copyright (c) 2015 Microsoft 30 | Licensed under The MIT License 31 | Written by Ross Girshick 32 | ''' 33 | 34 | x1 = dets[:, 0] 35 | y1 = dets[:, 1] 36 | x2 = dets[:, 2] 37 | y2 = dets[:, 3] 38 | scores = dets[:, 4] 39 | 40 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 41 | order = scores.argsort()[::-1] 42 | 43 | keep = [] 44 | while order.size > 0: 45 | i = order[0] 46 | if scores[i] < min_score: 47 | break 48 | 49 | keep.append(i) 50 | xx1 = np.maximum(x1[i], x1[order[1:]]) 51 | yy1 = np.maximum(y1[i], y1[order[1:]]) 52 | xx2 = np.minimum(x2[i], x2[order[1:]]) 53 | yy2 = np.minimum(y2[i], y2[order[1:]]) 54 | 55 | w = np.maximum(0.0, xx2 - xx1 + 1) 56 | h = np.maximum(0.0, yy2 - yy1 + 1) 57 | inter = w * h 58 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 59 | 60 | inds = np.where(ovr <= thresh)[0] 61 | order = order[inds + 1] 62 | 63 | return keep -------------------------------------------------------------------------------- /preprocess/suncg/precompute_gt_bboxes.m: -------------------------------------------------------------------------------- 1 | function precompute_gt_bboxes(min_id, max_id) 2 | globals; 3 | suncgDir = suncgDir; % redundancy useful for parfor 4 | addpath(genpath('./matUtils')); 5 | basedir = pwd(); 6 | fileNamesAll = strsplit(fileread(fullfile(suncgDir, 'zipfiles', 'data_goodlist_v2.txt')), '\n'); 7 | 8 | saveDir = fullfile(suncgDir, 'bboxes_node'); 9 | mkdirOptional(saveDir); 10 | 11 | sceneIds = getFileNamesFromDirectory(fullfile(suncgDir, 'camera'),'types',{''}); 12 | sceneIds = sceneIds(3:end); 13 | sceneIds = sort(sceneIds); 14 | if max_id == 0 15 | max_id = length(sceneIds); 16 | end 17 | parfor ix = min_id:max_id 18 | %for ix = min_id:max_id 19 | sceneId = sceneIds{ix}; 20 | nodesBoxesDir = fullfile(saveDir, sceneId); 21 | mkdirOptional(nodesBoxesDir); 22 | imgsAll = getFileNamesFromDirectory(fullfile(suncgDir, 'renderings_node', sceneId),'types',{'.png'}); 23 | 24 | for cameraId=1:length(imgsAll) 25 | if ~ismember(sprintf('%s/%06d', sceneId, cameraId-1), fileNamesAll) 26 | continue 27 | end 28 | img = imread(fullfile(suncgDir, 'renderings_node', sceneId, sprintf('%06d_node.png', cameraId-1))); 29 | ids = unique(img); 30 | nIds = size(ids,1); 31 | bboxes = zeros(nIds,4); 32 | nPixels = zeros(nIds,1); 33 | for o=1:nIds 34 | bboxes(o,:) = mask2bbox(img == ids(o)); 35 | nPixels(o,:) = sum(sum(img == ids(o))); 36 | end 37 | saveFile = fullfile(nodesBoxesDir, sprintf('%06d_bboxes.mat', cameraId-1)); 38 | saveFunc(saveFile, ids, bboxes, nPixels); 39 | end 40 | end 41 | end 42 | 43 | function saveFunc(filename, ids, bboxes, nPixels) 44 | save(filename,'ids', 'bboxes', 'nPixels'); 45 | end 46 | 47 | function bbox = mask2bbox(mask) 48 | [y,x] = find(mask); 49 | bbox = [min(x) min(y) max(x) max(y)]; 50 | end 
-------------------------------------------------------------------------------- /preprocess/suncg/render_node_indices.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Example usage python render_node_indices.py --min=1 --nc=1 3 | import argparse 4 | import os 5 | import os.path as osp 6 | import threading 7 | 8 | import subprocess 9 | import time 10 | 11 | parser = argparse.ArgumentParser(description='Parse arguments.') 12 | parser.add_argument('--nc', type=int, help='number of cores') 13 | parser.add_argument('--min', type=int, help='min id') 14 | parser.add_argument('--max', type=int, default=0, help='max id') 15 | parser.add_argument('--mesa', type=bool, default=False, help='Use Mesa') 16 | args = parser.parse_args() 17 | 18 | sunCgDir = osp.join('/data0/shubhtuls/datasets', 'suncg_pbrs_release') 19 | toolboxDir = osp.join(sunCgDir, 'toolbox') 20 | execFolder = 'gaps/bin/x86_64' 21 | 22 | modelsAll = [f for f in os.listdir(osp.join(sunCgDir, 'camera'))] 23 | list.sort(modelsAll) 24 | 25 | nCores = args.nc 26 | nMin = args.min 27 | nMax = args.max 28 | if(nMax == 0): 29 | nMax = len(modelsAll) 30 | 31 | class renderingThread(threading.Thread): 32 | def __init__(self, c): 33 | threading.Thread.__init__(self) 34 | self.c = c 35 | 36 | def run(self): 37 | for ix in range(nMin-1, nMax): 38 | if(ix % nCores == self.c): 39 | modelId = modelsAll[ix] 40 | modelDir = osp.join(sunCgDir, 'house', modelId) 41 | saveDir = osp.join(sunCgDir, 'renderings_node', modelId) 42 | camFile = osp.join(sunCgDir, 'camera', modelId, 'room_camera.txt') 43 | 44 | if not os.path.exists(saveDir): 45 | os.makedirs(saveDir) 46 | renderFlags = '-capture_node_images' 47 | if args.mesa: 48 | renderFlags += ' -mesa' 49 | renderCommand = 'cd {}; {}/scn2img house.json {} {} {};'.format(modelDir, osp.join(toolboxDir, execFolder), renderFlags, camFile, saveDir) 50 | 51 | os.system(renderCommand) 52 | 53 | tList = [renderingThread(c) for c in range(nCores)] 54 | 55 | for renderer in tList: 56 | renderer.start() 57 | -------------------------------------------------------------------------------- /preprocess/suncg/render_layout_depth.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Example usage python render_layout_depth.py --min=1 --nc=1 3 | import argparse 4 | import os 5 | import os.path as osp 6 | import threading 7 | 8 | import subprocess 9 | import time 10 | 11 | parser = argparse.ArgumentParser(description='Parse arguments.') 12 | parser.add_argument('--nc', type=int, help='number of cores') 13 | parser.add_argument('--min', type=int, help='min id') 14 | parser.add_argument('--max', type=int, default=0, help='max id') 15 | parser.add_argument('--mesa', type=bool, default=False, help='Use Mesa') 16 | args = parser.parse_args() 17 | 18 | sunCgDir = osp.join('/data0/shubhtuls/datasets', 'suncg_pbrs_release') 19 | toolboxDir = osp.join(sunCgDir, 'toolbox') 20 | execFolder = 'gaps/bin/x86_64' 21 | 22 | modelsAll = [f for f in os.listdir(osp.join(sunCgDir, 'camera'))] 23 | list.sort(modelsAll) 24 | 25 | nCores = args.nc 26 | nMin = args.min 27 | nMax = args.max 28 | if(nMax == 0): 29 | nMax = len(modelsAll) 30 | 31 | class renderingThread(threading.Thread): 32 | def __init__(self, c): 33 | threading.Thread.__init__(self) 34 | self.c = c 35 | 36 | def run(self): 37 | for ix in range(nMin-1, nMax): 38 | if(ix % nCores == self.c): 39 | modelId = modelsAll[ix] 40 | modelDir = osp.join(sunCgDir, 'layout', 
modelId) 41 | saveDir = osp.join(sunCgDir, 'renderings_layout', modelId) 42 | camFile = osp.join(sunCgDir, 'camera', modelId, 'room_camera.txt') 43 | 44 | if not os.path.exists(saveDir): 45 | os.makedirs(saveDir) 46 | renderFlags = '-capture_depth_images' 47 | if args.mesa: 48 | renderFlags += ' -mesa' 49 | 50 | renderCommand = 'cd {}; {}/scn2img layout.json {} {} {};'.format(modelDir, osp.join(toolboxDir, execFolder), renderFlags, camFile, saveDir) 51 | 52 | os.system(renderCommand) 53 | 54 | tList = [renderingThread(c) for c in range(nCores)] 55 | 56 | for renderer in tList: 57 | renderer.start() 58 | -------------------------------------------------------------------------------- /nnutils/loss_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Loss building blocks. 3 | ''' 4 | import torch 5 | import torch.nn as nn 6 | import math 7 | from absl import flags 8 | 9 | #-------------- flags -------------# 10 | #----------------------------------# 11 | flags.DEFINE_float('shape_loss_wt', 1, 'Shape loss weight.') 12 | flags.DEFINE_float('scale_loss_wt', 1, 'Scale loss weight.') 13 | flags.DEFINE_float('quat_loss_wt', 1, 'Quat loss weight.') 14 | flags.DEFINE_float('trans_loss_wt', 1, 'Trans loss weight.') 15 | 16 | 17 | def quat_loss(q1, q2): 18 | ''' 19 | Anti-podal squared L2 loss. 20 | 21 | Args: 22 | q1: N X 4 23 | q2: N X 4 24 | Returns: 25 | loss : scalar 26 | ''' 27 | q_diff_loss = (q1-q2).pow(2).sum(1) 28 | q_sum_loss = (q1+q2).pow(2).sum(1) 29 | q_loss, _ = torch.stack((q_diff_loss, q_sum_loss), dim=1).min(1) 30 | return q_loss.mean() 31 | 32 | 33 | def code_loss( 34 | code_pred, code_gt, 35 | pred_voxels=True, classify_rot=True, 36 | shape_wt=1.0, scale_wt=1.0, quat_wt=1.0, trans_wt=1.0): 37 | ''' 38 | Code loss 39 | 40 | Args: 41 | code_pred: [shape, scale, quat, trans] 42 | code_gt: [shape, scale, quat, trans] 43 | Returns: 44 | total_loss : scalar 45 | ''' 46 | if pred_voxels: 47 | s_loss = torch.nn.functional.binary_cross_entropy(code_pred[0], code_gt[0]) 48 | else: 49 | #print('Shape gt/pred mean : {}, {}'.format(code_pred[0].mean().data[0], code_gt[0].mean().data[0])) 50 | s_loss = (code_pred[0] - code_gt[0]).pow(2).mean() 51 | 52 | if classify_rot: 53 | q_loss = torch.nn.functional.nll_loss(code_pred[2], code_gt[2]) 54 | else: 55 | q_loss = quat_loss(code_pred[2], code_gt[2]) 56 | 57 | sc_loss = (code_pred[1].log() - code_gt[1].log()).abs().mean() 58 | tr_loss = (code_pred[3] - code_gt[3]).pow(2).mean() 59 | 60 | total_loss = sc_loss*scale_wt 61 | total_loss += q_loss*quat_wt 62 | total_loss += tr_loss*trans_wt 63 | total_loss += s_loss*shape_wt 64 | 65 | loss_factors = { 66 | 'shape': s_loss*shape_wt, 'scale': sc_loss*scale_wt, 'quat': q_loss*quat_wt, 'trans': tr_loss*trans_wt 67 | } 68 | return total_loss, loss_factors 69 | -------------------------------------------------------------------------------- /utils/html.py: -------------------------------------------------------------------------------- 1 | '''Code from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix''' 2 | import dominate 3 | from dominate.tags import * 4 | import os 5 | 6 | 7 | class HTML: 8 | def __init__(self, web_dir, title, reflesh=0): 9 | self.title = title 10 | self.web_dir = web_dir 11 | self.img_dir = os.path.join(self.web_dir, 'images') 12 | if not os.path.exists(self.web_dir): 13 | os.makedirs(self.web_dir) 14 | if not os.path.exists(self.img_dir): 15 | os.makedirs(self.img_dir) 16 | # print(self.img_dir) 17 | 18 | self.doc = 
dominate.document(title=title) 19 | if reflesh > 0: 20 | with self.doc.head: 21 | meta(http_equiv="reflesh", content=str(reflesh)) 22 | 23 | def get_image_dir(self): 24 | return self.img_dir 25 | 26 | def add_header(self, str): 27 | with self.doc: 28 | h3(str) 29 | 30 | def add_table(self, border=1): 31 | self.t = table(border=border, style="table-layout: fixed;") 32 | self.doc.add(self.t) 33 | 34 | def add_images(self, ims, txts, links, width=400): 35 | self.add_table() 36 | with self.t: 37 | with tr(): 38 | for im, txt, link in zip(ims, txts, links): 39 | with td(style="word-wrap: break-word;", halign="center", valign="top"): 40 | with p(): 41 | with a(href=os.path.join('images', link)): 42 | img(style="width:%dpx" % width, src=os.path.join('images', im)) 43 | br() 44 | p(txt) 45 | 46 | def save(self): 47 | html_file = '%s/index.html' % self.web_dir 48 | f = open(html_file, 'wt') 49 | f.write(self.doc.render()) 50 | f.close() 51 | 52 | 53 | if __name__ == '__main__': 54 | html = HTML('web/', 'test_html') 55 | html.add_header('hello world') 56 | 57 | ims = [] 58 | txts = [] 59 | links = [] 60 | for n in range(4): 61 | ims.append('image_%d.png' % n) 62 | txts.append('text_%d' % n) 63 | links.append('image_%d.png' % n) 64 | html.add_images(ims, txts, links) 65 | html.save() 66 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation Instructions 2 | 3 | Two overall comments: 4 | * Result visualizations depend on blender. We provide a version, but if you have issues where the renderings don't show up or where the script cannot read the result images, you may have to compile blender and provide a bpy.so file that matches your precise system configuration. See [here](https://wiki.blender.org/index.php/User:Ideasman42/BlenderAsPyModule) for information about how to do this. The results by themselves do not depend on blender, and if you just want to compute predictions, you do not need blender. 5 | * You should run each of these commands in the main root directory. 6 | 7 | #### Setup virtualenv. 8 | ``` 9 | virtualenv venv 10 | source venv/bin/activate 11 | pip install -U pip 12 | deactivate 13 | source venv/bin/activate 14 | pip install -r docs/requirements.txt 15 | ``` 16 | 17 | #### Install pytorch. 18 | ``` 19 | pip install http://download.pytorch.org/whl/cu80/torch-0.2.0.post3-cp27-cp27mu-manylinux1_x86_64.whl 20 | pip install torchvision visdom dominate 21 | ``` 22 | 23 | #### Compile cython modules. 24 | First, we need to compile some cython utilities. 25 | ``` 26 | cd utils 27 | python setup.py build_ext --inplace 28 | mv factored3d/utils/bbox_utils.so ./ 29 | rm -rf build/ # remove redundant folders 30 | rm -rf factored3d/ # remove redundant folders 31 | cd .. 32 | ``` 33 | 34 | #### Download pre-trained models. 35 | ``` 36 | # Download pre-trained Resnet18 Model. 37 | wget https://download.pytorch.org/models/resnet18-5c106cde.pth -O ~/.torch/models/resnet18-5c106cde.pth 38 | 39 | # Download our models. 40 | wget https://people.eecs.berkeley.edu/~shubhtuls/cachedir/factored3d/cachedir.tar.gz && tar -xf cachedir.tar.gz 41 | wget https://people.eecs.berkeley.edu/~shubhtuls/cachedir/factored3d/blender.tar.gz && tar -xf blender.tar.gz && mv blender renderer/. 42 | ``` 43 | 44 | #### Setup external dependencies. 
45 | ``` 46 | mkdir external; cd external; 47 | # Python interface for binvox 48 | git clone https://github.com/dimatura/binvox-rw-py ./binvox 49 | 50 | # Piotr Dollar's toolbox 51 | git clone https://github.com/pdollar/toolbox ./toolbox 52 | 53 | # Edgeboxes code 54 | git clone https://github.com/pdollar/edges ./edges 55 | 56 | # SSC-Net code (used for computing voxelization for the baseline) 57 | git clone https://github.com/shurans/sscnet ./sscnet 58 | cd .. 59 | ``` 60 | -------------------------------------------------------------------------------- /preprocess/suncg/voxelize_objects.py: -------------------------------------------------------------------------------- 1 | # This needs to be executed onscreen 2 | import os,sys 3 | import os.path as osp 4 | import numpy as np 5 | import scipy.io as sio 6 | 7 | sys.path.append('/data0/shubhtuls/code/factored3d/external/binvox') 8 | sun_cg_dir = '/data0/shubhtuls/datasets/suncg_pbrs_release' 9 | binvox_exec_file = '/data0/shubhtuls/datasets/suncg_pbrs_release/toolbox/binvox' 10 | 11 | import binvox_rw 12 | 13 | def sub_dirs(d): 14 | return [o for o in os.listdir(d) if os.path.isdir(os.path.join(d,o))] 15 | 16 | 17 | obj_ids = sub_dirs(osp.join(sun_cg_dir,'object')) 18 | obj_ids = [o for o in obj_ids if 'copy' not in o] 19 | grid_size = 64 20 | dc1 = 'find {} -name "*.binvox" -type f -delete'.format(osp.join(sun_cg_dir,'object')) 21 | dc2 = 'find {} -name "*.mat" -type f -delete'.format(osp.join(sun_cg_dir,'object')) 22 | os.system(dc1) #delete old .binvox files 23 | os.system(dc2) #delete old .mat files 24 | 25 | for ix in range(len(obj_ids)): 26 | obj_id = obj_ids[ix] 27 | print(obj_id) 28 | object_dir = osp.join(sun_cg_dir, 'object', obj_id) 29 | binvox_file_interior = osp.join(object_dir, obj_id + '.binvox') 30 | binvox_file_surface = osp.join(object_dir, obj_id + '_1.binvox') 31 | 32 | cmd_interior = '{} -cb -d {} {}'.format(binvox_exec_file, grid_size, osp.join(object_dir, obj_id + '.obj')) 33 | cmd_surface = '{} -cb -e -d {} {}'.format(binvox_exec_file, grid_size, osp.join(object_dir, obj_id + '.obj')) 34 | os.system(cmd_interior) 35 | os.system(cmd_surface) 36 | 37 | with open(binvox_file_interior, 'rb') as f0: 38 | with open(binvox_file_surface, 'rb') as f1: 39 | vox_read_interior = binvox_rw.read_as_3d_array(f0) 40 | vox_read_surface = binvox_rw.read_as_3d_array(f1) 41 | 42 | #need to add translation corresponding to voxel centering 43 | shape_vox = vox_read_interior.data.astype(np.bool) + vox_read_surface.data.astype(np.bool) 44 | if(np.max(shape_vox) > 0): 45 | Xs, Ys, Zs = np.where(shape_vox) 46 | trans_centre = np.array([1.0*np.min(Xs)/(np.size(shape_vox,0)), 1.0*np.min(Ys)/(np.size(shape_vox,1)), 1.0*np.min(Zs)/(np.size(shape_vox,2)-1)] ) 47 | translate = vox_read_surface.translate - trans_centre*vox_read_surface.scale 48 | sio.savemat(osp.join(object_dir, obj_id + '.mat'), {'voxels' : shape_vox, 'scale' : vox_read_surface.scale, 'translation' : translate}) -------------------------------------------------------------------------------- /nnutils/roi_pool_py.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ROI pooling layer. 
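Each roi is specified as [batch_index, x_min, y_min, x_max, y_max]; the coordinates are scaled by spatial_scale, the region is divided into a pooled_height x pooled_width grid, and the features are max-pooled within each bin (empty bins are filled with 0).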
3 | Source adapted from https://github.com/longcw/faster_rcnn_pytorch/ 4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | from torch.autograd import Variable 8 | import numpy as np 9 | 10 | 11 | class RoIPool(nn.Module): 12 | def __init__(self, pooled_height, pooled_width, spatial_scale): 13 | super(RoIPool, self).__init__() 14 | self.pooled_width = int(pooled_width) 15 | self.pooled_height = int(pooled_height) 16 | self.spatial_scale = float(spatial_scale) 17 | 18 | def forward(self, features, rois): 19 | batch_size, num_channels, data_height, data_width = features.size() 20 | num_rois = rois.size()[0] 21 | outputs = Variable(torch.zeros(num_rois, num_channels, self.pooled_height, self.pooled_width)).cuda() 22 | 23 | for roi_ind, roi in enumerate(rois): 24 | batch_ind = int(roi[0].data[0]) 25 | roi_start_w, roi_start_h, roi_end_w, roi_end_h = np.round( 26 | roi[1:].data.cpu().numpy() * self.spatial_scale).astype(int) 27 | roi_width = max(roi_end_w - roi_start_w + 1, 1) 28 | roi_height = max(roi_end_h - roi_start_h + 1, 1) 29 | bin_size_w = float(roi_width) / float(self.pooled_width) 30 | bin_size_h = float(roi_height) / float(self.pooled_height) 31 | 32 | for ph in range(self.pooled_height): 33 | hstart = int(np.floor(ph * bin_size_h)) 34 | hend = int(np.ceil((ph + 1) * bin_size_h)) 35 | hstart = min(data_height, max(0, hstart + roi_start_h)) 36 | hend = min(data_height, max(0, hend + roi_start_h)) 37 | for pw in range(self.pooled_width): 38 | wstart = int(np.floor(pw * bin_size_w)) 39 | wend = int(np.ceil((pw + 1) * bin_size_w)) 40 | wstart = min(data_width, max(0, wstart + roi_start_w)) 41 | wend = min(data_width, max(0, wend + roi_start_w)) 42 | 43 | is_empty = (hend <= hstart) or(wend <= wstart) 44 | if is_empty: 45 | outputs[roi_ind, :, ph, pw] = 0 46 | else: 47 | data = features[batch_ind] 48 | outputs[roi_ind, :, ph, pw] = torch.max( 49 | torch.max(data[:, hstart:hend, wstart:wend], 1)[0], 1)[0].view(-1) 50 | 51 | return outputs 52 | 53 | -------------------------------------------------------------------------------- /docs/preprocessing.md: -------------------------------------------------------------------------------- 1 | # Instructions to precompute data required for training 2 | 3 | ### Compiling SUNCG Toolbox 4 | 5 | ``` 6 | cd SUNCG_DIR; 7 | 8 | # Download the toolbox 9 | git clone https://github.com/shurans/SUNCGtoolbox ./toolbox 10 | cd toolbox 11 | 12 | # Use our modified rendering function 13 | cp CODE_ROOT/preprocess/suncg/scn2img.cpp ./gaps/apps/scn2img/ 14 | 15 | # Compile 16 | make 17 | 18 | # (or optionally instead of above) compile with offscreen mesa support 19 | make mesa 20 | ``` 21 | In case you compile with offscreen support, you might need to edit [this line](https://github.com/shurans/SUNCGtoolbox/blob/master/gaps/makefiles/Makefile.apps#L42) to specify additional lib directories if you're using a locally compiled version of mesa, and possibly also specify a CPLUS_INCLUDE_PATH. Though note that locally compiling mesa with offscreen support can get a bit tricky. 22 | 23 | We highly recommend using the offscreen version, as otherwise the rendering behaviour is often stochastic. 24 | 25 | 26 | ### Rendering Layout and Node Images 27 | You'll first need to edit the 'sunCgDir' variable in both the python scripts below. Note that both the rendering jobs can take a while. If you managed to compile the gaps toolbox with offscreen mesa, you can add --mesa=True to the commands below, else you'll need to run the rendering jobs in an onscreen mode. 
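Both scripts write one output folder per scene (renderings_layout/SCENE_ID and renderings_node/SCENE_ID), so a rough progress check while the jobs below run can be done with a short sketch like the following; the suncg_dir value here is an assumption and should match the 'sunCgDir' variable in the scripts.

```
import os
import os.path as osp

suncg_dir = '/path/to/suncg_pbrs_release'  # assumption: same path as 'sunCgDir' in the render scripts
scenes = sorted(os.listdir(osp.join(suncg_dir, 'camera')))
for out_name in ['renderings_layout', 'renderings_node']:
    done = [s for s in scenes
            if osp.isdir(osp.join(suncg_dir, out_name, s)) and os.listdir(osp.join(suncg_dir, out_name, s))]
    print('{}: {}/{} scenes have output'.format(out_name, len(done), len(scenes)))
```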
28 | ``` 29 | cd CODE_ROOT/preprocess/suncg 30 | 31 | # Render amodal depths (edit the 'sunCgDir' variable before running) 32 | python render_layout_depth.py --min=1 --nc=1 33 | 34 | # Render node indices (edit the 'sunCgDir' variable before running) 35 | python render_node_indices.py --min=1 --nc=1 36 | 37 | ``` 38 | 39 | ### Voxelize Objects and Scenes 40 | Please download binvox from [here](http://www.patrickmin.com/binvox/) and store the binary as SUNCG_DIR/toolbox/binvox. 41 | 42 | ``` 43 | # Voxelize the objects (edit the 'sunCgDir' variable before running) 44 | # This needs to be run in onscreen mode with a display/desktop connected 45 | python voxelize_objects.py 46 | 47 | # Compute voxelizations for the full scene (required for training the baseline) 48 | # Edit the 'suncgDir' in globals.m before running 49 | precompute_scene_voxels(1, 0); 50 | ``` 51 | 52 | ### Compute object proposals 53 | ``` 54 | # Extract ground-truth object boxes 55 | # Edit the 'suncgDir' in globals.m before running 56 | precompute_gt_bboxes(1, 0); 57 | 58 | # Extract edgebox proposals 59 | # Edit the 'suncgDir' in globals.m before running 60 | precompute_edge_boxes(1, 0); 61 | ``` 62 | -------------------------------------------------------------------------------- /demo/cli_demo.py: -------------------------------------------------------------------------------- 1 | # edit the code path accordingly 2 | code_root = '/data0/shubhtuls/code/factored3d/' 3 | import sys 4 | import numpy as np 5 | import os.path as osp 6 | import scipy.misc 7 | import scipy.io as sio 8 | import torch 9 | import matplotlib.pyplot as plt 10 | sys.path.append(osp.join(code_root, '..')) 11 | from absl import flags 12 | from factored3d.demo import demo_utils 13 | 14 | flags.FLAGS(['demo']) 15 | opts = flags.FLAGS 16 | 17 | # do not change the options below 18 | opts.batch_size = 1 19 | opts.num_train_epoch = 1 20 | opts.name = 'dwr_shape_ft' 21 | opts.classify_rot = True 22 | opts.pred_voxels = True 23 | opts.use_context = True 24 | 25 | if opts.classify_rot: 26 | opts.nz_rot = 24 27 | else: 28 | opts.nz_rot = 4 29 | 30 | ## Load the trained models 31 | tester = demo_utils.DemoTester(opts) 32 | tester.init_testing() 33 | 34 | renderer = demo_utils.DemoRenderer(opts) 35 | ## Load input data 36 | dataset = 'suncg' 37 | 38 | img = scipy.misc.imread('./data/{}_img.png'.format(dataset)) 39 | 40 | img_fine = scipy.misc.imresize(img, (opts.img_height_fine, opts.img_width_fine)) 41 | img_fine = np.transpose(img_fine, (2,0,1)) 42 | 43 | img_coarse = scipy.misc.imresize(img, (opts.img_height, opts.img_width)) 44 | img_coarse = np.transpose(img_coarse, (2,0,1)) 45 | 46 | proposals = sio.loadmat('./data/{}_proposals.mat'.format(dataset))['proposals'][:, 0:4] 47 | 48 | inputs = {} 49 | inputs['img'] = torch.from_numpy(img_coarse/255.0).unsqueeze(0) 50 | inputs['img_fine'] = torch.from_numpy(img_fine/255.0).unsqueeze(0) 51 | inputs['bboxes_test_proposals'] = [torch.from_numpy(proposals)] 52 | tester.set_input(inputs) 53 | objects, layout = tester.predict_factored3d() 54 | scene_voxels = tester.predict_scene_voxels() 55 | dmap = tester.predict_depth() 56 | img_factored_cam, img_factored_novel = renderer.render_factored3d(objects, layout) 57 | img_voxels_cam, img_voxels_novel = renderer.render_scene_vox(scene_voxels) 58 | img_dmap_cam, img_dmap_novel = renderer.render_depth(dmap) 59 | 60 | f, axarr = plt.subplots(2, 4, figsize=(20, 8)) 61 | 62 | axarr[0, 0].imshow(img) 63 | axarr[0, 0].axis('off') 64 | axarr[1, 0].imshow(img*0 + 255) 65 | axarr[1, 
0].axis('off') 66 | 67 | axarr[0, 1].imshow(img_factored_cam) 68 | axarr[0, 1].axis('off') 69 | axarr[1, 1].imshow(img_factored_novel) 70 | axarr[1, 1].axis('off') 71 | 72 | axarr[0, 2].imshow(img_voxels_cam) 73 | axarr[0, 2].axis('off') 74 | axarr[1, 2].imshow(img_voxels_novel) 75 | axarr[1, 2].axis('off') 76 | 77 | axarr[0, 3].imshow(img_dmap_cam) 78 | axarr[0, 3].axis('off') 79 | axarr[1, 3].imshow(img_dmap_novel) 80 | axarr[1, 3].axis('off') 81 | 82 | plt.show() 83 | -------------------------------------------------------------------------------- /preprocess/suncg/precompute_scene_voxels.m: -------------------------------------------------------------------------------- 1 | function precompute_scene_voxels(min_id, max_id) 2 | globals; 3 | suncgDir = suncgDir; % redundancy useful for parfor 4 | addpath(genpath('./matUtils')); 5 | basedir = pwd(); 6 | fileNamesAll = strsplit(fileread(fullfile(suncgDir, 'zipfiles', 'data_goodlist_v2.txt')), '\n'); 7 | 8 | sscnetDir = fullfile(basedir, '..', '..', 'external', 'sscnet', 'matlab_code'); 9 | addpath(sscnetDir); 10 | 11 | objectcategory = load(fullfile(sscnetDir, 'suncgObjcategory.mat')); 12 | addpath(fullfile(sscnetDir, 'utils')); 13 | addpath(fullfile(basedir, 'matUtils')); 14 | sceneIds = getFileNamesFromDirectory(fullfile(suncgDir, 'camera'),'types',{''}); 15 | sceneIds = sceneIds(3:end); 16 | sceneIds = sort(sceneIds); 17 | 18 | if max_id == 0 19 | max_id = length(sceneIds); 20 | end 21 | parfor ix = min_id:max_id 22 | genSceneData(sceneIds{ix}, suncgDir, objectcategory.objcategory, fileNamesAll); 23 | end 24 | end 25 | 26 | 27 | function genSceneData(sceneId, suncgDir, objcategory, fileNamesAll) 28 | %% generating scene voxels in camera view 29 | camerafile = sprintf('%s/camera/%s/room_camera.txt', suncgDir, sceneId); 30 | cameraInfofile = sprintf('%s/camera/%s/room_camera_name.txt', suncgDir, sceneId); 31 | cameraInfo = readCameraName(cameraInfofile); 32 | cameraPoses = readCameraPose(camerafile); 33 | voxPath = fullfile(suncgDir, 'scene_voxels', sceneId); 34 | mkdirOptional(voxPath); 35 | 36 | for cameraId = 1:length(cameraInfo) 37 | if ~ismember(sprintf('%s/%06d', sceneId, cameraId-1), fileNamesAll) 38 | continue 39 | end 40 | sceneVoxMatFilename = fullfile(voxPath,sprintf('%06d_voxels.mat',cameraId-1)); 41 | sceneVoxFilename = [sceneVoxMatFilename(1:(end-4)),'.bin']; 42 | if exist(sceneVoxMatFilename, 'file') 43 | continue 44 | end 45 | 46 | % get camera extrisic yup -> zup 47 | extCam2World = camPose2Extrinsics(cameraPoses(cameraId,:)); 48 | extCam2World = [[1 0 0; 0 0 1; 0 1 0]*extCam2World(1:3,1:3) extCam2World([1,3,2],4)]; 49 | 50 | % generating scene voxels in camera view 51 | [sceneVox] = get_scene_vox(suncgDir,sceneId,cameraInfo(cameraId).floorId+1,cameraInfo(cameraId).roomId+1,extCam2World,objcategory); 52 | camPoseArr = [extCam2World',[0;0;0;1]]; %' 53 | % camPoseArr = camPoseArr(:); 54 | sceneVox = (sceneVox ~= 0) & (sceneVox ~= 255); 55 | 56 | % Compress with RLE and save to binary file 57 | % writeRLEfile(sceneVoxFilename, sceneVox,camPoseArr,voxOriginWorld) 58 | save(sceneVoxMatFilename,'sceneVox','camPoseArr'); 59 | end 60 | end -------------------------------------------------------------------------------- /utils/visutil.py: -------------------------------------------------------------------------------- 1 | '''Code from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix''' 2 | from __future__ import print_function 3 | import torch 4 | import numpy as np 5 | from PIL import Image 6 | import inspect, re 7 | import 
numpy as np 8 | import os 9 | import collections 10 | 11 | # Converts a Tensor into a Numpy array 12 | # |imtype|: the desired type of the converted numpy array 13 | def tensor2im(image_tensor, imtype=np.uint8): 14 | image_numpy = image_tensor[0].cpu().float().numpy() 15 | image_numpy = (np.transpose(image_numpy, (1, 2, 0))) * 255.0 16 | return image_numpy.astype(imtype) 17 | 18 | 19 | def undo_resnet_preprocess(image_tensor): 20 | image_tensor = image_tensor.clone() 21 | image_tensor.narrow(1,0,1).mul_(.229).add_(.485) 22 | image_tensor.narrow(1,1,1).mul_(.224).add_(.456) 23 | image_tensor.narrow(1,2,1).mul_(.225).add_(.406) 24 | return image_tensor 25 | 26 | 27 | def diagnose_network(net, name='network'): 28 | mean = 0.0 29 | count = 0 30 | for param in net.parameters(): 31 | if param.grad is not None: 32 | mean += torch.mean(torch.abs(param.grad.data)) 33 | count += 1 34 | if count > 0: 35 | mean = mean / count 36 | print(name) 37 | print(mean) 38 | 39 | 40 | def save_image(image_numpy, image_path): 41 | image_pil = Image.fromarray(image_numpy) 42 | image_pil.save(image_path) 43 | 44 | def info(object, spacing=10, collapse=1): 45 | """Print methods and doc strings. 46 | Takes module, class, list, dictionary, or string.""" 47 | methodList = [e for e in dir(object) if isinstance(getattr(object, e), collections.Callable)] 48 | processFunc = collapse and (lambda s: " ".join(s.split())) or (lambda s: s) 49 | print( "\n".join(["%s %s" % 50 | (method.ljust(spacing), 51 | processFunc(str(getattr(object, method).__doc__))) 52 | for method in methodList]) ) 53 | 54 | def varname(p): 55 | for line in inspect.getframeinfo(inspect.currentframe().f_back)[3]: 56 | m = re.search(r'\bvarname\s*\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)', line) 57 | if m: 58 | return m.group(1) 59 | 60 | def print_numpy(x, val=True, shp=False): 61 | x = x.astype(np.float64) 62 | if shp: 63 | print('shape,', x.shape) 64 | if val: 65 | x = x.flatten() 66 | print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % ( 67 | np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x))) 68 | 69 | 70 | def mkdirs(paths): 71 | if isinstance(paths, list) and not isinstance(paths, str): 72 | for path in paths: 73 | mkdir(path) 74 | else: 75 | mkdir(paths) 76 | 77 | 78 | def mkdir(path): 79 | if not os.path.exists(path): 80 | os.makedirs(path) 81 | -------------------------------------------------------------------------------- /nnutils/voxel_net.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Scene level voxels prediction net. 3 | ''' 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | from absl import flags 8 | import torch 9 | import torch.nn as nn 10 | from . 
import net_blocks as nb 11 | import torchvision 12 | #from oc3d.nnutils import roi_pooling 13 | import pdb 14 | 15 | #-------------- flags -------------# 16 | #----------------------------------# 17 | flags.DEFINE_integer('nz_voxels', 2000, 'Number of latent feat dimension for shape prediction') 18 | flags.DEFINE_integer('n_voxels_upconv', 5, 'Number of upconvolution layers') 19 | 20 | #------------- Modules ------------# 21 | #----------------------------------# 22 | class ResNetConv(nn.Module): 23 | def __init__(self, n_blocks=4): 24 | super(ResNetConv, self).__init__() 25 | self.resnet = torchvision.models.resnet18(pretrained=True) 26 | self.n_blocks=n_blocks 27 | 28 | def forward(self, x): 29 | n_blocks = self.n_blocks 30 | x = self.resnet.conv1(x) 31 | x = self.resnet.bn1(x) 32 | x = self.resnet.relu(x) 33 | x = self.resnet.maxpool(x) 34 | 35 | if n_blocks >= 1: 36 | x = self.resnet.layer1(x) 37 | if n_blocks >= 2: 38 | x = self.resnet.layer2(x) 39 | if n_blocks >= 3: 40 | x = self.resnet.layer3(x) 41 | if n_blocks >= 4: 42 | x = self.resnet.layer4(x) 43 | return x 44 | 45 | #------------ Voxel Net -----------# 46 | #----------------------------------# 47 | class VoxelNet(nn.Module): 48 | def __init__( 49 | self, img_size, 50 | voxel_size, nz_voxels=2000, 51 | nz_init=256, n_voxels_upconv=5 52 | ): 53 | super(VoxelNet, self).__init__() 54 | 55 | self.resnet_conv = ResNetConv(n_blocks=4) 56 | nc_inp = 512*(img_size[0]//32)*(img_size[1]//32) 57 | 58 | self.encoder = nb.fc_stack(nc_inp, nz_voxels, 2) 59 | 60 | upsamp_factor = pow(2, n_voxels_upconv) 61 | self.spatial_size_init = [voxel_size[0]//upsamp_factor, voxel_size[1]//upsamp_factor, voxel_size[2]//upsamp_factor] 62 | nz_spatial = self.spatial_size_init[0]*self.spatial_size_init[1]*self.spatial_size_init[2] 63 | self.nz_init = nz_init 64 | 65 | self.decoder_reshape = nb.fc_stack(nz_voxels, nz_init*nz_spatial, 1) 66 | self.decoder = nb.decoder3d(n_voxels_upconv, None, nz_init, init_fc=False) 67 | 68 | def forward(self, imgs_inp): 69 | img_feat = self.resnet_conv.forward(imgs_inp) 70 | img_feat = img_feat.view(imgs_inp.size(0), -1) 71 | img_feat = self.encoder.forward(img_feat) 72 | img_feat = self.decoder_reshape.forward(img_feat) 73 | img_feat = img_feat.view( 74 | imgs_inp.size(0), 75 | self.nz_init, 76 | self.spatial_size_init[0], 77 | self.spatial_size_init[1], 78 | self.spatial_size_init[2] 79 | ) 80 | voxels_pred = self.decoder.forward(img_feat) 81 | return voxels_pred 82 | -------------------------------------------------------------------------------- /utils/make_html.py: -------------------------------------------------------------------------------- 1 | """Script for making html from a directory. 
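Each sub-directory of --imgs_root_dir is expected to contain the same set of image files; the script writes a single page (--html_dir/--html_name.html) with one table row per sub-directory and one column per image name.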
2 | """ 3 | # Sample usage: 4 | # (box3d_shape_ft) python make_html.py --imgs_root_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/box3d/val/box3d_shape_ft' --html_name=box3d_shape_ft --html_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/pages/' 5 | 6 | # (dwr_shape_ft) python make_html.py --imgs_root_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/dwr/val/dwr_shape_ft' --html_name=dwr_shape_ft --html_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/pages/' 7 | 8 | # (depth_baseline) python make_html.py --imgs_root_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/depth_baseline' --html_name=depth_baseline --html_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/pages/' 9 | 10 | # (voxels_baseline) python make_html.py --imgs_root_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/voxels_baseline' --html_name=voxels_baseline --html_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/pages/' 11 | 12 | # (nyu) python make_html.py --imgs_root_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/nyu/test/dwr_shape_ft' --html_name=nyu_dwr_shape_ft --html_dir='/data0/shubhtuls/code/oc3d/cachedir/results_vis/pages/' 13 | 14 | from __future__ import absolute_import 15 | from __future__ import division 16 | from __future__ import print_function 17 | from absl import app 18 | from absl import flags 19 | import os 20 | import os.path as osp 21 | from yattag import Doc 22 | from yattag import indent 23 | import numpy as np 24 | 25 | flags.DEFINE_string('imgs_root_dir', '', 'Directory where renderings are saved') 26 | flags.DEFINE_string('html_name', '', 'Name of webpage') 27 | flags.DEFINE_string('html_dir', '', 'Directory where output should be saved') 28 | 29 | def main(_): 30 | opts = flags.FLAGS 31 | vis_dir_names = os.listdir(opts.imgs_root_dir) 32 | vis_dir_names.sort() 33 | img_keys = os.listdir(osp.join(opts.imgs_root_dir, vis_dir_names[0])) 34 | img_keys.sort() 35 | img_root_rel_path = osp.relpath(opts.imgs_root_dir, opts.html_dir) 36 | if not os.path.exists(opts.html_dir): 37 | os.makedirs(opts.html_dir) 38 | html_file = osp.join(opts.html_dir, opts.html_name + '.html') 39 | ctr = 0 40 | 41 | doc, tag, text = Doc().tagtext() 42 | with tag('html'): 43 | with tag('body'): 44 | with tag('table', style = 'width:100%', border="1"): 45 | with tag('tr'): 46 | for img_name in img_keys: 47 | with tag('td'): 48 | text(img_name) 49 | 50 | for img_dir in vis_dir_names: 51 | with tag('tr'): 52 | for img_name in img_keys: 53 | with tag('td'): 54 | with tag('img', width="640px", src=osp.join(img_root_rel_path, img_dir, img_name)): 55 | ctr += 1 56 | 57 | r1 = doc.getvalue() 58 | r2 = indent(r1) 59 | 60 | with open(html_file, 'wt') as f: 61 | f.write(r2) 62 | 63 | 64 | if __name__ == '__main__': 65 | app.run() 66 | -------------------------------------------------------------------------------- /docs/training.md: -------------------------------------------------------------------------------- 1 | # Instructions to train models and baselines 2 | 3 | ### Prerequisites 4 | Make sure the data loading and preprocessing is complete. The training will also be visualized using visdom which can be started by 5 | ``` 6 | python -m visdom.server 7 | ``` 8 | Note that all the training jobs should be launched from one directory above CODE_ROOT. Additionally, the sample scripts below assume that the code folder is named 'factored3d'. 9 | 10 | 11 | ### Training factored 3D prediction models 12 | We first train the object-centric 3D prediction module. 
Since training with proposals or predicting full voxels is computationally expensive, we train in stages to speed up the process. 13 | ``` 14 | # Download a pretrained object voxel auto-encoder 15 | cd CODE_ROOT/cachedir/snapshots; 16 | wget https://people.eecs.berkeley.edu/~shubhtuls/cachedir/factored3d/object_autoenc_32.tar.gz && tar -xvzf object_autoenc_32.tar.gz 17 | 18 | # All jobs should be launched from one level above code directory 19 | cd CODE_ROOT/..; 20 | 21 | # First train the object-centric 3D prediction model on ground-truth boxes 22 | python -m factored3d.experiments.suncg.box3d --plot_scalars --display_visuals --display_freq=2000 --save_epoch_freq=1 --batch_size=8 --name=box3d_base --use_context --pred_voxels=False --classify_rot --shape_loss_wt=10 --n_data_workers=0 --num_epochs=8 23 | 24 | # Fine-tune the above model using proposals 25 | python -m factored3d.experiments.suncg.dwr --name=dwr_base --classify_rot --pred_voxels=False --use_context --plot_scalars --display_visuals --save_epoch_freq=1 --display_freq=1000 --display_id=100 --box3d_ft --shape_loss_wt=10 --label_loss_wt=10 --batch_size=8 --num_epochs=1 26 | 27 | # Finally, also learn to predict shape voxels instead of auto-encoder shape code 28 | python -m factored3d.experiments.suncg.dwr --name=dwr_shape_ft --classify_rot --pred_voxels=True --shape_dec_ft --use_context --plot_scalars --display_visuals --save_epoch_freq=1 --display_freq=1000 --display_id=100 --shape_loss_wt=2 --label_loss_wt=10 --batch_size=8 --ft_pretrain_epoch=1 --num_epochs=1 29 | ``` 30 | 31 | We also train the layout (amodal inverse depth) prediction CNN 32 | ``` 33 | # job should be launched from one level above code directory 34 | cd CODE_ROOT/.. 35 | 36 | python -m factored3d.experiments.suncg.layout --plot_scalars --display_visuals --save_epoch_freq=1 --batch_size=8 --name=layout_pred --display_freq=2000 --suncg_dl_out_layout=true --suncg_dl_out_depth=false --display_id=40 --num_epochs=8 37 | ``` 38 | 39 | ### Training (inverse) depth prediction baseline 40 | ``` 41 | python -m factored3d.experiments.suncg.layout --plot_scalars --display_visuals --save_epoch_freq=1 --batch_size=8 --name=depth_baseline --display_freq=2000 --suncg_dl_out_layout=false --suncg_dl_out_depth=true --display_id=20 --num_epochs=8 42 | ``` 43 | 44 | ### Training scene voxel prediction baseline 45 | ``` 46 | # job should be launched from one level above code directory 47 | cd CODE_ROOT/..; 48 | 49 | python -m factored3d.experiments.suncg.voxels --plot_scalars --display_visuals --save_epoch_freq=1 --batch_size=8 --name=voxels_baseline --display_freq=2000 --num_epochs=8 50 | ``` 51 | -------------------------------------------------------------------------------- /preprocess/suncg/precompute_edge_boxes.m: -------------------------------------------------------------------------------- 1 | function precompute_edge_boxes(min_id, max_id) 2 | globals; 3 | suncgDir = suncgDir; % redundancy useful for parfor 4 | 5 | addpath(genpath('./matUtils')); 6 | basedir = pwd(); 7 | fileNamesAll = strsplit(fileread(fullfile(suncgDir, 'zipfiles', 'data_goodlist_v2.txt')), '\n'); 8 | 9 | addpath(genpath('./matUtils')); 10 | addpath(genpath('../../external/edges/')); 11 | addpath(genpath('../../external/toolbox/')); 12 | 13 | basedir = pwd(); 14 | saveDir = fullfile(suncgDir, 'edgebox_proposals'); 15 | nodeDir = fullfile(suncgDir, 'bboxes_node'); 16 | mkdirOptional(saveDir); 17 | 18 | sceneIds = getFileNamesFromDirectory(fullfile(suncgDir, 'camera'),'types',{''}); 19 | sceneIds = 
sceneIds(3:end); 20 | sceneIds = sort(sceneIds); 21 | if max_id == 0 22 | max_id = length(sceneIds); 23 | end 24 | 25 | %% load pre-trained edge detection model and set opts (see edgesDemo.m) 26 | model=load('../../external/edges/models/forest/modelBsds'); model=model.model; 27 | model.opts.multiscale=0; model.opts.sharpen=2; model.opts.nThreads=4; 28 | 29 | %% set up opts for edgeBoxes (see edgeBoxes.m) 30 | opts = edgeBoxes; 31 | opts.alpha = .65; % step size of sliding window search 32 | opts.beta = .75; % nms threshold for object proposals 33 | opts.minScore = .01; % min score of boxes to detect 34 | opts.maxBoxes = 1e3; % max number of boxes to detect 35 | 36 | for ix = min_id:max_id 37 | %for ix = min_id:max_id 38 | if mod(ix, 100) == 0 39 | disp(ix) 40 | end 41 | sceneId = sceneIds{ix}; 42 | mkdirOptional(fullfile(saveDir, sceneId)); 43 | imgsAll = getFileNamesFromDirectory(fullfile(suncgDir, 'renderings_node', sceneId),'types',{'.png'}); 44 | 45 | for cameraId=1:length(imgsAll) 46 | saveFile = fullfile(saveDir, sceneId, sprintf('%06d_proposals.mat', cameraId-1)); 47 | if exist(saveFile, 'file') 48 | continue 49 | end 50 | if ~ismember(sprintf('%s/%06d', sceneId, cameraId-1), fileNamesAll) 51 | continue 52 | end 53 | img_file = fullfile(suncgDir, 'renderings_ldr', sceneId, sprintf('%06d_mlt.png', cameraId-1)); 54 | nodeFile = fullfile(nodeDir, sceneId, sprintf('%06d_bboxes.mat', cameraId-1)); 55 | 56 | if ~exist(img_file, 'file') 57 | % Bad file 58 | disp(img_file); 59 | continue 60 | end 61 | 62 | % disp(saveFile); 63 | img = imread(img_file); 64 | var = load(nodeFile); 65 | prop=edgeBoxes(img,model,opts); 66 | proposals = prop; 67 | proposals(:,3:4) = proposals(:,3:4) + proposals(:,1:2); 68 | 69 | overlaps = bboxOverlap(var.bboxes, proposals(:,1:4)); 70 | overlapsGt = (max(overlaps, [], 2) > 0.7); 71 | [overlapsProposals, gtInds] = max(overlaps, [], 1); 72 | 73 | saveFunc(saveFile, proposals, overlapsProposals, gtInds); 74 | end 75 | end 76 | end 77 | 78 | function saveFunc(filename, proposals, overlaps, gtInds) 79 | save(filename,'proposals', 'overlaps', 'gtInds'); 80 | end -------------------------------------------------------------------------------- /nnutils/test_utils.py: -------------------------------------------------------------------------------- 1 | """Generic Testing Utils. 
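A concrete tester is expected to subclass Tester, implement define_model,
init_dataset, set_input and test, and is then driven roughly as in this
hypothetical sketch (the class name below is illustrative, not part of this
module):

    class SceneTester(Tester):
        def define_model(self): ...      # build networks, call load_network
        def init_dataset(self): ...      # create the evaluation dataloader
        def set_input(self, batch): ...  # move a batch to the GPU
        def test(self): ...              # loop over data, optionally save_current_visuals

    opts = flags.FLAGS                   # populated once app.run() has parsed the flags
    tester = SceneTester(opts)
    tester.init_testing()                # calls define_model() and init_dataset()
    tester.test()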
2 | """ 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | import torch 8 | import os 9 | import os.path as osp 10 | import time 11 | import pdb 12 | from absl import flags 13 | 14 | import scipy.misc 15 | from ..utils.visualizer import Visualizer 16 | 17 | #-------------- flags -------------# 18 | #----------------------------------# 19 | ## Flags for training 20 | curr_path = osp.dirname(osp.abspath(__file__)) 21 | cache_path = osp.join(curr_path, '..', 'cachedir') 22 | 23 | flags.DEFINE_string('name', 'exp_name', 'Experiment Name') 24 | flags.DEFINE_string('cache_dir', cache_path, 'Cachedir') 25 | flags.DEFINE_string('eval_set', 'val', 'which set to evaluate on') 26 | flags.DEFINE_integer('gpu_id', 0, 'Which gpu to use') 27 | 28 | flags.DEFINE_integer('batch_size', 4, 'Size of minibatches') 29 | flags.DEFINE_integer('num_train_epoch', 0, 'Number of training iterations') 30 | flags.DEFINE_integer('n_data_workers', 4, 'Number of data loading workers') 31 | 32 | 33 | ## Flags for logging and snapshotting 34 | flags.DEFINE_string('checkpoint_dir', osp.join(cache_path, 'snapshots'), 35 | 'Directory where networks are saved') 36 | flags.DEFINE_string( 37 | 'results_vis_dir', osp.join(cache_path, 'results_vis'), 38 | 'Directory where intermittent results will be saved') 39 | flags.DEFINE_string( 40 | 'results_eval_dir', osp.join(cache_path, 'evaluation'), 41 | 'Directory where evaluation results will be saved') 42 | 43 | flags.DEFINE_boolean('save_visuals', False, 'Whether to save intermittent visuals') 44 | flags.DEFINE_integer('visuals_freq', 50, 'Save visuals every few forward passes') 45 | flags.DEFINE_integer('max_eval_iter', 0, 'Maximum evaluation iterations. 0 => 1 epoch.') 46 | 47 | #-------- tranining class ---------# 48 | #----------------------------------# 49 | class Tester(): 50 | def __init__(self, opts): 51 | self.opts = opts 52 | self.vis_iter = 0 53 | self.gpu_id = opts.gpu_id 54 | self.Tensor = torch.cuda.FloatTensor if (self.gpu_id is not None) else torch.Tensor 55 | self.invalid_batch = False #the trainer can optionally reset this every iteration during set_input call 56 | self.save_dir = os.path.join(opts.checkpoint_dir, opts.name) 57 | if not os.path.exists(self.save_dir): 58 | os.makedirs(self.save_dir) 59 | log_file = os.path.join(self.save_dir, 'opts_testing.log') 60 | with open(log_file, 'w') as f: 61 | for k in dir(opts): 62 | f.write('{}: {}\n'.format(k, opts.__getattr__(k))) 63 | 64 | # helper loading function that can be used by subclasses 65 | def load_network(self, network, network_label, epoch_label, network_dir=None): 66 | save_filename = '{}_net_{}.pth'.format(network_label, epoch_label) 67 | if network_dir is None: 68 | network_dir = self.save_dir 69 | save_path = os.path.join(network_dir, save_filename) 70 | network.load_state_dict(torch.load(save_path)) 71 | return 72 | 73 | def save_current_visuals(self): 74 | visuals = self.get_current_visuals() 75 | imgs_dir = osp.join(self.opts.results_vis_dir, 'vis_iter_{}'.format(self.vis_iter)) 76 | if not os.path.exists(imgs_dir): 77 | os.makedirs(imgs_dir) 78 | for k in visuals: 79 | img_path = osp.join(imgs_dir, k + '.png') 80 | scipy.misc.imsave(img_path, visuals[k]) 81 | self.vis_iter += 1 82 | 83 | def define_model(self): 84 | '''Should be implemented by the child class.''' 85 | raise NotImplementedError 86 | 87 | def init_dataset(self): 88 | '''Should be implemented by the child class.''' 89 | raise NotImplementedError 90 | 91 | def 
set_input(self, batch): 92 | '''Should be implemented by the child class.''' 93 | raise NotImplementedError 94 | 95 | def init_testing(self): 96 | opts = self.opts 97 | self.define_model() 98 | self.init_dataset() 99 | 100 | def test(self): 101 | '''Should be implemented by the child class.''' 102 | raise NotImplementedError 103 | -------------------------------------------------------------------------------- /benchmark/suncg/pr_plots.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('Agg') 3 | import seaborn as sns 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import json 7 | import os 8 | import os.path as osp 9 | import platform 10 | 11 | eval_set = 'val' 12 | net_name = 'dwr_shape_ft' 13 | 14 | curr_path = osp.dirname(osp.abspath(__file__)) 15 | cache_path = osp.join(curr_path, '..', '..', 'cachedir') 16 | plots_dir = os.path.join(cache_path, 'evaluation', 'icp', eval_set, 'plots') 17 | 18 | def subplots(plt, Y_X, sz_y_sz_x=(10,10)): 19 | Y,X = Y_X 20 | sz_y,sz_x = sz_y_sz_x 21 | plt.rcParams['figure.figsize'] = (X*sz_x, Y*sz_y) 22 | fig, axes = plt.subplots(Y, X) 23 | plt.subplots_adjust(wspace=0.1, hspace=0.1) 24 | return fig, axes 25 | 26 | def pr_plots(net_name, iter_number, set_number): 27 | dir_name = os.path.join(cache_path, 'evaluation', 'dwr') 28 | json_file = os.path.join(dir_name, set_number, net_name, 'eval_set{}_0.json'.format(set_number)) 29 | 30 | with open(json_file, 'rt') as f: 31 | a = json.load(f) 32 | imset = a['eval_params']['set'].title() 33 | 34 | plot_file = os.path.join(dir_name, set_number, net_name, 'eval_set{}_0_back.pdf'.format(set_number)) 35 | print('Saving plot to {}'.format(osp.abspath(plot_file))) 36 | # Plot 1 with AP for all, and minus other things one at a time. 
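    # The evaluation json read above is assumed (illustrative sketch only; the
    # field values are made up) to look roughly like:
    #   {"eval_params": {"set": "val", "ap_str": ["all", "-pose", ...]},
    #    "bench_summary": [{"ap": 0.57, "prec": [...], "rec": [...]}, ...]}
    # Each bench_summary entry yields one precision-recall curve; the full metric
    # (index 0 here, index 6 in the second plot) is drawn with a solid line and
    # the ablated variants with dashed lines.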
37 | #with sns.axes_style("darkgrid"): 38 | with plt.style.context('fivethirtyeight'): 39 | fig, axes = subplots(plt, (1,1), (7,7)) 40 | ax = axes 41 | legs = [] 42 | i_order = [0, 1, 2, 3, 5, 4] 43 | # for i in np.arange(6, 12): 44 | for jx in range(6): 45 | i = i_order[jx] 46 | prec = np.array(a['bench_summary'][i]['prec']) 47 | rec = np.array(a['bench_summary'][i]['rec']) 48 | if i == 0: 49 | ax.plot(rec, prec, '-') 50 | legs.append('{:4.1f} {:s}'.format(100*a['bench_summary'][i]['ap'], a['eval_params']['ap_str'][i])) 51 | else: 52 | ax.plot(rec, prec, '--') 53 | legs.append('{:4.1f} {:s}'.format(100*a['bench_summary'][i]['ap'], a['eval_params']['ap_str'][i])) 54 | ax.set_xlim([0, 1]); ax.set_ylim([0, 1]); 55 | ax.set_xlabel('Recall', fontsize=20) 56 | ax.set_ylabel('Precision', fontsize=20) 57 | ax.set_title('Precision Recall Plots on {:s} Set'.format(imset), fontsize=20) 58 | 59 | l = ax.legend(legs, fontsize=18, bbox_to_anchor=(0,0), loc='lower left', framealpha=0.5, frameon=True) 60 | 61 | ax.plot([0,1], [0,0], 'k-') 62 | ax.plot([0,0], [0,1], 'k-') 63 | plt.tick_params(axis='both', which='major', labelsize=20) 64 | extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted()) 65 | plt.savefig(plot_file, bbox_inches='tight') 66 | plt.close(fig) 67 | 68 | plot_file = os.path.join(dir_name, set_number, net_name, 'eval_set{}_0_frwd.pdf'.format(set_number)) 69 | print('Saving plot to {}'.format(osp.abspath(plot_file))) 70 | 71 | with plt.style.context('fivethirtyeight'): 72 | fig, axes = subplots(plt, (1,1), (7,7)) 73 | ax = axes 74 | legs = [] 75 | i_order = [6, 9, 7, 8, 10, 11] 76 | # for i in np.arange(6, 12): 77 | for jx in range(6): 78 | i = i_order[jx] 79 | prec = np.array(a['bench_summary'][i]['prec']) 80 | rec = np.array(a['bench_summary'][i]['rec']) 81 | if i == 6: 82 | ax.plot(rec, prec, '-') 83 | legs.append('{:4.1f} {:s}'.format(100*a['bench_summary'][i]['ap'], a['eval_params']['ap_str'][i])) 84 | else: 85 | ax.plot(rec, prec, '--') 86 | str_ = '+'+'+'.join(a['eval_params']['ap_str'][i].split('+')[1:]) 87 | legs.append('{:4.1f} {:s}'.format(100*a['bench_summary'][i]['ap'], str_)) 88 | ax.set_xlim([0, 1]); ax.set_ylim([0, 1]); 89 | ax.set_xlabel('Recall', fontsize=20) 90 | ax.set_ylabel('Precision', fontsize=20) 91 | ax.set_title('Precision Recall Plots on {:s} Set'.format(imset), fontsize=20) 92 | 93 | l = ax.legend(legs, fontsize=18, bbox_to_anchor=(0,0), loc='lower left', framealpha=0.5, frameon=True) 94 | ax.plot([0,1], [0,0], 'k-') 95 | ax.plot([0,0], [0,1], 'k-') 96 | plt.tick_params(axis='both', which='major', labelsize=20) 97 | extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted()) 98 | plt.savefig(plot_file, bbox_inches='tight') 99 | plt.close(fig) 100 | 101 | if __name__ == '__main__': 102 | pr_plots(net_name, 0, eval_set) 103 | -------------------------------------------------------------------------------- /benchmark/suncg/sc_plots.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import matplotlib 6 | matplotlib.use('Agg') 7 | import seaborn as sns 8 | import matplotlib.pyplot as plt 9 | 10 | import numpy as np 11 | import json 12 | import os 13 | import os.path as osp 14 | import scipy.io 15 | 16 | eval_set = 'val' 17 | netName = 'dwr_shape_ft' 18 | 19 | curr_path = osp.dirname(osp.abspath(__file__)) 20 | cache_path = osp.join(curr_path, '..', '..', 'cachedir') 21 | 
plots_dir = os.path.join(cache_path, 'evaluation', 'icp', eval_set, 'plots') 22 | 23 | if not os.path.exists(plots_dir): 24 | os.makedirs(plots_dir) 25 | 26 | def subplots(plt, Y_X, sz_y_sz_x=(10,10)): 27 | Y,X = Y_X 28 | sz_y,sz_x = sz_y_sz_x 29 | plt.rcParams['figure.figsize'] = (X*sz_x, Y*sz_y) 30 | fig, axes = plt.subplots(Y, X) 31 | plt.subplots_adjust(wspace=0.1, hspace=0.1) 32 | return fig, axes 33 | 34 | def toNpArray(matVar): 35 | out = np.zeros(len(matVar)) 36 | for i in range(len(matVar)): 37 | out[i] = matVar[i][0] 38 | return out 39 | 40 | def plotExperiment(expName, errors, representationNames, xLeg, varName, maxRange=1): 41 | with plt.style.context('fivethirtyeight'): 42 | fig, axes = subplots(plt, (1,1), (6,6)) 43 | ax = axes 44 | 45 | legs = [] 46 | for i in range(len(representationNames)): 47 | repName = representationNames[i] 48 | perf = np.sort(errors[varName][i, :]) 49 | perf = perf[~np.isnan(perf)] 50 | perf = perf[perf < 1e6] 51 | medVal = np.median(perf) 52 | percentile = np.linspace(0,1,np.size(perf,0)) 53 | ax.plot(percentile, perf, '-') 54 | legs.append('{:s}'.format(repName)) 55 | ax.set_ylim([0, maxRange]); ax.set_xlim([0, 1]); 56 | ax.set_ylabel(xLeg, fontsize=20) 57 | ax.set_xlabel('Fraction of Data', fontsize=20) 58 | ax.set_title(expName, fontsize=20) 59 | 60 | l = ax.legend(legs, title="Scene Representations:", fontsize=14, bbox_to_anchor=(0,1), loc='upper left', framealpha=0.5, frameon=True) 61 | 62 | ax.plot([0,0], [0,maxRange], 'k-') 63 | ax.plot([0,1], [0,0], 'k-') 64 | plt.tick_params(axis='both', which='major', labelsize=20) 65 | extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted()) 66 | plot_file = os.path.join(plots_dir, varName + '.pdf') 67 | plt.savefig(plot_file, bbox_inches='tight') 68 | plt.close(fig) 69 | 70 | resultsDir = os.path.join(cache_path, 'evaluation', 'icp', eval_set, netName) 71 | matFile = os.path.join(resultsDir, 'results.mat') 72 | results = scipy.io.loadmat(matFile) 73 | 74 | ######################################## 75 | ############## Objects ############### 76 | expName = 'Object Representation Ability' 77 | representationNames = ['Factored (ours)', 'Depth', 'Voxels'] 78 | xLeg = 'Scale-normalized Mean Squared Error' 79 | varName = 'object_eval' 80 | plotExperiment(expName, results, representationNames, xLeg, varName, maxRange=1e-2) 81 | 82 | ######################################## 83 | ############### Depth ################ 84 | expName = 'Depth Representation Ability' 85 | representationNames = ['Factored (ours)', 'Depth', 'Voxels'] 86 | xLeg = 'Mean Squared Error (in $m^2$)' 87 | varName = 'depth_eval' 88 | plotExperiment(expName, results, representationNames, xLeg, varName, maxRange=8e-1) 89 | 90 | ######################################## 91 | ############## Voxels ################ 92 | expName = 'Volume Representation Ability' 93 | representationNames = ['Factored (ours)', 'Depth', 'Voxels'] 94 | xLeg = 'IoU (Higher is better)' 95 | varName = 'volume_overlap_eval' 96 | plotExperiment(expName, results, representationNames, xLeg, varName, maxRange=1) 97 | 98 | ######################################## 99 | ########## Visible Layout ############ 100 | expName = 'Visible Layout Representation Ability' 101 | representationNames = ['Factored (ours)', 'Depth', 'Voxels'] 102 | xLeg = 'Mean Squared Error (in $m^2$)' 103 | varName = 'layout_eval' 104 | plotExperiment(expName, results, representationNames, xLeg, varName, maxRange=5e-1) 105 | 106 | ######################################## 107 | ########## 
Amodal Layout ############# 108 | expName = 'Amodal Layout Representation Ability' 109 | representationNames = ['Factored (ours)', 'Depth', 'Voxels'] 110 | xLeg = 'Mean Squared Error (in $m^2$)' 111 | varName = 'layout_amodal_eval' 112 | plotExperiment(expName, results, representationNames, xLeg, varName, maxRange=8e-1) 113 | 114 | print('Plots saved in {}'.format(osp.abspath(plots_dir))) -------------------------------------------------------------------------------- /experiments/suncg/layout.py: -------------------------------------------------------------------------------- 1 | """Script for layout prediction predictor experiment. 2 | """ 3 | # example usage (depth baseline) : python -m factored3d.experiments.suncg.layout --plot_scalars --display_visuals --save_epoch_freq=1 --batch_size=8 --name=depth_baseline --display_freq=2000 --suncg_dl_out_layout=false --suncg_dl_out_depth=true --display_id=20 4 | 5 | # example usage (layout prediction) : python -m factored3d.experiments.suncg.layout --plot_scalars --display_visuals --save_epoch_freq=1 --batch_size=8 --name=layout_pred --display_freq=2000 --suncg_dl_out_layout=true --suncg_dl_out_depth=false --display_id=40 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | from absl import app 11 | from absl import flags 12 | import os 13 | import os.path as osp 14 | import numpy as np 15 | import torch 16 | from torch.autograd import Variable 17 | import time 18 | import pdb 19 | 20 | from ...data import suncg as suncg_data 21 | from ...utils import suncg_parse 22 | from ...nnutils import train_utils 23 | from ...nnutils import disp_net 24 | from ...utils import visutil 25 | from ...renderer import utils as render_utils 26 | 27 | FLAGS = flags.FLAGS 28 | 29 | class LayoutTrainer(train_utils.Trainer): 30 | def define_model(self): 31 | self.model = disp_net.dispnet().cuda(device_id=self.opts.gpu_id) 32 | if self.opts.num_pretrain_epochs > 0: 33 | self.load_network(self.model, 'pred', self.opts.num_pretrain_epochs-1) 34 | return 35 | 36 | def init_dataset(self): 37 | opts = self.opts 38 | split_dir = osp.join(opts.suncg_dir, 'splits') 39 | self.split = suncg_parse.get_split(split_dir, house_names=os.listdir(osp.join(opts.suncg_dir, 'camera'))) 40 | self.dataloader = suncg_data.suncg_data_loader(self.split['train'], opts) 41 | 42 | def define_criterion(self): 43 | self.criterion = torch.nn.L1Loss().cuda(device_id=self.opts.gpu_id) 44 | 45 | def set_input(self, batch): 46 | opts = self.opts 47 | img_tensor = batch['img'].type(torch.FloatTensor) 48 | 49 | # batch_size=1 messes with batch norm 50 | self.invalid_batch = (img_tensor.size(0) == 1) 51 | 52 | if self.invalid_batch: 53 | return 54 | else: 55 | self.input_imgs = Variable( 56 | img_tensor.cuda(device=self.opts.gpu_id), requires_grad=False) 57 | 58 | if opts.suncg_dl_out_layout: 59 | trg_tensor = batch['layout'] 60 | else: 61 | assert(opts.suncg_dl_out_depth) 62 | trg_tensor = batch['depth'] 63 | 64 | self.trg_layout = Variable( 65 | trg_tensor.type(torch.FloatTensor).cuda(device=self.opts.gpu_id), requires_grad=False) 66 | 67 | def forward(self): 68 | self.pred_layout = self.model.forward(self.input_imgs) 69 | self.total_loss = self.criterion.forward(self.pred_layout, self.trg_layout) 70 | 71 | def get_current_points(self): 72 | pts_dict = {} 73 | #for b in range(self.opts.batch_size): 74 | for b in range(1): 75 | dmap_gt = self.trg_layout.data[b].cpu().numpy().transpose((1,2,0)) 76 | dmap_pred = 
self.pred_layout.data[b].cpu().numpy().transpose((1,2,0)) 77 | keys = ['gt_layout_' + str(b), 'pred_layout_' + str(b)] 78 | dmaps = [dmap_gt, dmap_pred] 79 | min_disp = 1e-2 80 | for kx in range(2): 81 | dmap_points = render_utils.dispmap_to_points( 82 | dmaps[kx], 83 | suncg_parse.cam_intrinsic(), 84 | scale_x=self.opts.layout_width/640, 85 | scale_y=self.opts.layout_height/480, 86 | min_disp = min_disp 87 | ) 88 | pts_dict[keys[kx]] = dmap_points 89 | if kx == 0: 90 | min_disp = 0.8/np.max(dmap_points[:, 2]) 91 | 92 | return pts_dict 93 | 94 | def get_current_visuals(self): 95 | return { 96 | 'img':visutil.tensor2im(self.input_imgs.data), 97 | 'gt_layout':visutil.tensor2im(self.trg_layout.data), 98 | 'pred_layout':visutil.tensor2im(self.pred_layout.data) 99 | } 100 | 101 | def get_current_scalars(self): 102 | return {'total_loss': self.smoothed_total_loss, 'total_loss_repeat': self.smoothed_total_loss} 103 | 104 | def main(_): 105 | FLAGS.suncg_dl_out_codes = False 106 | FLAGS.suncg_dl_out_fine_img = False 107 | FLAGS.suncg_dl_out_voxels = False 108 | torch.manual_seed(0) 109 | trainer = LayoutTrainer(FLAGS) 110 | trainer.init_training() 111 | trainer.train() 112 | 113 | if __name__ == '__main__': 114 | app.run(main) -------------------------------------------------------------------------------- /benchmark/suncg/evaluate_detection.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------- 2 | # Copyright (c) 2015, Saurabh Gupta 3 | # 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # --------------------------------------------------------- 6 | from ...utils import bbox_utils 7 | import numpy as np 8 | 9 | def inst_bench_image(dt, gt, bOpts, overlap = None): 10 | 11 | nDt = len(dt['sc']) 12 | nGt = len(gt['diff']) 13 | numInst = np.sum(gt['diff'] == False) 14 | 15 | if overlap is None: 16 | overlap = bbox_utils.bbox_overlaps(dt['boxInfo'].astype(np.float), gt['boxInfo'].astype(np.float)) 17 | # assert(issorted(-dt.sc), 'Scores are not sorted.\n'); 18 | sc = dt['sc']; 19 | 20 | det = np.zeros((nGt,1)).astype(np.bool) 21 | tp = np.zeros((nDt,1)).astype(np.bool) 22 | fp = np.zeros((nDt,1)).astype(np.bool) 23 | dupDet = np.zeros((nDt,1)).astype(np.bool) 24 | instId = np.zeros((nDt,1)).astype(np.int32) 25 | ov = np.zeros((nDt,1)).astype(np.float32) 26 | 27 | # Walk through the detections in decreasing score 28 | # and assign tp, fp, fn, tn labels 29 | for i in xrange(nDt): 30 | # assign detection to ground truth object if any 31 | if nGt > 0: 32 | maxOverlap = overlap[i,:].max(); maxInd = overlap[i,:].argmax(); 33 | instId[i] = maxInd; ov[i] = maxOverlap; 34 | else: 35 | maxOverlap = 0; instId[i] = -1; maxInd = -1; 36 | # assign detection as true positive/don't care/false positive 37 | if maxOverlap >= bOpts['minoverlap']: 38 | if gt['diff'][maxInd] == False: 39 | if det[maxInd] == False: 40 | # true positive 41 | tp[i] = True; 42 | det[maxInd] = True; 43 | else: 44 | # false positive (multiple detection) 45 | fp[i] = True; 46 | dupDet[i] = True; 47 | else: 48 | # false positive 49 | fp[i] = True; 50 | return tp, fp, sc, numInst, dupDet, instId, ov 51 | 52 | 53 | def inst_bench(dt, gt, bOpts, tp=None, fp=None, score=None, numInst=None): 54 | """ 55 | ap, rec, prec, npos, details = inst_bench(dt, gt, bOpts, tp = None, fp = None, sc = None, numInst = None) 56 | dt - a list with a dict for each image and with following fields 57 | .boxInfo - info that will be used to cpmpute the overlap 
with ground truths, a list 58 | .sc - score 59 | gt 60 | .boxInfo - info used to compute the overlap, a list 61 | .diff - a logical array of size nGtx1, saying if the instance is hard or not 62 | bOpt 63 | .minoverlap - the minimum overlap to call it a true positive 64 | [tp], [fp], [sc], [numInst] 65 | Optional arguments, in case the inst_bench_image is being called outside of this function 66 | """ 67 | details = None 68 | if tp is None: 69 | # We do not have the tp, fp, sc, and numInst, so compute them from the structures gt, and out 70 | tp = []; fp = []; numInst = []; score = []; dupDet = []; instId = []; ov = []; 71 | for i in range(len(gt)): 72 | # Sort dt by the score 73 | sc = dt[i]['sc'] 74 | bb = dt[i]['boxInfo'] 75 | ind = np.argsort(sc, axis = 0); 76 | ind = ind[::-1] 77 | if len(ind) > 0: 78 | sc = np.vstack((sc[i,:] for i in ind)) 79 | bb = np.vstack((bb[i,:] for i in ind)) 80 | else: 81 | sc = np.zeros((0,1)).astype(np.float) 82 | bb = np.zeros((0,4)).astype(np.float) 83 | 84 | dtI = dict({'boxInfo': bb, 'sc': sc}) 85 | tp_i, fp_i, sc_i, numInst_i, dupDet_i, instId_i, ov_i = inst_bench_image(dtI, gt[i], bOpts) 86 | tp.append(tp_i); fp.append(fp_i); score.append(sc_i); numInst.append(numInst_i); 87 | dupDet.append(dupDet_i); instId.append(instId_i); ov.append(ov_i); 88 | details = {'tp': list(tp), 'fp': list(fp), 'score': list(score), 'dupDet': list(dupDet), 89 | 'numInst': list(numInst), 'instId': list(instId), 'ov': list(ov)} 90 | 91 | tp = np.vstack(tp[:]) 92 | fp = np.vstack(fp[:]) 93 | sc = np.vstack(score[:]) 94 | 95 | cat_all = np.hstack((tp,fp,sc)) 96 | ind = np.argsort(cat_all[:,2]) 97 | cat_all = cat_all[ind[::-1],:] 98 | tp = np.cumsum(cat_all[:,0], axis = 0); 99 | fp = np.cumsum(cat_all[:,1], axis = 0); 100 | thresh = cat_all[:,2]; 101 | npos = np.sum(numInst, axis = 0); 102 | 103 | # Compute precision/recall 104 | rec = tp / npos; 105 | prec = np.divide(tp, (fp+tp)); 106 | ap = VOCap(rec, prec); 107 | return ap, rec, prec, npos, details 108 | 109 | def VOCap(rec, prec): 110 | rec = rec.reshape(rec.size,1); prec = prec.reshape(prec.size,1) 111 | z = np.zeros((1,1)); o = np.ones((1,1)); 112 | mrec = np.vstack((z, rec, o)) 113 | mpre = np.vstack((z, prec, z)) 114 | for i in range(len(mpre)-2, -1, -1): 115 | mpre[i] = max(mpre[i], mpre[i+1]) 116 | 117 | I = np.where(mrec[1:] != mrec[0:-1])[0]+1; 118 | ap = 0; 119 | for i in I: 120 | ap = ap + (mrec[i] - mrec[i-1])*mpre[i]; 121 | return ap 122 | -------------------------------------------------------------------------------- /experiments/suncg/voxels.py: -------------------------------------------------------------------------------- 1 | """Script for scene level voxels prediction experiment. 
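A rough sketch of the per-batch objective optimized by the trainer below
(names and shapes are illustrative; the output grid size is controlled by the
voxels_* flags):

    logits = model(imgs)                        # one raw score per output voxel
    loss = nn.BCEWithLogitsLoss()(logits, gt)   # gt is a {0, 1} occupancy grid
    occ = torch.sigmoid(logits) > 0.5           # binarized grid, e.g. for rendering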
2 | """ 3 | # example usage : python -m factored3d.experiments.suncg.voxels --plot_scalars --display_visuals --save_epoch_freq=1 --batch_size=8 --name=voxels_baseline --display_freq=2000 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | from absl import app 8 | from absl import flags 9 | import os 10 | import os.path as osp 11 | import numpy as np 12 | import scipy.misc 13 | import torch 14 | from torch.autograd import Variable 15 | import time 16 | import pdb 17 | 18 | from ...data import suncg as suncg_data 19 | from ...nnutils import train_utils 20 | from ...nnutils import voxel_net 21 | from ...utils import visutil 22 | from ...utils import suncg_parse 23 | from ...renderer import utils as render_utils 24 | 25 | curr_path = osp.dirname(osp.abspath(__file__)) 26 | cache_path = osp.join(curr_path, '..', '..', 'cachedir') 27 | flags.DEFINE_string('rendering_dir', osp.join(cache_path, 'rendering'), 'Directory where intermittent renderings are saved') 28 | 29 | FLAGS = flags.FLAGS 30 | 31 | class VoxelTrainer(train_utils.Trainer): 32 | def define_model(self): 33 | opts = self.opts 34 | self.model = voxel_net.VoxelNet( 35 | [opts.img_height, opts.img_width], 36 | [opts.voxels_width, opts.voxels_height, opts.voxels_depth], 37 | nz_voxels=opts.nz_voxels, 38 | n_voxels_upconv=opts.n_voxels_upconv 39 | ) 40 | if self.opts.num_pretrain_epochs > 0: 41 | self.load_network(self.model, 'pred', self.opts.num_pretrain_epochs) 42 | self.model = self.model.cuda(device_id=self.opts.gpu_id) 43 | return 44 | 45 | def init_dataset(self): 46 | opts = self.opts 47 | split_dir = osp.join(opts.suncg_dir, 'splits') 48 | self.split = suncg_parse.get_split(split_dir, house_names=os.listdir(osp.join(opts.suncg_dir, 'camera'))) 49 | self.dataloader = suncg_data.suncg_data_loader(self.split['train'], opts) 50 | 51 | def define_criterion(self): 52 | self.criterion = torch.nn.BCEWithLogitsLoss().cuda(device_id=self.opts.gpu_id) 53 | 54 | def set_input(self, batch): 55 | opts = self.opts 56 | img_tensor = batch['img'].type(torch.FloatTensor) 57 | 58 | # batch_size=1 messes with batch norm 59 | self.invalid_batch = (img_tensor.size(0) == 1) 60 | 61 | if self.invalid_batch: 62 | return 63 | else: 64 | self.input_imgs = Variable( 65 | img_tensor.cuda(device=self.opts.gpu_id), requires_grad=False) 66 | 67 | trg_tensor = batch['voxels'].unsqueeze(1) 68 | self.trg_voxels = Variable( 69 | trg_tensor.type(torch.FloatTensor).cuda(device=self.opts.gpu_id), requires_grad=False) 70 | 71 | def forward(self): 72 | self.pred_voxels = self.model.forward(self.input_imgs) 73 | self.total_loss = self.criterion.forward(self.pred_voxels, self.trg_voxels) 74 | 75 | def render_voxels(self, voxels, prefix='mesh'): 76 | opts = self.opts 77 | voxels = voxels.data.cpu()[0,0].numpy() 78 | 79 | mesh_dir = osp.join(opts.rendering_dir, opts.name) 80 | if not os.path.exists(mesh_dir): 81 | os.makedirs(mesh_dir) 82 | 83 | mesh_file = osp.join(mesh_dir, prefix + '.obj') 84 | vs, fs = render_utils.voxels_to_mesh(voxels.astype(np.float32)) 85 | vs[:,0] -= voxels.shape[0]/2.0 86 | vs[:,1] -= voxels.shape[1]/2.0 87 | vs *= 0.04*(64//opts.voxels_height) 88 | fout = open(mesh_file, 'w') 89 | render_utils.append_obj(fout, vs, fs) 90 | fout.close() 91 | 92 | png_dir = mesh_file.replace('.obj', '/') 93 | render_utils.render_mesh(mesh_file, png_dir) 94 | 95 | return scipy.misc.imread(osp.join(png_dir, prefix + '_render_000.png')) 96 | 97 | def get_current_visuals(self): 98 | visuals = { 99 
| 'img':visutil.tensor2im(self.input_imgs.data) 100 | } 101 | visuals['voxels_gt'] = self.render_voxels(self.trg_voxels, prefix='gt') 102 | visuals['voxels_pred'] = self.render_voxels( 103 | torch.nn.functional.sigmoid(self.pred_voxels), prefix='pred') 104 | return visuals 105 | 106 | def get_current_points(self): 107 | return {} 108 | 109 | def get_current_scalars(self): 110 | return {'total_loss': self.smoothed_total_loss, 'total_loss_repeat': self.smoothed_total_loss} 111 | 112 | def main(_): 113 | FLAGS.suncg_dl_out_codes = False 114 | FLAGS.suncg_dl_out_fine_img = False 115 | FLAGS.suncg_dl_out_voxels = True 116 | FLAGS.suncg_dl_out_layout = False 117 | FLAGS.suncg_dl_out_depth = False 118 | torch.manual_seed(0) 119 | trainer = VoxelTrainer(FLAGS) 120 | trainer.init_training() 121 | trainer.train() 122 | 123 | if __name__ == '__main__': 124 | app.run() -------------------------------------------------------------------------------- /nnutils/disp_net.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Inverse depth prediction net. 3 | Code based on https://github.com/ClementPinard/dispNetPytorch/ 4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import math 8 | from . import net_blocks as nb 9 | 10 | def predict_disp(in_planes): 11 | return nn.Conv2d(in_planes,1,kernel_size=3,stride=1,padding=1,bias=False) 12 | 13 | class DispNet(nn.Module): 14 | expansion = 1 15 | 16 | def __init__(self, batch_norm=True): 17 | super(DispNet, self).__init__() 18 | 19 | self.batch_norm = batch_norm 20 | self.conv1 = nb.conv2d(self.batch_norm, 3, 64, kernel_size=7, stride=2) 21 | self.conv2 = nb.conv2d(self.batch_norm, 64, 128, kernel_size=5, stride=2) 22 | self.conv3 = nb.conv2d(self.batch_norm, 128, 256, kernel_size=5, stride=2) 23 | self.conv3_1 = nb.conv2d(self.batch_norm, 256, 256) 24 | self.conv4 = nb.conv2d(self.batch_norm, 256, 512, stride=2) 25 | self.conv4_1 = nb.conv2d(self.batch_norm, 512, 512) 26 | self.conv5 = nb.conv2d(self.batch_norm, 512, 512, stride=2) 27 | self.conv5_1 = nb.conv2d(self.batch_norm, 512, 512) 28 | self.conv6 = nb.conv2d(self.batch_norm, 512, 1024, stride=2) 29 | self.conv6_1 = nb.conv2d(self.batch_norm,1024, 1024) 30 | 31 | self.deconv5 = nb.deconv2d(1024,512) 32 | self.deconv4 = nb.deconv2d(1025,256) 33 | self.deconv3 = nb.deconv2d(769,128) 34 | self.deconv2 = nb.deconv2d(385,64) 35 | self.deconv1 = nb.deconv2d(193,64) 36 | 37 | self.predict_disp6 = predict_disp(1024) 38 | self.predict_disp5 = predict_disp(1025) 39 | self.predict_disp4 = predict_disp(769) 40 | self.predict_disp3 = predict_disp(385) 41 | self.predict_disp2 = predict_disp(193) 42 | self.predict_disp1 = predict_disp(129) 43 | 44 | self.upsampled_disp6_to_5 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 45 | self.upsampled_disp5_to_4 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 46 | self.upsampled_disp4_to_3 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 47 | self.upsampled_disp3_to_2 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 48 | self.upsampled_disp2_to_1 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 49 | 50 | for m in self.modules(): 51 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 52 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 53 | m.weight.data.normal_(0, 0.02 / n) #this modified initialization seems to work better, but it's very hacky 54 | if m.bias is not None: 55 | m.bias.data.zero_() 56 | elif isinstance(m, nn.BatchNorm2d): 57 | m.weight.data.fill_(1) 58 | m.bias.data.zero_() 59 | 60 | def 
forward(self, x): 61 | out_conv1 = self.conv1(x) 62 | out_conv2 = self.conv2(out_conv1) 63 | out_conv3 = self.conv3_1(self.conv3(out_conv2)) 64 | out_conv4 = self.conv4_1(self.conv4(out_conv3)) 65 | out_conv5 = self.conv5_1(self.conv5(out_conv4)) 66 | out_conv6 = self.conv6_1(self.conv6(out_conv5)) 67 | 68 | disp6 = self.predict_disp6(out_conv6) 69 | disp6_up = self.upsampled_disp6_to_5(disp6) 70 | out_deconv5 = self.deconv5(out_conv6) 71 | 72 | concat5 = torch.cat((out_conv5,out_deconv5,disp6_up),1) 73 | disp5 = self.predict_disp5(concat5) 74 | disp5_up = self.upsampled_disp5_to_4(disp5) 75 | out_deconv4 = self.deconv4(concat5) 76 | 77 | concat4 = torch.cat((out_conv4,out_deconv4,disp5_up),1) 78 | disp4 = self.predict_disp4(concat4) 79 | disp4_up = self.upsampled_disp4_to_3(disp4) 80 | out_deconv3 = self.deconv3(concat4) 81 | 82 | concat3 = torch.cat((out_conv3,out_deconv3,disp4_up),1) 83 | disp3 = self.predict_disp3(concat3) 84 | disp3_up = self.upsampled_disp3_to_2(disp3) 85 | out_deconv2 = self.deconv2(concat3) 86 | 87 | concat2 = torch.cat((out_conv2,out_deconv2,disp3_up),1) 88 | disp2 = self.predict_disp2(concat2) 89 | disp2_up = self.upsampled_disp2_to_1(disp2) 90 | out_deconv1 = self.deconv1(concat2) 91 | 92 | concat1 = torch.cat((out_conv1,out_deconv1,disp2_up),1) 93 | disp1 = self.predict_disp1(concat1) 94 | 95 | if self.training: 96 | #return disp1,disp2,disp3,disp4,disp5,disp6 97 | return disp1 98 | else: 99 | return disp1 100 | 101 | 102 | def dispnet(path=None, batch_norm=True): 103 | """dispNet model architecture. 104 | 105 | Args: 106 | path : where to load pretrained network. will create a new one if not set 107 | """ 108 | model = DispNet(batch_norm=batch_norm) 109 | if path is not None: 110 | data = torch.load(path) 111 | if 'state_dict' in data.keys(): 112 | model.load_state_dict(data['state_dict']) 113 | else: 114 | model.load_state_dict(data) 115 | return model -------------------------------------------------------------------------------- /preprocess/suncg/matUtils/get_scene_vox.m: -------------------------------------------------------------------------------- 1 | function [sceneVox] = get_scene_vox(pathToData,sceneId,floorId,roomId,extCam2World,objcategory) 2 | % Notes: grid is Z up while the The loaded houses are Y up 3 | % Adapted from the sscnet codebase - https://github.com/shurans/sscnet 4 | 5 | volume_params; 6 | ignore_classes = {'people', 'plants'}; 7 | % Compute voxel range in cam coordinates 8 | voxOriginCam = - [voxSize(1)/2*voxUnit;voxSize(2)/2*voxUnit;0]; 9 | [gridPtsCamX,gridPtsCamY,gridPtsCamZ] = ndgrid(voxOriginCam(1):voxUnit:(voxOriginCam(1)+(voxSize(1)-1)*voxUnit), ... 10 | voxOriginCam(2):voxUnit:(voxOriginCam(2)+(voxSize(2)-1)*voxUnit), ... 
11 | voxOriginCam(3):voxUnit:(voxOriginCam(3)+(voxSize(3)-1)*voxUnit)); 12 | gridPtsCam_init = [gridPtsCamX(:),gridPtsCamY(:),gridPtsCamZ(:)]'; %' 13 | 14 | % Compute voxel grid centres in world coordinates 15 | gridPtsWorld = bsxfun(@plus,extCam2World(1:3,1:3)*gridPtsCam_init, extCam2World(1:3,4)); 16 | gridPtsWorldX = gridPtsWorld(1,:); 17 | gridPtsWorldY = gridPtsWorld(2,:); 18 | gridPtsWorldZ = gridPtsWorld(3,:); 19 | gridPtsLabel = zeros(1,size(gridPtsWorld,2)); 20 | 21 | house = loadjson(fullfile(pathToData,'house', sceneId,'house.json')); 22 | roomStruct = house.levels{floorId}.nodes{roomId}; 23 | floorStruct = house.levels{floorId}; 24 | 25 | % find all grid in the room 26 | floorObj = read_wobj_safe([fullfile(pathToData,'room',sceneId,roomStruct.modelId) 'f.obj']); 27 | inRoom = zeros(size(gridPtsWorldX)); 28 | for i = 1:length(floorObj.objects(3).data.vertices) 29 | faceId = floorObj.objects(3).data.vertices(i,:); 30 | floorP = floorObj.vertices(faceId,[1,3])'; 31 | inRoom = inRoom|inpolygon(gridPtsWorldX,gridPtsWorldY,floorP(1,:),floorP(2,:)); %' 32 | end 33 | 34 | % find floor 35 | floorZ = mean(floorObj.vertices(:,2)); 36 | gridPtsObjWorldInd = inRoom(:)'&(abs(gridPtsWorld(3,:)-floorZ) <= voxUnit/2); %' 37 | [~,classRootId] = getobjclassSUNCG('floor',objcategory); 38 | gridPtsLabel(gridPtsObjWorldInd) = classRootId; 39 | 40 | % find ceiling 41 | ceilObj = read_wobj_safe([fullfile(pathToData,'room',sceneId,roomStruct.modelId) 'c.obj']); 42 | ceilZ = mean(ceilObj.vertices(:,2)); 43 | gridPtsObjWorldInd = inRoom(:)'&abs(gridPtsWorld(3,:)-ceilZ) <= voxUnit/2; %' 44 | [~,classRootId] = getobjclassSUNCG('ceiling',objcategory); 45 | gridPtsLabel(gridPtsObjWorldInd) = classRootId; 46 | 47 | % Load walls 48 | WallObj = read_wobj_safe([fullfile(pathToData,'room',sceneId,roomStruct.modelId) 'w.obj']); 49 | inWall = zeros(size(gridPtsWorldX)); 50 | for oi = 1:length(WallObj.objects) 51 | if WallObj.objects(oi).type == 'f' 52 | for i = 1:length(WallObj.objects(oi).data.vertices) 53 | faceId = WallObj.objects(oi).data.vertices(i,:); 54 | floorP = WallObj.vertices(faceId,[1,3])'; %' 55 | inWall = inWall|inpolygon(gridPtsWorldX,gridPtsWorldY,floorP(1,:),floorP(2,:)); 56 | end 57 | end 58 | end 59 | gridPtsObjWorldInd = inWall(:)'&(gridPtsWorld(3,:)floorZ+voxUnit/2); %' 60 | [~,classRootId] = getobjclassSUNCG('wall',objcategory); 61 | gridPtsLabel(gridPtsObjWorldInd) = classRootId; 62 | 63 | % Loop through each object and set voxels to class ID 64 | for objId = roomStruct.nodeIndices 65 | object_struct = floorStruct.nodes{objId+1}; 66 | if isfield(object_struct, 'modelId') && isfield(object_struct, 'valid') && (object_struct.valid) 67 | % Set segmentation class ID 68 | [classRootName,classRootId,className] = getobjclassSUNCG(strrep(object_struct.modelId,'/','__'),objcategory); 69 | if ismember(className, ignore_classes) 70 | continue 71 | end 72 | 73 | % Compute object bbox in world coordinates 74 | objBbox = [object_struct.bbox.min([1,3,2])',object_struct.bbox.max([1,3,2])']; 75 | 76 | % Load segmentation of object in object coordinates 77 | filename= fullfile(pathToData,'object_vox/object_vox_data/',strrep(object_struct.modelId,'/','__'), [strrep(object_struct.modelId,'/','__'), '.binvox']); 78 | [voxels,scale,translate] = read_binvox(filename); 79 | [x,y,z] = ind2sub(size(voxels),find(voxels(:)>0)); 80 | objSegPts = bsxfun(@plus,[x,y,z]*scale,translate'); %' 81 | 82 | % Convert object to world coordinates 83 | extObj2World_yup = reshape(object_struct.transform,[4,4]); 84 | objSegPts = 
extObj2World_yup*[objSegPts(:,[1,3,2])';ones(1,size(x,1))]; %' 85 | objSegPts = objSegPts([1,3,2],:); 86 | 87 | % Get all grid points within the object bbox in world coordinates 88 | gridPtsObjWorldInd = gridPtsWorld(1,:) >= objBbox(1,1) - voxUnit & gridPtsWorld(1,:) <= objBbox(1,2) + voxUnit & ... 89 | gridPtsWorld(2,:) >= objBbox(2,1) - voxUnit & gridPtsWorld(2,:) <= objBbox(2,2) + voxUnit & ... 90 | gridPtsWorld(3,:) >= objBbox(3,1) - voxUnit & gridPtsWorld(3,:) <= objBbox(3,2) + voxUnit; 91 | gridPtsObjWorld = gridPtsWorld(:,find(gridPtsObjWorldInd)); 92 | 93 | 94 | % If object is a window or door, clear voxels in object bbox 95 | [~,wallId] = getobjclassSUNCG('wall',objcategory); 96 | if classRootId == 4 || classRootId == 5 97 | gridPtsObjClearInd = gridPtsObjWorldInd&gridPtsLabel==wallId; 98 | gridPtsLabel(gridPtsObjClearInd) = 0; 99 | end 100 | 101 | % Apply segmentation to grid points of object 102 | if numel(gridPtsObjWorld) > 0 103 | [indices, dists] = multiQueryKNNSearchImpl(pointCloud(objSegPts'), gridPtsObjWorld',1); 104 | objOccInd = find(sqrt(dists) <= (sqrt(3)/2)*scale); 105 | gridPtsObjWorldLinearIdx = find(gridPtsObjWorldInd); 106 | gridPtsLabel(gridPtsObjWorldLinearIdx(objOccInd)) = classRootId; 107 | end 108 | end 109 | end 110 | 111 | % Remove grid points not in field of view 112 | extWorld2Cam = inv([extCam2World;[0,0,0,1]]); 113 | gridPtsCam = extWorld2Cam(1:3,1:3)*gridPtsWorld + repmat(extWorld2Cam(1:3,4),1,size(gridPtsWorld,2)); 114 | gridPtsPixX = gridPtsCam(1,:).*(camK(1,1))./gridPtsCam(3,:)+camK(1,3); 115 | gridPtsPixY = gridPtsCam(2,:).*(camK(2,2))./gridPtsCam(3,:)+camK(2,3); 116 | invalidPixInd = (gridPtsPixX < 0 | gridPtsPixX >= im_w | gridPtsPixY < 0 | gridPtsPixY >= im_h | gridPtsCam(3,:) < 0); 117 | gridPtsLabel(find(invalidPixInd)) = 0; 118 | 119 | % Remove grid points not in the room 120 | gridPtsLabel(~inRoom(:)&gridPtsLabel(:)==0) = 255; 121 | 122 | % Save the volume 123 | sceneVox = reshape(gridPtsLabel,voxSize'); %' 124 | 125 | end -------------------------------------------------------------------------------- /utils/visualizer.py: -------------------------------------------------------------------------------- 1 | '''Code adapted from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix''' 2 | import numpy as np 3 | import os 4 | import ntpath 5 | import time 6 | import visdom 7 | from . import visutil as util 8 | from . import html 9 | 10 | class Visualizer(): 11 | def __init__(self, opt): 12 | # self.opt = opt 13 | self.display_id = opt.display_id 14 | self.use_html = opt.is_train and opt.use_html 15 | self.win_size = opt.display_winsize 16 | self.name = opt.name 17 | if self.display_id > 0: 18 | self.vis = visdom.Visdom(port = opt.display_port) 19 | self.display_single_pane_ncols = opt.display_single_pane_ncols 20 | 21 | if self.use_html: 22 | self.web_dir = os.path.join(opt.checkpoint_dir, opt.name, 'web') 23 | self.img_dir = os.path.join(self.web_dir, 'images') 24 | print('create web directory %s...' 
% self.web_dir) 25 | util.mkdirs([self.web_dir, self.img_dir]) 26 | self.log_name = os.path.join(opt.checkpoint_dir, opt.name, 'loss_log.txt') 27 | with open(self.log_name, "a") as log_file: 28 | now = time.strftime("%c") 29 | log_file.write('================ Training Loss (%s) ================\n' % now) 30 | 31 | # |visuals|: dictionary of images to display or save 32 | def display_current_results(self, visuals, epoch): 33 | if self.display_id > 0: # show images in the browser 34 | if self.display_single_pane_ncols > 0: 35 | h, w = next(iter(visuals.values())).shape[:2] 36 | table_css = """""" % (w, h) 40 | ncols = self.display_single_pane_ncols 41 | title = self.name 42 | label_html = '' 43 | label_html_row = '' 44 | nrows = int(np.ceil(len(visuals.items()) / ncols)) 45 | images = [] 46 | idx = 0 47 | # for label, image_numpy in visuals.items(): 48 | img_keys = visuals.keys() 49 | list.sort(img_keys) 50 | for label in img_keys: 51 | image_numpy = visuals[label] 52 | label_html_row += '%s' % label 53 | images.append(image_numpy.transpose([2, 0, 1])) 54 | idx += 1 55 | if idx % ncols == 0: 56 | label_html += '%s' % label_html_row 57 | label_html_row = '' 58 | white_image = np.ones_like(image_numpy.transpose([2, 0, 1]))*255 59 | while idx % ncols != 0: 60 | images.append(white_image) 61 | label_html_row += '' 62 | idx += 1 63 | if label_html_row != '': 64 | label_html += '%s' % label_html_row 65 | # pane col = image row 66 | self.vis.images(images, nrow=ncols, win=self.display_id + 1, 67 | padding=2, opts=dict(title=title + ' images')) 68 | label_html = '%s
' % label_html 69 | self.vis.text(table_css + label_html, win = self.display_id + 2, 70 | opts=dict(title=title + ' labels')) 71 | else: 72 | idx = 1 73 | for label, image_numpy in visuals.items(): 74 | self.vis.image( 75 | image_numpy.transpose([2,0,1]), opts=dict(title=label), 76 | win=self.display_id + idx) 77 | idx += 1 78 | 79 | if self.use_html: # save images to a html file 80 | for label, image_numpy in visuals.items(): 81 | img_path = os.path.join(self.img_dir, 'epoch%.3d_%s.png' % (epoch, label)) 82 | util.save_image(image_numpy, img_path) 83 | # update website 84 | webpage = html.HTML(self.web_dir, 'Experiment name = %s' % self.name, reflesh=1) 85 | for n in range(epoch, 0, -1): 86 | webpage.add_header('epoch [%d]' % n) 87 | ims = [] 88 | txts = [] 89 | links = [] 90 | 91 | for label, image_numpy in visuals.items(): 92 | img_path = 'epoch%.3d_%s.png' % (n, label) 93 | ims.append(img_path) 94 | txts.append(label) 95 | links.append(img_path) 96 | webpage.add_images(ims, txts, links, width=self.win_size) 97 | webpage.save() 98 | 99 | # scalars: dictionary of scalar labels and values 100 | def plot_current_scalars(self, epoch, counter_ratio, opt, scalars): 101 | if not hasattr(self, 'plot_data'): 102 | self.plot_data = {'X':[],'Y':[], 'legend':list(scalars.keys())} 103 | self.plot_data['X'].append(epoch + counter_ratio) 104 | self.plot_data['Y'].append([scalars[k] for k in self.plot_data['legend']]) 105 | self.vis.line( 106 | X=np.stack([np.array(self.plot_data['X'])]*len(self.plot_data['legend']),1), 107 | Y=np.array(self.plot_data['Y']), 108 | opts={ 109 | 'title': self.name + ' loss over time', 110 | 'legend': self.plot_data['legend'], 111 | 'xlabel': 'epoch', 112 | 'ylabel': 'loss'}, 113 | win=self.display_id) 114 | 115 | # scatter plots 116 | def plot_current_points(self, points, disp_offset=10): 117 | idx = disp_offset 118 | for label, pts in points.items(): 119 | #image_numpy = np.flipud(image_numpy) 120 | self.vis.scatter( 121 | pts, opts=dict(title=label, markersize=1), win=self.display_id + idx) 122 | idx += 1 123 | 124 | # scalars: same format as |scalars| of plot_current_scalars 125 | def print_current_scalars(self, epoch, i, scalars): 126 | message = '(epoch: %d, iters: %d) ' % (epoch, i) 127 | for k, v in scalars.items(): 128 | message += '%s: %.3f ' % (k, v) 129 | 130 | print(message) 131 | with open(self.log_name, "a") as log_file: 132 | log_file.write('%s\n' % message) 133 | 134 | # save image to the disk 135 | def save_images(self, webpage, visuals, image_path): 136 | image_dir = webpage.get_image_dir() 137 | short_path = ntpath.basename(image_path[0]) 138 | name = os.path.splitext(short_path)[0] 139 | 140 | webpage.add_header(name) 141 | ims = [] 142 | txts = [] 143 | links = [] 144 | 145 | for label, image_numpy in visuals.items(): 146 | image_name = '%s_%s.png' % (name, label) 147 | save_path = os.path.join(image_dir, image_name) 148 | util.save_image(image_numpy, save_path) 149 | 150 | ims.append(image_name) 151 | txts.append(label) 152 | links.append(image_name) 153 | webpage.add_images(ims, txts, links, width=self.win_size) 154 | -------------------------------------------------------------------------------- /nnutils/net_blocks.py: -------------------------------------------------------------------------------- 1 | ''' 2 | CNN building blocks. 
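The helpers below each return a small nn.Sequential stage that can be chained
into larger models. A minimal illustrative sketch (layer sizes are arbitrary,
and the import path assumes the repository root is importable as factored3d):

    import torch
    from factored3d.nnutils import net_blocks as nb

    conv = nb.conv2d(True, 3, 8, stride=2)       # 3 -> 8 channels, halves H and W
    head = torch.nn.Sequential(
        nb.Flatten(),
        nb.fc_stack(8 * 16 * 16, 64, 2))         # flattened 16x16 maps -> 64-d feature
    x = torch.randn(2, 3, 32, 32)                # wrap in Variable on older PyTorch
    out = head(conv(x))                          # -> (2, 64)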
3 | ''' 4 | from __future__ import division 5 | from __future__ import print_function 6 | import torch 7 | import torch.nn as nn 8 | import math 9 | 10 | class Flatten(nn.Module): 11 | def forward(self, x): 12 | return x.view(x.size()[0], -1) 13 | 14 | class Unsqueeze(nn.Module): 15 | def __init__(self, dim): 16 | super(Unsqueeze, self).__init__() 17 | self.dim = dim 18 | 19 | def forward(self, x): 20 | return x.unsqueeze(self.dim) 21 | 22 | ## fc layers 23 | def fc(batch_norm, nc_inp, nc_out): 24 | if batch_norm: 25 | return nn.Sequential( 26 | nn.Linear(nc_inp, nc_out, bias=True), 27 | nn.BatchNorm1d(nc_out), 28 | nn.LeakyReLU(0.2,inplace=True) 29 | ) 30 | else: 31 | return nn.Sequential( 32 | nn.Linear(nc_inp, nc_out), 33 | nn.LeakyReLU(0.1,inplace=True) 34 | ) 35 | 36 | def fc_stack(nc_inp, nc_out, nlayers, use_bn=True): 37 | modules = [] 38 | for l in range(nlayers): 39 | modules.append(fc(use_bn, nc_inp, nc_out)) 40 | nc_inp = nc_out 41 | encoder = nn.Sequential(*modules) 42 | net_init(encoder) 43 | return encoder 44 | 45 | ## 2D convolution layers 46 | def conv2d(batch_norm, in_planes, out_planes, kernel_size=3, stride=1): 47 | if batch_norm: 48 | return nn.Sequential( 49 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=True), 50 | nn.BatchNorm2d(out_planes), 51 | nn.LeakyReLU(0.2,inplace=True) 52 | ) 53 | else: 54 | return nn.Sequential( 55 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=True), 56 | nn.LeakyReLU(0.2,inplace=True) 57 | ) 58 | 59 | 60 | def deconv2d(in_planes, out_planes): 61 | return nn.Sequential( 62 | nn.ConvTranspose2d(in_planes, out_planes, kernel_size=4, stride=2, padding=1, bias=True), 63 | nn.LeakyReLU(0.2,inplace=True) 64 | ) 65 | 66 | ## 3D convolution layers 67 | def conv3d(batch_norm, in_planes, out_planes, kernel_size=3, stride=1): 68 | if batch_norm: 69 | return nn.Sequential( 70 | nn.Conv3d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=True), 71 | nn.BatchNorm3d(out_planes), 72 | nn.LeakyReLU(0.2,inplace=True) 73 | ) 74 | else: 75 | return nn.Sequential( 76 | nn.Conv3d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=True), 77 | nn.LeakyReLU(0.2,inplace=True) 78 | ) 79 | 80 | 81 | def deconv3d(batch_norm, in_planes, out_planes): 82 | if batch_norm: 83 | return nn.Sequential( 84 | nn.ConvTranspose3d(in_planes, out_planes, kernel_size=4, stride=2, padding=1, bias=True), 85 | nn.BatchNorm3d(out_planes), 86 | nn.LeakyReLU(0.2,inplace=True) 87 | ) 88 | else: 89 | return nn.Sequential( 90 | nn.ConvTranspose3d(in_planes, out_planes, kernel_size=4, stride=2, padding=1, bias=True), 91 | nn.LeakyReLU(0.2,inplace=True) 92 | ) 93 | 94 | 95 | ## 3D Network Modules 96 | def encoder3d(nlayers, use_bn=True, nc_input=1, nc_max=128, nc_l1=8, nc_step=1, nz_shape=20): 97 | ''' Simple 3D encoder with nlayers. 
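    Returns a (module, nc_output) pair, where nc_output is the channel count of
    the last conv stage. Because the fully-connected head is sized from that
    channel count alone, the flatten only lines up when the spatial extent has
    collapsed to 1 (for example a cubic input with side length 2**nlayers). A
    small illustrative sketch (sizes are arbitrary):

        enc, nc = encoder3d(nlayers=5, nc_input=1, nz_shape=20)
        z = enc(vox)    # vox: (B, 1, 32, 32, 32) -> z: (B, 20); here nc == 128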
98 | 99 | Args: 100 | nlayers: number of encoder layers 101 | use_bn: whether to use batch_norm 102 | nc_input: number of input channels 103 | nc_max: number of max channels 104 | nc_l1: number of channels in layer 1 105 | nc_step: double number of channels every nc_step layers 106 | nz_shape: size of bottleneck layer 107 | ''' 108 | modules = [] 109 | nc_output = nc_l1 110 | for nl in range(nlayers): 111 | if (nl>=1) and (nl%nc_step==0) and (nc_output <= nc_max*2): 112 | nc_output *= 2 113 | 114 | modules.append(conv3d(use_bn, nc_input, nc_output, stride=1)) 115 | nc_input = nc_output 116 | modules.append(conv3d(use_bn, nc_input, nc_output, stride=1)) 117 | modules.append(torch.nn.MaxPool3d(kernel_size=2, stride=2)) 118 | 119 | modules.append(Flatten()) 120 | modules.append(fc_stack(nc_output, nz_shape, 2, use_bn=True)) 121 | encoder = nn.Sequential(*modules) 122 | net_init(encoder) 123 | return encoder, nc_output 124 | 125 | 126 | def decoder3d(nlayers, nz_shape, nc_input, use_bn=True, nc_final=1, nc_min=8, nc_step=1, init_fc=True): 127 | ''' Simple 3D encoder with nlayers. 128 | 129 | Args: 130 | nlayers: number of decoder layers 131 | nz_shape: number of bottleneck 132 | nc_input: number of channels to start upconvolution from 133 | use_bn: whether to use batch_norm 134 | nc_final: number of output channels 135 | nc_min: number of min channels 136 | nc_step: double number of channels every nc_step layers 137 | init_fc: initial features are not spatial, use an fc & unsqueezing to make them 3D 138 | ''' 139 | modules = [] 140 | if init_fc: 141 | modules.append(fc(use_bn, nz_shape, nc_input)) 142 | for d in range(3): 143 | modules.append(Unsqueeze(2)) 144 | nc_output = nc_input 145 | for nl in range(nlayers): 146 | if (nl%nc_step==0) and (nc_output//2 >= nc_min): 147 | nc_output = nc_output//2 148 | 149 | modules.append(deconv3d(use_bn, nc_input, nc_output)) 150 | nc_input = nc_output 151 | modules.append(conv3d(use_bn, nc_input, nc_output)) 152 | 153 | modules.append(nn.Conv3d(nc_output, nc_final, kernel_size=3, stride=1, padding=1, bias=True)) 154 | decoder = nn.Sequential(*modules) 155 | net_init(decoder) 156 | return decoder 157 | 158 | 159 | def net_init(net): 160 | for m in net.modules(): 161 | if isinstance(m, nn.Linear): 162 | #n = m.out_features 163 | #m.weight.data.normal_(0, 0.02 / n) #this modified initialization seems to work better, but it's very hacky 164 | #n = m.in_features 165 | #m.weight.data.normal_(0, math.sqrt(2. / n)) #xavier 166 | m.weight.data.normal_(0, 0.02) 167 | if m.bias is not None: 168 | m.bias.data.zero_() 169 | 170 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 171 | #n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels 172 | #m.weight.data.normal_(0, math.sqrt(2. / n)) #this modified initialization seems to work better, but it's very hacky 173 | m.weight.data.normal_(0, 0.02) 174 | if m.bias is not None: 175 | m.bias.data.zero_() 176 | 177 | if isinstance(m, nn.Conv3d) or isinstance(m, nn.ConvTranspose3d): 178 | #n = m.kernel_size[0] * m.kernel_size[1] * m.kernel_size[2] * m.in_channels 179 | #m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 180 | m.weight.data.normal_(0, 0.02) 181 | if m.bias is not None: 182 | m.bias.data.zero_() 183 | 184 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm3d): 185 | m.weight.data.fill_(1) 186 | m.bias.data.zero_() 187 | -------------------------------------------------------------------------------- /nnutils/train_utils.py: -------------------------------------------------------------------------------- 1 | """Generic Training Utils. 2 | """ 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | import torch 8 | import os 9 | import os.path as osp 10 | import time 11 | import pdb 12 | from absl import flags 13 | 14 | from ..utils.visualizer import Visualizer 15 | 16 | #-------------- flags -------------# 17 | #----------------------------------# 18 | ## Flags for training 19 | curr_path = osp.dirname(osp.abspath(__file__)) 20 | cache_path = osp.join(curr_path, '..', 'cachedir') 21 | 22 | flags.DEFINE_string('name', 'exp_name', 'Experiment Name') 23 | flags.DEFINE_string('cache_dir', cache_path, 'Cachedir') 24 | flags.DEFINE_integer('gpu_id', 0, 'Which gpu to use') 25 | flags.DEFINE_integer('num_epochs', 10, 'Number of epochs to train') 26 | flags.DEFINE_integer('num_pretrain_epochs', 0, 'If >0, we will pretain from an existing saved model.') 27 | flags.DEFINE_float('learning_rate', 0.0001, 'learning rate') 28 | flags.DEFINE_float('beta1', 0.9, 'Momentum term of adam') 29 | 30 | flags.DEFINE_integer('batch_size', 4, 'Size of minibatches') 31 | flags.DEFINE_integer('num_iter', 0, 'Number of training iterations. 0 -> Use epoch_iter') 32 | flags.DEFINE_integer('n_data_workers', 4, 'Number of data loading workers') 33 | 34 | ## Flags for logging and snapshotting 35 | flags.DEFINE_string('checkpoint_dir', osp.join(cache_path, 'snapshots'), 36 | 'Root directory for output files') 37 | flags.DEFINE_integer('print_freq', 20, 'scalar logging frequency') 38 | flags.DEFINE_integer('save_latest_freq', 10000, 'save latest model every x iterations') 39 | flags.DEFINE_integer('save_epoch_freq', 2, 'save model every k epochs') 40 | 41 | ## Flags for visualization 42 | flags.DEFINE_integer('display_freq', 100, 'visuals logging frequency') 43 | flags.DEFINE_boolean('display_visuals', False, 'whether to display images') 44 | flags.DEFINE_boolean('print_scalars', True, 'whether to print scalars') 45 | flags.DEFINE_boolean('plot_scalars', False, 'whether to plot scalars') 46 | flags.DEFINE_boolean('is_train', True, 'Are we training ?') 47 | flags.DEFINE_boolean('use_html', False, 'Save html visualizations') 48 | flags.DEFINE_integer('display_id', 1, 'Display Id') 49 | flags.DEFINE_integer('display_winsize', 256, 'Display Size') 50 | flags.DEFINE_integer('display_port', 8097, 'Display port') 51 | flags.DEFINE_integer('display_single_pane_ncols', 0, 'if positive, display all images in a single visdom web panel with certain number of images per row.') 52 | 53 | #-------- tranining class ---------# 54 | #----------------------------------# 55 | class Trainer(): 56 | def __init__(self, opts): 57 | self.opts = opts 58 | self.gpu_id = opts.gpu_id 59 | self.Tensor = torch.cuda.FloatTensor if (self.gpu_id is not None) else torch.Tensor 60 | self.invalid_batch = False #the trainer can optionally reset this every iteration during set_input call 61 | self.save_dir = os.path.join(opts.checkpoint_dir, opts.name) 62 | if not os.path.exists(self.save_dir): 63 | os.makedirs(self.save_dir) 64 | log_file = os.path.join(self.save_dir, 
'opts.log') 65 | with open(log_file, 'w') as f: 66 | for k in dir(opts): 67 | f.write('{}: {}\n'.format(k, opts.__getattr__(k))) 68 | 69 | 70 | # helper saving function that can be used by subclasses 71 | def save_network(self, network, network_label, epoch_label, gpu_id=None): 72 | save_filename = '{}_net_{}.pth'.format(network_label, epoch_label) 73 | save_path = os.path.join(self.save_dir, save_filename) 74 | torch.save(network.cpu().state_dict(), save_path) 75 | if gpu_id is not None and torch.cuda.is_available(): 76 | network.cuda(device_id=gpu_id) 77 | return 78 | 79 | # helper loading function that can be used by subclasses 80 | def load_network(self, network, network_label, epoch_label, network_dir=None): 81 | save_filename = '{}_net_{}.pth'.format(network_label, epoch_label) 82 | if network_dir is None: 83 | network_dir = self.save_dir 84 | save_path = os.path.join(network_dir, save_filename) 85 | network.load_state_dict(torch.load(save_path)) 86 | return 87 | 88 | def define_model(self): 89 | '''Should be implemented by the child class.''' 90 | raise NotImplementedError 91 | 92 | def init_dataset(self): 93 | '''Should be implemented by the child class.''' 94 | raise NotImplementedError 95 | 96 | def define_criterion(self): 97 | '''Should be implemented by the child class.''' 98 | raise NotImplementedError 99 | 100 | def set_input(self, batch): 101 | '''Should be implemented by the child class.''' 102 | raise NotImplementedError 103 | 104 | def forward(self): 105 | '''Should compute self.total_loss. To be implemented by the child class.''' 106 | raise NotImplementedError 107 | 108 | def save(self, epoch_prefix): 109 | '''Saves the model.''' 110 | self.save_network(self.model, 'pred', epoch_prefix, gpu_id=self.opts.gpu_id) 111 | return 112 | 113 | def get_current_visuals(self): 114 | '''Should be implemented by the child class.''' 115 | raise NotImplementedError 116 | 117 | def get_current_scalars(self): 118 | '''Should be implemented by the child class.''' 119 | raise NotImplementedError 120 | 121 | def get_current_points(self): 122 | '''Should be implemented by the child class.''' 123 | raise NotImplementedError 124 | 125 | def init_training(self): 126 | opts = self.opts 127 | self.define_model() 128 | self.init_dataset() 129 | self.define_criterion() 130 | self.optimizer = torch.optim.Adam( 131 | self.model.parameters(), lr=opts.learning_rate, betas=(opts.beta1, 0.999)) 132 | 133 | def train(self): 134 | opts = self.opts 135 | self.smoothed_total_loss = 0 136 | self.visualizer = Visualizer(opts) 137 | visualizer = self.visualizer 138 | total_steps = 0 139 | dataset_size = len(self.dataloader) 140 | 141 | for epoch in range(opts.num_pretrain_epochs, opts.num_epochs): 142 | epoch_iter = 0 143 | for i, batch in enumerate(self.dataloader): 144 | iter_start_time = time.time() 145 | self.set_input(batch) 146 | if not self.invalid_batch: 147 | self.optimizer.zero_grad() 148 | self.forward() 149 | self.smoothed_total_loss = self.smoothed_total_loss*0.99 + 0.01*self.total_loss.data[0] 150 | self.total_loss.backward() 151 | # pdb.set_trace() 152 | self.optimizer.step() 153 | 154 | total_steps += 1 155 | epoch_iter += 1 156 | 157 | if opts.display_visuals and (total_steps % opts.display_freq == 0): 158 | visualizer.display_current_results(self.get_current_visuals(), epoch) 159 | visualizer.plot_current_points(self.get_current_points()) 160 | 161 | if opts.print_scalars and (total_steps % opts.print_freq == 0): 162 | scalars = self.get_current_scalars() 163 | 
visualizer.print_current_scalars(epoch, epoch_iter, scalars) 164 | if opts.plot_scalars: 165 | visualizer.plot_current_scalars(epoch, float(epoch_iter)/dataset_size, opts, scalars) 166 | 167 | if total_steps % opts.save_latest_freq == 0: 168 | print('saving the model at the end of epoch {:d}, iters {:d}'.format(epoch, total_steps)) 169 | self.save('latest') 170 | 171 | if total_steps == opts.num_iter: 172 | return 173 | 174 | if (epoch+1) % opts.save_epoch_freq == 0: 175 | print('saving the model at the end of epoch {:d}, iters {:d}'.format(epoch, total_steps)) 176 | self.save('latest') 177 | self.save(epoch+1) -------------------------------------------------------------------------------- /nnutils/oc_net.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Object-centric prediction net. 3 | ''' 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | from absl import flags 8 | import torch 9 | import torch.nn as nn 10 | import torchvision 11 | from . import net_blocks as nb 12 | from . import roi_pool_py as roi_pool 13 | #from oc3d.nnutils import roi_pooling 14 | import pdb 15 | 16 | #-------------- flags -------------# 17 | #----------------------------------# 18 | flags.DEFINE_integer('roi_size', 4, 'RoI feat spatial size.') 19 | flags.DEFINE_integer('nz_shape', 20, 'Number of latent feat dimension for shape prediction') 20 | flags.DEFINE_integer('nz_feat', 300, 'RoI encoded feature size') 21 | flags.DEFINE_boolean('use_context', True, 'Should we use bbox + full image features') 22 | flags.DEFINE_boolean('pred_voxels', True, 'Predict voxels, or code instead') 23 | flags.DEFINE_boolean('classify_rot', False, 'Classify rotation, or regress quaternion instead') 24 | flags.DEFINE_integer('nz_rot', 4, 'Number of outputs for rot prediction. 
Value overriden in code.') 25 | 26 | 27 | #------------- Modules ------------# 28 | #----------------------------------# 29 | class ResNetConv(nn.Module): 30 | def __init__(self, n_blocks=4): 31 | super(ResNetConv, self).__init__() 32 | self.resnet = torchvision.models.resnet18(pretrained=True) 33 | self.n_blocks=n_blocks 34 | 35 | def forward(self, x): 36 | n_blocks = self.n_blocks 37 | x = self.resnet.conv1(x) 38 | x = self.resnet.bn1(x) 39 | x = self.resnet.relu(x) 40 | x = self.resnet.maxpool(x) 41 | 42 | if n_blocks >= 1: 43 | x = self.resnet.layer1(x) 44 | if n_blocks >= 2: 45 | x = self.resnet.layer2(x) 46 | if n_blocks >= 3: 47 | x = self.resnet.layer3(x) 48 | if n_blocks >= 4: 49 | x = self.resnet.layer4(x) 50 | return x 51 | 52 | 53 | class ShapePredictor(nn.Module): 54 | def __init__(self, nz_feat, nz_shape, pred_voxels=True): 55 | super(ShapePredictor, self).__init__() 56 | self.pred_layer = nb.fc(True, nz_feat, nz_shape) 57 | self.pred_voxels = pred_voxels 58 | 59 | def forward(self, feat): 60 | # pdb.set_trace() 61 | shape = self.pred_layer.forward(feat) 62 | # print('shape: ( Mean = {}, Var = {} )'.format(shape.mean().data[0], shape.var().data[0])) 63 | if self.pred_voxels: 64 | shape = torch.nn.functional.sigmoid(self.decoder.forward(shape)) 65 | return shape 66 | 67 | def add_voxel_decoder(self, voxel_decoder=None): 68 | # if self.pred_voxels: 69 | self.decoder = voxel_decoder 70 | 71 | 72 | class QuatPredictor(nn.Module): 73 | def __init__(self, nz_feat, nz_rot, classify_rot=True): 74 | super(QuatPredictor, self).__init__() 75 | self.pred_layer = nn.Linear(nz_feat, nz_rot) 76 | self.classify_rot = classify_rot 77 | 78 | def forward(self, feat): 79 | quat = self.pred_layer.forward(feat) 80 | if self.classify_rot: 81 | quat = torch.nn.functional.log_softmax(quat) 82 | else: 83 | quat = torch.nn.functional.normalize(quat) 84 | return quat 85 | 86 | 87 | class ScalePredictor(nn.Module): 88 | def __init__(self, nz): 89 | super(ScalePredictor, self).__init__() 90 | self.pred_layer = nn.Linear(nz, 3) 91 | 92 | def forward(self, feat): 93 | scale = self.pred_layer.forward(feat) + 1 #biasing the scale to 1 94 | scale = torch.nn.functional.relu(scale) + 1e-12 95 | # print('scale: ( Mean = {}, Var = {} )'.format(scale.mean().data[0], scale.var().data[0])) 96 | return scale 97 | 98 | 99 | class TransPredictor(nn.Module): 100 | def __init__(self, nz): 101 | super(TransPredictor, self).__init__() 102 | self.pred_layer = nn.Linear(nz, 3) 103 | 104 | def forward(self, feat): 105 | #pdb.set_trace() 106 | trans = self.pred_layer.forward(feat) 107 | # print('trans: ( Mean = {}, Var = {} )'.format(trans.mean().data[0], trans.var().data[0])) 108 | return trans 109 | 110 | 111 | class LabelPredictor(nn.Module): 112 | def __init__(self, nz_feat, classify_rot=True): 113 | super(LabelPredictor, self).__init__() 114 | self.pred_layer = nn.Linear(nz_feat, 1) 115 | 116 | def forward(self, feat): 117 | pred = self.pred_layer.forward(feat) 118 | pred = torch.nn.functional.sigmoid(pred) 119 | return pred 120 | 121 | 122 | class CodePredictor(nn.Module): 123 | def __init__( 124 | self, nz_feat=200, 125 | pred_voxels=True, nz_shape=100, 126 | classify_rot=True, nz_rot=4 127 | ): 128 | super(CodePredictor, self).__init__() 129 | self.quat_predictor = QuatPredictor(nz_feat, classify_rot=classify_rot, nz_rot=nz_rot) 130 | self.shape_predictor = ShapePredictor(nz_feat, nz_shape=nz_shape, pred_voxels=pred_voxels) 131 | self.scale_predictor = ScalePredictor(nz_feat) 132 | self.trans_predictor = 
TransPredictor(nz_feat) 133 | 134 | def forward(self, feat): 135 | shape_pred = self.shape_predictor.forward(feat) 136 | scale_pred = self.scale_predictor.forward(feat) 137 | quat_pred = self.quat_predictor.forward(feat) 138 | trans_pred = self.trans_predictor.forward(feat) 139 | return shape_pred, scale_pred, quat_pred, trans_pred 140 | 141 | 142 | class RoiEncoder(nn.Module): 143 | def __init__(self, nc_inp_fine, nc_inp_coarse, use_context=True, nz_joint=300, nz_roi=300, nz_coarse=300, nz_box=50): 144 | super(RoiEncoder, self).__init__() 145 | 146 | self.encoder_fine = nb.fc_stack(nc_inp_fine, nz_roi, 2) 147 | self.encoder_coarse = nb.fc_stack(nc_inp_coarse, nz_coarse, 2) 148 | self.encoder_bbox = nb.fc_stack(4, nz_box, 3) 149 | 150 | self.encoder_joint = nb.fc_stack(nz_roi+nz_coarse+nz_box, nz_joint, 2) 151 | self.use_context = use_context 152 | 153 | def forward(self, feats): 154 | roi_img_feat, img_feat_coarse, rois_inp = feats 155 | feat_fine = self.encoder_fine.forward(roi_img_feat) 156 | feat_coarse = self.encoder_coarse.forward(img_feat_coarse) 157 | 158 | #dividing by img_height that the inputs are not too high 159 | feat_bbox = self.encoder_bbox.forward(rois_inp[:, 1:5]/480.0) 160 | if not self.use_context: 161 | feat_bbox = feat_bbox*0 162 | feat_coarse = feat_coarse*0 163 | feat_coarse_rep = torch.index_select(feat_coarse, 0, rois_inp[:, 0].type(torch.LongTensor).cuda()) 164 | 165 | # print(feat_fine.size(), feat_coarse_rep.size(), feat_bbox.size()) 166 | feat_roi = self.encoder_joint.forward(torch.cat((feat_fine, feat_coarse_rep, feat_bbox), dim=1)) 167 | return feat_roi 168 | 169 | 170 | #------------- OC Net -------------# 171 | #----------------------------------# 172 | class OCNet(nn.Module): 173 | def __init__( 174 | self, img_size_coarse, 175 | roi_size=4, 176 | use_context=True, nz_feat=1000, 177 | pred_voxels=True, nz_shape=100, 178 | classify_rot=False, nz_rot=4, 179 | pred_labels=False, filter_positives=False 180 | ): 181 | super(OCNet, self).__init__() 182 | self.pred_labels = pred_labels 183 | self.filter_positives = filter_positives 184 | self.nz_feat = nz_feat 185 | 186 | self.resnet_conv_fine = ResNetConv(n_blocks=3) 187 | self.resnet_conv_coarse = ResNetConv(n_blocks=4) 188 | self.roi_size = roi_size 189 | self.roi_pool = roi_pool.RoIPool(roi_size, roi_size, 1/16) 190 | nc_inp_fine = 256*roi_size*roi_size 191 | nc_inp_coarse = 512*(img_size_coarse[0]//32)*(img_size_coarse[1]//32) 192 | 193 | self.roi_encoder = RoiEncoder(nc_inp_fine, nc_inp_coarse, use_context=use_context, nz_joint=nz_feat) 194 | 195 | self.code_predictor = CodePredictor( 196 | nz_feat=nz_feat, 197 | pred_voxels=pred_voxels, nz_shape=nz_shape, 198 | classify_rot=classify_rot, nz_rot=nz_rot) 199 | nb.net_init(self.roi_encoder) 200 | nb.net_init(self.code_predictor) 201 | 202 | def add_label_predictor(self): 203 | self.label_predictor = LabelPredictor(self.nz_feat) 204 | nb.net_init(self.label_predictor) 205 | 206 | def forward(self, imgs_rois): 207 | imgs_inp_fine = imgs_rois[0] 208 | imgs_inp_coarse = imgs_rois[1] 209 | rois_inp = imgs_rois[2] 210 | 211 | img_feat_coarse = self.resnet_conv_coarse.forward(imgs_inp_coarse) 212 | img_feat_coarse = img_feat_coarse.view(img_feat_coarse.size(0), -1) 213 | 214 | img_feat_fine = self.resnet_conv_fine.forward(imgs_inp_fine) 215 | 216 | roi_img_feat = self.roi_pool.forward(img_feat_fine, rois_inp) 217 | roi_img_feat = roi_img_feat.view(roi_img_feat.size(0), -1) 218 | 219 | roi_feat = self.roi_encoder.forward((roi_img_feat, img_feat_coarse, rois_inp)) 
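#
# Illustrative usage sketch (added for exposition; not part of the original file). It shows,
# under the default flags used elsewhere in this repo (coarse 128x256 and fine 480x640 images,
# nz_feat=300, nz_shape=20, nz_rot=24 when classifying rotation) and assuming a CUDA device
# (RoiEncoder indexes with a cuda LongTensor), how OCNet might be instantiated and called.
# Each row of the rois tensor is [batch_index, x1, y1, x2, y2] in fine-image pixel coordinates
# (see suncg_parse.bboxes_to_rois).
#
#   net = OCNet((128, 256), roi_size=4, use_context=True, nz_feat=300,
#               pred_voxels=False, nz_shape=20, classify_rot=True, nz_rot=24).cuda()
#   imgs_fine = Variable(torch.randn(2, 3, 480, 640).cuda())
#   imgs_coarse = Variable(torch.randn(2, 3, 128, 256).cuda())
#   rois = Variable(torch.Tensor([[0, 40, 60, 300, 350], [1, 80, 100, 500, 420]]).cuda())
#   shape_pred, scale_pred, quat_pred, trans_pred = net.forward((imgs_fine, imgs_coarse, rois))
#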
220 | 221 | if self.pred_labels: 222 | labels_pred = self.label_predictor.forward(roi_feat) 223 | 224 | if self.filter_positives: 225 | pos_inds = imgs_rois[3].squeeze().data.nonzero().squeeze() 226 | pos_inds = torch.autograd.Variable( 227 | pos_inds.type(torch.LongTensor).cuda(), requires_grad=False) 228 | roi_feat = torch.index_select(roi_feat, 0, pos_inds) 229 | 230 | codes_pred = self.code_predictor.forward(roi_feat) 231 | 232 | if self.pred_labels: 233 | return codes_pred, labels_pred 234 | else: 235 | return codes_pred -------------------------------------------------------------------------------- /experiments/suncg/box3d.py: -------------------------------------------------------------------------------- 1 | """Script for box3d prediction experiment. 2 | """ 3 | # Sample usage: python -m factored3d.experiments.suncg.box3d --plot_scalars --display_visuals --display_freq=2000 --save_epoch_freq=1 --batch_size=8 --name=box3d_base --use_context --pred_voxels=False --classify_rot --shape_loss_wt=10 4 | 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | from absl import app 9 | from absl import flags 10 | import os 11 | import os.path as osp 12 | import numpy as np 13 | import torch 14 | import torchvision 15 | from torch.autograd import Variable 16 | import time 17 | import scipy.misc 18 | import pdb 19 | import copy 20 | 21 | from ...data import suncg as suncg_data 22 | from ...utils import suncg_parse 23 | from ...nnutils import train_utils 24 | from ...nnutils import net_blocks 25 | from ...nnutils import loss_utils 26 | from ...nnutils import oc_net 27 | from ...nnutils import disp_net 28 | from ...utils import visutil 29 | from ...renderer import utils as render_utils 30 | 31 | curr_path = osp.dirname(osp.abspath(__file__)) 32 | cache_path = osp.join(curr_path, '..', '..', 'cachedir') 33 | flags.DEFINE_string('rendering_dir', osp.join(cache_path, 'rendering'), 'Directory where intermittent renderings are saved') 34 | 35 | flags.DEFINE_integer('voxel_size', 32, 'Spatial dimension of shape voxels') 36 | flags.DEFINE_integer('n_voxel_layers', 5, 'Number of layers ') 37 | flags.DEFINE_integer('voxel_nc_max', 128, 'Max 3D channels') 38 | flags.DEFINE_integer('voxel_nc_l1', 8, 'Initial shape encder/decoder layer dimension') 39 | 40 | flags.DEFINE_string('shape_pretrain_name', 'object_autoenc_32', 'Experiment name for pretrained shape encoder-decoder') 41 | flags.DEFINE_integer('shape_pretrain_epoch', 800, 'Experiment name for shape decoder') 42 | flags.DEFINE_boolean('shape_dec_ft', False, 'If predicting voxels, should we pretrain from an existing deocder') 43 | 44 | flags.DEFINE_string('ft_pretrain_name', 'box3d_base', 'Experiment name from which we will pretrain the OCNet') 45 | flags.DEFINE_integer('ft_pretrain_epoch', 0, 'Network epoch from which we will finetune') 46 | 47 | flags.DEFINE_integer('max_rois', 5, 'If we have more objects than this per image, we will subsample.') 48 | flags.DEFINE_integer('max_total_rois', 40, 'If we have more objects than this per batch, we will reject the batch.') 49 | 50 | FLAGS = flags.FLAGS 51 | 52 | 53 | class Box3dTrainer(train_utils.Trainer): 54 | def define_model(self): 55 | ''' 56 | Define the pytorch net 'model' whose weights will be updated during training. 
57 | ''' 58 | opts = self.opts 59 | assert(not (opts.ft_pretrain_epoch > 0 and opts.num_pretrain_epochs > 0)) 60 | 61 | self.voxel_encoder, nc_enc_voxel = net_blocks.encoder3d( 62 | opts.n_voxel_layers, nc_max=opts.voxel_nc_max, nc_l1=opts.voxel_nc_l1, nz_shape=opts.nz_shape) 63 | 64 | self.voxel_decoder = net_blocks.decoder3d( 65 | opts.n_voxel_layers, opts.nz_shape, nc_enc_voxel, nc_min=opts.voxel_nc_l1) 66 | 67 | self.model = oc_net.OCNet( 68 | (opts.img_height, opts.img_width), 69 | roi_size=opts.roi_size, 70 | use_context=opts.use_context, nz_feat=opts.nz_feat, 71 | pred_voxels=opts.pred_voxels, nz_shape=opts.nz_shape, 72 | classify_rot=opts.classify_rot, nz_rot=opts.nz_rot) 73 | 74 | if opts.ft_pretrain_epoch > 0: 75 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.ft_pretrain_name) 76 | self.load_network( 77 | self.model, 'pred', opts.ft_pretrain_epoch, network_dir=network_dir) 78 | 79 | if opts.pred_voxels: 80 | self.model.code_predictor.shape_predictor.add_voxel_decoder( 81 | copy.deepcopy(self.voxel_decoder)) 82 | 83 | if opts.pred_voxels and opts.shape_dec_ft: 84 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.shape_pretrain_name) 85 | self.load_network( 86 | self.model.code_predictor.shape_predictor.decoder, 87 | 'decoder', opts.shape_pretrain_epoch, network_dir=network_dir) 88 | 89 | if self.opts.num_pretrain_epochs > 0: 90 | self.load_network(self.model, 'pred', self.opts.num_pretrain_epochs-1) 91 | self.model = self.model.cuda(device_id=self.opts.gpu_id) 92 | return 93 | 94 | def init_dataset(self): 95 | opts = self.opts 96 | self.real_iter = 1 # number of iterations we actually updated the net for 97 | self.data_iter = 1 # number of iterations we called the data loader 98 | self.resnet_transform = torchvision.transforms.Normalize( 99 | mean=[0.485, 0.456, 0.406], 100 | std=[0.229, 0.224, 0.225]) 101 | split_dir = osp.join(opts.suncg_dir, 'splits') 102 | self.split = suncg_parse.get_split(split_dir, house_names=os.listdir(osp.join(opts.suncg_dir, 'camera'))) 103 | self.dataloader = suncg_data.suncg_data_loader(self.split['train'], opts) 104 | 105 | if not opts.pred_voxels: 106 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.shape_pretrain_name) 107 | self.load_network( 108 | self.voxel_encoder, 109 | 'encoder', opts.shape_pretrain_epoch, network_dir=network_dir) 110 | self.load_network( 111 | self.voxel_decoder, 112 | 'decoder', opts.shape_pretrain_epoch, network_dir=network_dir) 113 | self.voxel_encoder.eval() 114 | self.voxel_encoder = self.voxel_encoder.cuda(device_id=self.opts.gpu_id) 115 | self.voxel_decoder.eval() 116 | self.voxel_decoder = self.voxel_decoder.cuda(device_id=self.opts.gpu_id) 117 | 118 | if opts.voxel_size < 64: 119 | self.downsample_voxels = True 120 | self.downsampler = render_utils.Downsample( 121 | 64//opts.voxel_size, use_max=True, batch_mode=True 122 | ).cuda(device_id=self.opts.gpu_id) 123 | 124 | if opts.classify_rot: 125 | self.quat_medoids = torch.from_numpy( 126 | scipy.io.loadmat(osp.join(opts.cache_dir, 'quat_medoids.mat'))['medoids']).type(torch.FloatTensor) 127 | 128 | 129 | def define_criterion(self): 130 | self.smoothed_factor_losses = { 131 | 'shape': 0, 'scale': 0, 'quat': 0, 'trans': 0 132 | } 133 | 134 | def set_input(self, batch): 135 | opts = self.opts 136 | rois = suncg_parse.bboxes_to_rois(batch['bboxes']) 137 | self.data_iter += 1 138 | if rois.numel() <= 5 or rois.numel() >= 5*opts.max_total_rois: #with just one element, batch_norm will screw up 139 | self.invalid_batch = True 140 | return 141 | 
else: 142 | self.invalid_batch = False 143 | self.real_iter += 1 144 | 145 | input_imgs_fine = batch['img_fine'].type(torch.FloatTensor) 146 | input_imgs = batch['img'].type(torch.FloatTensor) 147 | for b in range(input_imgs_fine.size(0)): 148 | input_imgs_fine[b] = self.resnet_transform(input_imgs_fine[b]) 149 | input_imgs[b] = self.resnet_transform(input_imgs[b]) 150 | 151 | self.input_imgs = Variable( 152 | input_imgs.cuda(device=opts.gpu_id), requires_grad=False) 153 | 154 | self.input_imgs_fine = Variable( 155 | input_imgs_fine.cuda(device=opts.gpu_id), requires_grad=False) 156 | 157 | self.rois = Variable( 158 | rois.type(torch.FloatTensor).cuda(device=opts.gpu_id), requires_grad=False) 159 | 160 | code_tensors = suncg_parse.collate_codes(batch['codes']) 161 | code_tensors[0] = code_tensors[0].unsqueeze(1) #unsqueeze voxels 162 | 163 | if opts.classify_rot: 164 | quats_gt = code_tensors[2].clone() 165 | code_tensors[2] = suncg_parse.quats_to_bininds(code_tensors[2], self.quat_medoids) 166 | quats_binned = suncg_parse.bininds_to_quats(code_tensors[2], self.quat_medoids) 167 | # q_diff_loss = (quats_gt-quats_binned).pow(2).sum(1) 168 | # q_sum_loss = (quats_gt+quats_binned).pow(2).sum(1) 169 | # q_loss, _ = torch.stack((q_diff_loss, q_sum_loss), dim=1).min(1) 170 | # print(quats_gt, quats_binned) 171 | # print(q_loss) 172 | 173 | 174 | self.codes_gt = [ 175 | Variable(t.cuda(device=opts.gpu_id), requires_grad=False) for t in code_tensors] 176 | 177 | if self.downsample_voxels: 178 | self.codes_gt[0] = self.downsampler.forward(self.codes_gt[0]) 179 | 180 | if not opts.pred_voxels: 181 | self.codes_gt[0] = self.voxel_encoder.forward(self.codes_gt[0]) 182 | 183 | def get_current_scalars(self): 184 | loss_dict = {'total_loss': self.smoothed_total_loss, 'iter_frac': self.real_iter/self.data_iter} 185 | for k in self.smoothed_factor_losses.keys(): 186 | loss_dict['loss_' + k] = self.smoothed_factor_losses[k] 187 | return loss_dict 188 | 189 | def render_codes(self, code_vars, prefix='mesh'): 190 | opts = self.opts 191 | code_list = suncg_parse.uncollate_codes(code_vars, self.input_imgs.data.size(0), self.rois.data.cpu()[:,0]) 192 | 193 | mesh_dir = osp.join(opts.rendering_dir, opts.name) 194 | if not os.path.exists(mesh_dir): 195 | os.makedirs(mesh_dir) 196 | mesh_file = osp.join(mesh_dir, prefix + '.obj') 197 | render_utils.save_parse(mesh_file, code_list[0], save_objectwise=False) 198 | 199 | png_dir = mesh_file.replace('.obj', '/') 200 | render_utils.render_mesh(mesh_file, png_dir) 201 | 202 | return scipy.misc.imread(osp.join(png_dir, prefix + '_render_000.png')) 203 | 204 | 205 | def get_current_visuals(self): 206 | visuals = {} 207 | opts = self.opts 208 | visuals['img'] = visutil.tensor2im(visutil.undo_resnet_preprocess( 209 | self.input_imgs_fine.data)) 210 | 211 | codes_gt_vis = [t for t in self.codes_gt] 212 | if not opts.pred_voxels: 213 | codes_gt_vis[0] = torch.nn.functional.sigmoid( 214 | self.voxel_decoder.forward(self.codes_gt[0]) 215 | ) 216 | 217 | if opts.classify_rot: 218 | codes_gt_vis[2] = Variable(suncg_parse.bininds_to_quats( 219 | codes_gt_vis[2].cpu().data, self.quat_medoids), requires_grad=False) 220 | 221 | visuals['codes_gt'] = self.render_codes(codes_gt_vis, prefix='gt') 222 | 223 | codes_pred_vis = [t for t in self.codes_pred] 224 | if not opts.pred_voxels: 225 | codes_pred_vis[0] = torch.nn.functional.sigmoid( 226 | self.voxel_decoder.forward(self.codes_pred[0]) 227 | ) 228 | 229 | if opts.classify_rot: 230 | _, bin_inds = 
torch.max(codes_pred_vis[2].data.cpu(), 1) 231 | codes_pred_vis[2] = Variable(suncg_parse.bininds_to_quats( 232 | bin_inds, self.quat_medoids), requires_grad=False) 233 | 234 | visuals['codes_pred'] = self.render_codes(codes_pred_vis, prefix='pred') 235 | 236 | return visuals 237 | 238 | 239 | def get_current_points(self): 240 | pts_dict = {} 241 | return pts_dict 242 | 243 | def forward(self): 244 | opts = self.opts 245 | 246 | self.codes_pred = self.model.forward((self.input_imgs_fine, self.input_imgs, self.rois)) 247 | self.total_loss, self.loss_factors = loss_utils.code_loss( 248 | self.codes_pred, self.codes_gt, 249 | pred_voxels=opts.pred_voxels, 250 | classify_rot=opts.classify_rot, 251 | shape_wt=opts.shape_loss_wt, 252 | scale_wt=opts.scale_loss_wt, 253 | quat_wt=opts.quat_loss_wt, 254 | trans_wt=opts.trans_loss_wt 255 | ) 256 | for k in self.smoothed_factor_losses.keys(): 257 | self.smoothed_factor_losses[k] = 0.99*self.smoothed_factor_losses[k] + 0.01*self.loss_factors[k].data[0] 258 | 259 | 260 | def main(_): 261 | torch.manual_seed(0) 262 | if FLAGS.classify_rot: 263 | FLAGS.nz_rot = 24 264 | else: 265 | FLAGS.nz_rot = 4 266 | FLAGS.n_data_workers = 0 # code crashes otherwise due to json not liking parallelization 267 | trainer = Box3dTrainer(FLAGS) 268 | trainer.init_training() 269 | trainer.train() 270 | 271 | 272 | if __name__ == '__main__': 273 | app.run() -------------------------------------------------------------------------------- /experiments/suncg/dwr.py: -------------------------------------------------------------------------------- 1 | """Script for dwr experiment. 2 | """ 3 | # Sample usage: 4 | 5 | # (init) : python -m factored3d.experiments.suncg.dwr --name=dwr_base --classify_rot --pred_voxels=False --use_context --plot_scalars --display_visuals --save_epoch_freq=1 --display_freq=1000 --display_id=100 --box3d_ft --shape_loss_wt=10 --label_loss_wt=10 --batch_size=8 6 | 7 | # shape_ft : python -m factored3d.experiments.suncg.dwr --name=dwr_shape_ft --classify_rot --pred_voxels=True --shape_dec_ft --use_context --plot_scalars --display_visuals --save_epoch_freq=1 --display_freq=1000 --display_id=100 --shape_loss_wt=10 --label_loss_wt=10 --batch_size=8 --ft_pretrain_epoch=1 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | from absl import app 13 | from absl import flags 14 | import os 15 | import os.path as osp 16 | import numpy as np 17 | import torch 18 | import torchvision 19 | from torch.autograd import Variable 20 | import time 21 | import scipy.misc 22 | import pdb 23 | import copy 24 | 25 | from ...data import suncg as suncg_data 26 | from ...utils import suncg_parse 27 | from ...nnutils import train_utils 28 | from ...nnutils import net_blocks 29 | from ...nnutils import loss_utils 30 | from ...nnutils import oc_net 31 | from ...nnutils import disp_net 32 | from ...utils import visutil 33 | from ...renderer import utils as render_utils 34 | 35 | curr_path = osp.dirname(osp.abspath(__file__)) 36 | cache_path = osp.join(curr_path, '..', '..', 'cachedir') 37 | flags.DEFINE_string('rendering_dir', osp.join(cache_path, 'rendering'), 'Directory where intermittent renderings are saved') 38 | 39 | flags.DEFINE_integer('voxel_size', 32, 'Spatial dimension of shape voxels') 40 | flags.DEFINE_integer('n_voxel_layers', 5, 'Number of layers ') 41 | flags.DEFINE_integer('voxel_nc_max', 128, 'Max 3D channels') 42 | flags.DEFINE_integer('voxel_nc_l1', 8, 'Initial shape encder/decoder layer 
dimension') 43 | 44 | flags.DEFINE_string('shape_pretrain_name', 'object_autoenc_32', 'Experiment name for pretrained shape encoder-decoder') 45 | flags.DEFINE_integer('shape_pretrain_epoch', 800, 'Experiment name for shape decoder') 46 | flags.DEFINE_boolean('shape_dec_ft', False, 'If predicting voxels, should we pretrain from an existing deocder') 47 | 48 | flags.DEFINE_string('box3d_pretrain_name', 'box3d_base', 'Experiment name for pretrained box3d experiment') 49 | flags.DEFINE_integer('box3d_pretrain_epoch', 8, 'Experiment name for shape decoder') 50 | flags.DEFINE_boolean('box3d_ft', False, 'Finetune from existing net trained with gt boxes') 51 | 52 | flags.DEFINE_string('ft_pretrain_name', 'dwr_base', 'Experiment name from which we will pretrain the OCNet') 53 | flags.DEFINE_integer('ft_pretrain_epoch', 0, 'Network epoch from which we will finetune') 54 | 55 | flags.DEFINE_float('label_loss_wt', 1, 'Label loss weight') 56 | 57 | flags.DEFINE_integer('max_rois', 100, 'If we have more objects than this per image, we will subsample. Set to very large value') 58 | flags.DEFINE_integer('max_total_rois', 100, 'If we have more objects than this per batch, we will reject the batch.') 59 | 60 | FLAGS = flags.FLAGS 61 | 62 | 63 | class DWRTrainer(train_utils.Trainer): 64 | def define_model(self): 65 | ''' 66 | Define the pytorch net 'model' whose weights will be updated during training. 67 | ''' 68 | 69 | opts = self.opts 70 | 71 | assert(not (opts.ft_pretrain_epoch > 0 and opts.num_pretrain_epochs > 0)) 72 | assert(not (opts.ft_pretrain_epoch > 0 and opts.box3d_ft)) 73 | assert(not (opts.num_pretrain_epochs > 0 and opts.box3d_ft)) 74 | 75 | self.voxel_encoder, nc_enc_voxel = net_blocks.encoder3d( 76 | opts.n_voxel_layers, nc_max=opts.voxel_nc_max, nc_l1=opts.voxel_nc_l1, nz_shape=opts.nz_shape) 77 | 78 | self.voxel_decoder = net_blocks.decoder3d( 79 | opts.n_voxel_layers, opts.nz_shape, nc_enc_voxel, nc_min=opts.voxel_nc_l1) 80 | 81 | self.model = oc_net.OCNet( 82 | (opts.img_height, opts.img_width), 83 | roi_size=opts.roi_size, 84 | use_context=opts.use_context, nz_feat=opts.nz_feat, 85 | pred_voxels=opts.pred_voxels, nz_shape=opts.nz_shape, 86 | classify_rot=opts.classify_rot, nz_rot=opts.nz_rot, 87 | pred_labels=True, filter_positives=True) 88 | 89 | if opts.box3d_ft: 90 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.box3d_pretrain_name) 91 | self.load_network( 92 | self.model, 93 | 'pred', opts.box3d_pretrain_epoch, network_dir=network_dir) 94 | 95 | # need to add label pred separately to allow finetuning from existing box3d net 96 | self.model.add_label_predictor() 97 | 98 | if opts.ft_pretrain_epoch > 0: 99 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.ft_pretrain_name) 100 | self.load_network( 101 | self.model, 'pred', opts.ft_pretrain_epoch, network_dir=network_dir) 102 | 103 | if opts.pred_voxels: 104 | self.model.code_predictor.shape_predictor.add_voxel_decoder( 105 | copy.deepcopy(self.voxel_decoder)) 106 | 107 | if opts.pred_voxels and opts.shape_dec_ft: 108 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.shape_pretrain_name) 109 | self.load_network( 110 | self.model.code_predictor.shape_predictor.decoder, 111 | 'decoder', opts.shape_pretrain_epoch, network_dir=network_dir) 112 | 113 | if self.opts.num_pretrain_epochs > 0: 114 | self.load_network(self.model, 'pred', self.opts.num_pretrain_epochs-1) 115 | self.model = self.model.cuda(device_id=self.opts.gpu_id) 116 | return 117 | 118 | def init_dataset(self): 119 | opts = self.opts 120 | 
self.real_iter = 1 # number of iterations we actually updated the net for 121 | self.data_iter = 1 # number of iterations we called the data loader 122 | self.resnet_transform = torchvision.transforms.Normalize( 123 | mean=[0.485, 0.456, 0.406], 124 | std=[0.229, 0.224, 0.225]) 125 | split_dir = osp.join(opts.suncg_dir, 'splits') 126 | self.split = suncg_parse.get_split(split_dir, house_names=os.listdir(osp.join(opts.suncg_dir, 'camera'))) 127 | self.dataloader = suncg_data.suncg_data_loader(self.split['train'], opts) 128 | 129 | if not opts.pred_voxels: 130 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.shape_pretrain_name) 131 | self.load_network( 132 | self.voxel_encoder, 133 | 'encoder', opts.shape_pretrain_epoch, network_dir=network_dir) 134 | self.load_network( 135 | self.voxel_decoder, 136 | 'decoder', opts.shape_pretrain_epoch, network_dir=network_dir) 137 | self.voxel_encoder.eval() 138 | self.voxel_encoder = self.voxel_encoder.cuda(device_id=self.opts.gpu_id) 139 | self.voxel_decoder.eval() 140 | self.voxel_decoder = self.voxel_decoder.cuda(device_id=self.opts.gpu_id) 141 | 142 | if opts.voxel_size < 64: 143 | self.downsample_voxels = True 144 | self.downsampler = render_utils.Downsample( 145 | 64//opts.voxel_size, use_max=True, batch_mode=True 146 | ).cuda(device_id=self.opts.gpu_id) 147 | 148 | if opts.classify_rot: 149 | self.quat_medoids = torch.from_numpy( 150 | scipy.io.loadmat(osp.join(opts.cache_dir, 'quat_medoids.mat'))['medoids']).type(torch.FloatTensor) 151 | 152 | 153 | def define_criterion(self): 154 | self.smoothed_factor_losses = { 155 | 'shape': 0, 'scale': 0, 'quat': 0, 'trans': 0 156 | } 157 | self.labels_criterion = torch.nn.BCELoss() 158 | self.smoothed_label_loss = 0 159 | 160 | def set_input(self, batch): 161 | opts = self.opts 162 | rois = suncg_parse.bboxes_to_rois(batch['bboxes_proposals']) 163 | roi_labels = batch['labels_proposals'] 164 | self.data_iter += 1 165 | if roi_labels.sum() <= 1 or roi_labels.sum() >= opts.max_total_rois: #with just one element, batch_norm will screw up 166 | self.invalid_batch = True 167 | return 168 | else: 169 | self.invalid_batch = False 170 | self.real_iter += 1 171 | 172 | input_imgs_fine = batch['img_fine'].type(torch.FloatTensor) 173 | input_imgs = batch['img'].type(torch.FloatTensor) 174 | for b in range(input_imgs_fine.size(0)): 175 | input_imgs_fine[b] = self.resnet_transform(input_imgs_fine[b]) 176 | input_imgs[b] = self.resnet_transform(input_imgs[b]) 177 | 178 | self.input_imgs = Variable( 179 | input_imgs.cuda(device=opts.gpu_id), requires_grad=False) 180 | 181 | self.input_imgs_fine = Variable( 182 | input_imgs_fine.cuda(device=opts.gpu_id), requires_grad=False) 183 | 184 | self.rois = Variable( 185 | rois.type(torch.FloatTensor).cuda(device=opts.gpu_id), requires_grad=False) 186 | 187 | self.roi_labels = Variable( 188 | roi_labels.type(torch.FloatTensor).cuda(device=opts.gpu_id), requires_grad=False) 189 | 190 | code_tensors = suncg_parse.collate_codes(batch['codes_proposals']) 191 | code_tensors[0] = code_tensors[0].unsqueeze(1) #unsqueeze voxels 192 | 193 | if opts.classify_rot: 194 | quats_gt = code_tensors[2].clone() 195 | code_tensors[2] = suncg_parse.quats_to_bininds(code_tensors[2], self.quat_medoids) 196 | quats_binned = suncg_parse.bininds_to_quats(code_tensors[2], self.quat_medoids) 197 | 198 | self.codes_gt = [ 199 | Variable(t.cuda(device=opts.gpu_id), requires_grad=False) for t in code_tensors] 200 | 201 | if self.downsample_voxels: 202 | self.codes_gt[0] = 
self.downsampler.forward(self.codes_gt[0]) 203 | 204 | if not opts.pred_voxels: 205 | self.codes_gt[0] = self.voxel_encoder.forward(self.codes_gt[0]) 206 | 207 | def get_current_scalars(self): 208 | loss_dict = {'total_loss': self.smoothed_total_loss, 'iter_frac': self.real_iter/self.data_iter} 209 | loss_dict['label_loss'] = self.smoothed_label_loss 210 | for k in self.smoothed_factor_losses.keys(): 211 | loss_dict['loss_' + k] = self.smoothed_factor_losses[k] 212 | return loss_dict 213 | 214 | def get_current_visuals(self): 215 | visuals = {} 216 | opts = self.opts 217 | visuals['img'] = visutil.tensor2im(visutil.undo_resnet_preprocess( 218 | self.input_imgs_fine.data)) 219 | return visuals 220 | 221 | def get_current_points(self): 222 | pts_dict = {} 223 | return pts_dict 224 | 225 | def forward(self): 226 | opts = self.opts 227 | 228 | self.codes_pred, self.labels_pred = self.model.forward(( 229 | self.input_imgs_fine, self.input_imgs, self.rois, self.roi_labels)) 230 | self.total_loss, self.loss_factors = loss_utils.code_loss( 231 | self.codes_pred, self.codes_gt, 232 | pred_voxels=opts.pred_voxels, 233 | classify_rot=opts.classify_rot, 234 | shape_wt=opts.shape_loss_wt, 235 | scale_wt=opts.scale_loss_wt, 236 | quat_wt=opts.quat_loss_wt, 237 | trans_wt=opts.trans_loss_wt 238 | ) 239 | labels_loss = self.labels_criterion.forward(self.labels_pred, self.roi_labels.unsqueeze(1)) 240 | self.total_loss += opts.label_loss_wt*labels_loss 241 | 242 | for k in self.smoothed_factor_losses.keys(): 243 | self.smoothed_factor_losses[k] = 0.99*self.smoothed_factor_losses[k] + 0.01*self.loss_factors[k].data[0] 244 | self.smoothed_label_loss = 0.99*self.smoothed_label_loss + 0.01*labels_loss.data[0] 245 | 246 | 247 | def main(_): 248 | torch.manual_seed(0) 249 | FLAGS.suncg_dl_out_codes = True 250 | FLAGS.suncg_dl_out_fine_img = True 251 | FLAGS.suncg_dl_out_proposals = True 252 | FLAGS.suncg_dl_out_voxels = False 253 | FLAGS.suncg_dl_out_layout = False 254 | FLAGS.suncg_dl_out_depth = False 255 | FLAGS.n_data_workers = 0 # code crashes otherwise due to json not liking parallelization 256 | torch.manual_seed(0) 257 | 258 | if FLAGS.classify_rot: 259 | FLAGS.nz_rot = 24 260 | else: 261 | FLAGS.nz_rot = 4 262 | trainer = DWRTrainer(FLAGS) 263 | trainer.init_training() 264 | trainer.train() 265 | 266 | 267 | if __name__ == '__main__': 268 | app.run() -------------------------------------------------------------------------------- /demo/demo_utils.py: -------------------------------------------------------------------------------- 1 | """Testing class for the demo. 
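Wraps the pretrained object, layout, depth and scene-voxel networks (DemoTester)
and simple mesh-rendering helpers (DemoRenderer).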
2 | """ 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | from absl import flags 9 | import os 10 | import os.path as osp 11 | import numpy as np 12 | import torch 13 | import torchvision 14 | from torch.autograd import Variable 15 | import scipy.misc 16 | import pdb 17 | import copy 18 | import scipy.io as sio 19 | 20 | from ..nnutils import test_utils 21 | from ..nnutils import net_blocks 22 | from ..nnutils import voxel_net 23 | from ..nnutils import oc_net 24 | from ..nnutils import disp_net 25 | 26 | from ..utils import suncg_parse 27 | from ..utils import metrics 28 | 29 | from ..renderer import utils as render_utils 30 | 31 | 32 | curr_path = osp.dirname(osp.abspath(__file__)) 33 | cache_path = osp.join(curr_path, '..', 'cachedir') 34 | flags.DEFINE_string('rendering_dir', osp.join(cache_path, 'rendering'), 'Directory where intermittent renderings are saved') 35 | 36 | flags.DEFINE_integer('voxel_size', 32, 'Spatial dimension of shape voxels') 37 | flags.DEFINE_integer('n_voxel_layers', 5, 'Number of layers ') 38 | flags.DEFINE_integer('voxel_nc_max', 128, 'Max 3D channels') 39 | flags.DEFINE_integer('voxel_nc_l1', 8, 'Initial shape encder/decoder layer dimension') 40 | flags.DEFINE_float('voxel_eval_thresh', 0.25, 'Voxel evaluation threshold') 41 | 42 | flags.DEFINE_string('shape_pretrain_name', 'object_autoenc_32', 'Experiment name for pretrained shape encoder-decoder') 43 | flags.DEFINE_integer('shape_pretrain_epoch', 800, 'Experiment name for shape decoder') 44 | 45 | flags.DEFINE_string('layout_name', 'layout_pred', 'Experiment name for layout predictor') 46 | flags.DEFINE_integer('layout_train_epoch', 8, 'Experiment name for layout predictor') 47 | 48 | flags.DEFINE_string('depth_name', 'depth_baseline', 'Experiment name for layout predictor') 49 | flags.DEFINE_integer('depth_train_epoch', 8, 'Experiment name for layout predictor') 50 | 51 | flags.DEFINE_string('scene_voxels_name', 'voxels_baseline', 'Experiment name for layout predictor') 52 | flags.DEFINE_integer('scene_voxels_train_epoch', 8, 'Experiment name for layout predictor') 53 | flags.DEFINE_float('scene_voxels_thresh', 0.25, 'Threshold for scene voxels prediction') 54 | 55 | flags.DEFINE_integer('img_height', 128, 'image height') 56 | flags.DEFINE_integer('img_width', 256, 'image width') 57 | 58 | flags.DEFINE_integer('img_height_fine', 480, 'image height') 59 | flags.DEFINE_integer('img_width_fine', 640, 'image width') 60 | 61 | flags.DEFINE_integer('layout_height', 64, 'amodal depth height : should be half image height') 62 | flags.DEFINE_integer('layout_width', 128, 'amodal depth width : should be half image width') 63 | 64 | flags.DEFINE_integer('voxels_height', 32, 'scene voxels height. 
Should be half of width and depth.') 65 | flags.DEFINE_integer('voxels_width', 64, 'scene voxels width') 66 | flags.DEFINE_integer('voxels_depth', 64, 'scene voxels depth') 67 | 68 | class DemoTester(test_utils.Tester): 69 | def load_oc3d_model(self): 70 | opts = self.opts 71 | self.voxel_encoder, nc_enc_voxel = net_blocks.encoder3d( 72 | opts.n_voxel_layers, nc_max=opts.voxel_nc_max, nc_l1=opts.voxel_nc_l1, nz_shape=opts.nz_shape) 73 | 74 | self.voxel_decoder = net_blocks.decoder3d( 75 | opts.n_voxel_layers, opts.nz_shape, nc_enc_voxel, nc_min=opts.voxel_nc_l1) 76 | 77 | self.oc3d_model = oc_net.OCNet( 78 | (opts.img_height, opts.img_width), 79 | roi_size=opts.roi_size, 80 | use_context=opts.use_context, nz_feat=opts.nz_feat, 81 | pred_voxels=False, nz_shape=opts.nz_shape, pred_labels=True, 82 | classify_rot=opts.classify_rot, nz_rot=opts.nz_rot) 83 | self.oc3d_model.add_label_predictor() 84 | 85 | if opts.pred_voxels: 86 | self.oc3d_model.code_predictor.shape_predictor.add_voxel_decoder( 87 | copy.deepcopy(self.voxel_decoder)) 88 | 89 | self.load_network(self.oc3d_model, 'pred', self.opts.num_train_epoch) 90 | self.oc3d_model.eval() 91 | self.oc3d_model = self.oc3d_model.cuda(device_id=self.opts.gpu_id) 92 | 93 | if opts.pred_voxels: 94 | self.voxel_decoder = copy.deepcopy(self.oc3d_model.code_predictor.shape_predictor.decoder) 95 | 96 | def load_depth_model(self): 97 | opts = self.opts 98 | ## Load depth prediction network 99 | self.depth_model = disp_net.dispnet() 100 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.depth_name) 101 | self.load_network( 102 | self.depth_model, 'pred', opts.depth_train_epoch, network_dir=network_dir) 103 | self.depth_model.eval() 104 | self.depth_model = self.depth_model.cuda(device_id=self.opts.gpu_id) 105 | 106 | def load_layout_model(self): 107 | opts = self.opts 108 | self.layout_model = disp_net.dispnet() 109 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.layout_name) 110 | self.load_network( 111 | self.layout_model, 'pred', opts.layout_train_epoch, network_dir=network_dir) 112 | self.layout_model.eval() 113 | self.layout_model = self.layout_model.cuda(device_id=self.opts.gpu_id) 114 | 115 | def load_scene_voxels_model(self): 116 | opts = self.opts 117 | self.scene_voxels_model = voxel_net.VoxelNet( 118 | [opts.img_height, opts.img_width], 119 | [opts.voxels_width, opts.voxels_height, opts.voxels_depth], 120 | nz_voxels=opts.nz_voxels, 121 | n_voxels_upconv=opts.n_voxels_upconv 122 | ) 123 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.scene_voxels_name) 124 | self.load_network( 125 | self.scene_voxels_model, 'pred', opts.layout_train_epoch, network_dir=network_dir) 126 | self.scene_voxels_model.eval() 127 | self.scene_voxels_model = self.scene_voxels_model.cuda(device_id=self.opts.gpu_id) 128 | 129 | def define_model(self): 130 | self.load_oc3d_model() 131 | self.load_layout_model() 132 | self.load_depth_model() 133 | self.load_scene_voxels_model() 134 | return 135 | 136 | def init_dataset(self): 137 | opts = self.opts 138 | self.resnet_transform = torchvision.transforms.Normalize( 139 | mean=[0.485, 0.456, 0.406], 140 | std=[0.229, 0.224, 0.225]) 141 | 142 | if opts.voxel_size < 64: 143 | self.downsample_voxels = True 144 | self.downsampler = render_utils.Downsample( 145 | 64//opts.voxel_size, use_max=True, batch_mode=True 146 | ).cuda(device_id=self.opts.gpu_id) 147 | else: 148 | self.downsampler = None 149 | 150 | if opts.classify_rot: 151 | self.quat_medoids = torch.from_numpy( 152 | 
scipy.io.loadmat(osp.join(opts.cache_dir, 'quat_medoids.mat'))['medoids']).type(torch.FloatTensor) 153 | 154 | if not opts.pred_voxels: 155 | network_dir = osp.join(opts.cache_dir, 'snapshots', opts.shape_pretrain_name) 156 | self.load_network( 157 | self.voxel_decoder, 158 | 'decoder', opts.shape_pretrain_epoch, network_dir=network_dir) 159 | self.voxel_decoder.eval() 160 | self.voxel_decoder = self.voxel_decoder.cuda(device_id=self.opts.gpu_id) 161 | 162 | def decode_shape(self, pred_shape): 163 | opts = self.opts 164 | pred_shape = torch.nn.functional.sigmoid( 165 | self.voxel_decoder.forward(pred_shape) 166 | ) 167 | return pred_shape 168 | 169 | def decode_rotation(self, pred_rot): 170 | opts = self.opts 171 | if opts.classify_rot: 172 | _, bin_inds = torch.max(pred_rot.data.cpu(), 1) 173 | pred_rot = Variable(suncg_parse.bininds_to_quats( 174 | bin_inds, self.quat_medoids), requires_grad=False) 175 | return pred_rot 176 | 177 | def set_input(self, batch): 178 | opts = self.opts 179 | rois = suncg_parse.bboxes_to_rois(batch['bboxes_test_proposals']) 180 | 181 | # Inputs for prediction 182 | input_imgs_fine = batch['img_fine'].type(torch.FloatTensor) 183 | input_imgs = batch['img'].type(torch.FloatTensor) 184 | 185 | self.input_imgs_orig = Variable( 186 | input_imgs.cuda(device=opts.gpu_id), requires_grad=False) 187 | 188 | for b in range(input_imgs_fine.size(0)): 189 | input_imgs_fine[b] = self.resnet_transform(input_imgs_fine[b]) 190 | input_imgs[b] = self.resnet_transform(input_imgs[b]) 191 | 192 | self.input_imgs = Variable( 193 | input_imgs.cuda(device=opts.gpu_id), requires_grad=False) 194 | 195 | self.input_imgs_fine = Variable( 196 | input_imgs_fine.cuda(device=opts.gpu_id), requires_grad=False) 197 | 198 | self.rois = Variable( 199 | rois.type(torch.FloatTensor).cuda(device=opts.gpu_id), requires_grad=False) 200 | 201 | def filter_pos(self, codes, pos_inds): 202 | pos_inds = torch.from_numpy(np.array(pos_inds)).squeeze() 203 | pos_inds = torch.autograd.Variable( 204 | pos_inds.type(torch.LongTensor).cuda(), requires_grad=False) 205 | filtered_codes = [torch.index_select(code, 0, pos_inds) for code in codes] 206 | return filtered_codes 207 | 208 | def predict_factored3d(self): 209 | codes_pred_all, labels_pred = self.oc3d_model.forward( 210 | (self.input_imgs_fine, self.input_imgs, self.rois)) 211 | scores_pred = labels_pred.cpu().data.numpy() 212 | bboxes_pred = self.rois.data.cpu().numpy()[:, 1:] 213 | min_score_vis = np.minimum(0.7, np.max(scores_pred)) 214 | pos_inds_vis = metrics.nms( 215 | np.concatenate((bboxes_pred, scores_pred), axis=1), 216 | 0.3, min_score=min_score_vis) 217 | 218 | codes_pred_vis = self.filter_pos(codes_pred_all, pos_inds_vis) 219 | rois_pos_vis = self.filter_pos([self.rois], pos_inds_vis)[0] 220 | codes_pred_vis[0] = self.decode_shape(codes_pred_vis[0]) 221 | codes_pred_vis[2] = self.decode_rotation(codes_pred_vis[2]) 222 | 223 | layout_pred = self.layout_model.forward(self.input_imgs_orig) 224 | return codes_pred_vis, layout_pred 225 | 226 | def predict_depth(self): 227 | depth_pred = self.depth_model.forward(self.input_imgs_orig) 228 | return depth_pred 229 | 230 | def predict_scene_voxels(self): 231 | scene_voxels_pred = self.scene_voxels_model.forward(self.input_imgs_orig) 232 | return scene_voxels_pred 233 | 234 | 235 | class DemoRenderer(): 236 | def __init__(self, opts): 237 | self.opts = opts 238 | self.mesh_dir = osp.join(opts.rendering_dir, opts.name) 239 | if not os.path.exists(self.mesh_dir): 240 | os.makedirs(self.mesh_dir) 241 | 242 | 
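#
# Rough sketch of how the two classes in this file fit together (added for exposition; the
# actual driver is demo/cli_demo.py, which may differ). Only methods defined in this file are
# used; the batch dict is assumed to carry 'img', 'img_fine' and 'bboxes_test_proposals'.
#
#   tester = DemoTester(opts)
#   tester.define_model()
#   tester.init_dataset()
#   tester.set_input(batch)
#   codes, layout = tester.predict_factored3d()
#   renderer = DemoRenderer(opts)
#   view1, view2 = renderer.render_factored3d(codes, layout)
#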
def save_layout_mesh(self, mesh_dir, layout, prefix='layout'): 243 | opts = self.opts 244 | layout_vis = layout.data[0].cpu().numpy().transpose((1,2,0)) 245 | vs, fs = render_utils.dispmap_to_mesh( 246 | layout_vis, 247 | suncg_parse.cam_intrinsic(), 248 | scale_x=self.opts.layout_width/640, 249 | scale_y=self.opts.layout_height/480 250 | ) 251 | mesh_file = osp.join(self.mesh_dir, prefix + '.obj') 252 | fout = open(mesh_file, 'w') 253 | render_utils.append_obj(fout, vs, fs) 254 | fout.close() 255 | 256 | def save_codes_mesh(self, mesh_dir, code_vars, prefix='codes'): 257 | n_rois = code_vars[0].size()[0] 258 | code_list = suncg_parse.uncollate_codes(code_vars, 1, torch.Tensor(n_rois).fill_(0)) 259 | mesh_file = osp.join(mesh_dir, prefix + '.obj') 260 | render_utils.save_parse(mesh_file, code_list[0], save_objectwise=False, thresh=0.1) 261 | 262 | def render_visuals(self, mesh_dir, obj_name=None): 263 | png_dir = osp.join(mesh_dir, 'rendering') 264 | if obj_name is not None: 265 | render_utils.render_mesh(osp.join(mesh_dir, obj_name + '.obj'), png_dir) 266 | im_view1 = scipy.misc.imread(osp.join(png_dir, '{}_render_000.png'.format(obj_name))) 267 | im_view2 = scipy.misc.imread(osp.join(png_dir, '{}_render_003.png'.format(obj_name))) 268 | else: 269 | render_utils.render_directory(mesh_dir, png_dir) 270 | im_view1 = scipy.misc.imread(osp.join(png_dir, 'render_000.png')) 271 | im_view2 = scipy.misc.imread(osp.join(png_dir, 'render_003.png')) 272 | return im_view1, im_view2 273 | 274 | def render_factored3d(self, codes, layout): 275 | os.system('rm {}/*.obj'.format(self.mesh_dir)) 276 | self.save_codes_mesh(self.mesh_dir, codes) 277 | self.save_layout_mesh(self.mesh_dir, layout) 278 | return self.render_visuals(self.mesh_dir) 279 | 280 | def render_scene_vox(self, scene_vox): 281 | opts = self.opts 282 | os.system('rm {}/*.obj'.format(self.mesh_dir)) 283 | voxels = scene_vox.data.cpu()[0,0].numpy() 284 | 285 | mesh_file = osp.join(self.mesh_dir, 'scene_vox.obj') 286 | vs, fs = render_utils.voxels_to_mesh(voxels.astype(np.float32), thresh=0.25) 287 | vs[:,0] -= voxels.shape[0]/2.0 288 | vs[:,1] -= voxels.shape[1]/2.0 289 | vs *= 0.04*(64//opts.voxels_height) 290 | fout = open(mesh_file, 'w') 291 | render_utils.append_obj(fout, vs, fs) 292 | fout.close() 293 | return self.render_visuals(self.mesh_dir, obj_name='scene_vox') 294 | 295 | def render_depth(self, dmap): 296 | opts = self.opts 297 | os.system('rm {}/*.obj'.format(self.mesh_dir)) 298 | dmap_pred = dmap.data[0].cpu().numpy().transpose((1,2,0)) 299 | mesh_file = osp.join(self.mesh_dir, 'depth.obj') 300 | dmap_points = render_utils.dispmap_to_points( 301 | dmap_pred, 302 | suncg_parse.cam_intrinsic(), 303 | scale_x=self.opts.layout_width/640, 304 | scale_y=self.opts.layout_height/480 305 | ) 306 | 307 | vs, fs = render_utils.points_to_cubes(dmap_points) 308 | fout = open(mesh_file, 'w') 309 | render_utils.append_obj(fout, vs, fs) 310 | fout.close() 311 | 312 | return self.render_visuals(self.mesh_dir, obj_name='depth') 313 | -------------------------------------------------------------------------------- /data/suncg.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | 4 | import os 5 | import os.path as osp 6 | import numpy as np 7 | import collections 8 | 9 | import scipy.misc 10 | import scipy.linalg 11 | import scipy.io as sio 12 | import scipy.ndimage.interpolation 13 | from absl import flags 14 | 15 | import torch 16 | 
from torch.utils.data import Dataset 17 | from torch.utils.data import DataLoader 18 | from torch.utils.data.dataloader import default_collate 19 | 20 | from ..utils import suncg_parse 21 | 22 | from ..renderer import utils as render_utils 23 | 24 | #-------------- flags -------------# 25 | #----------------------------------# 26 | flags.DEFINE_string('suncg_dir', '/data0/shubhtuls/datasets/suncg_pbrs_release', 'Suncg Data Directory') 27 | flags.DEFINE_boolean('filter_objects', True, 'Restrict object classes to main semantic classes.') 28 | flags.DEFINE_integer('max_views_per_house', 0, '0->use all views. Else we randomly select up to the specified number.') 29 | 30 | flags.DEFINE_boolean('suncg_dl_out_codes', True, 'Should the data loader load codes') 31 | flags.DEFINE_boolean('suncg_dl_out_layout', False, 'Should the data loader load layout') 32 | flags.DEFINE_boolean('suncg_dl_out_depth', False, 'Should the data loader load modal depth') 33 | flags.DEFINE_boolean('suncg_dl_out_fine_img', True, 'We should output fine images') 34 | flags.DEFINE_boolean('suncg_dl_out_voxels', False, 'We should output scene voxels') 35 | flags.DEFINE_boolean('suncg_dl_out_proposals', False, 'We should output edgebox proposals for training') 36 | flags.DEFINE_boolean('suncg_dl_out_test_proposals', False, 'We should output edgebox proposals for testing') 37 | flags.DEFINE_integer('suncg_dl_max_proposals', 40, 'Max number of proposals per image') 38 | 39 | flags.DEFINE_integer('img_height', 128, 'image height') 40 | flags.DEFINE_integer('img_width', 256, 'image width') 41 | 42 | flags.DEFINE_integer('img_height_fine', 480, 'fine image height') 43 | flags.DEFINE_integer('img_width_fine', 640, 'fine image width') 44 | 45 | flags.DEFINE_integer('layout_height', 64, 'amodal depth height : should be half image height') 46 | flags.DEFINE_integer('layout_width', 128, 'amodal depth width : should be half image width') 47 | 48 | flags.DEFINE_integer('voxels_height', 32, 'scene voxels height. 
Should be half of width and depth.') 49 | flags.DEFINE_integer('voxels_width', 64, 'scene voxels width') 50 | flags.DEFINE_integer('voxels_depth', 64, 'scene voxels depth') 51 | flags.DEFINE_boolean('suncg_dl_debug_mode', False, 'Just running for debugging, should not preload ojects') 52 | 53 | #------------- Dataset ------------# 54 | #----------------------------------# 55 | class SuncgDataset(Dataset): 56 | '''SUNCG data loader''' 57 | def __init__(self, house_names, opts): 58 | self._suncg_dir = opts.suncg_dir 59 | 60 | self._house_names = house_names 61 | self.img_size = (opts.img_height, opts.img_width) 62 | self.output_fine_img = opts.suncg_dl_out_fine_img 63 | if self.output_fine_img: 64 | self.img_size_fine = (opts.img_height_fine, opts.img_width_fine) 65 | self.output_codes = opts.suncg_dl_out_codes 66 | self.output_layout = opts.suncg_dl_out_layout 67 | self.output_modal_depth = opts.suncg_dl_out_depth 68 | self.output_voxels = opts.suncg_dl_out_voxels 69 | self.output_proposals = opts.suncg_dl_out_proposals 70 | self.output_test_proposals = opts.suncg_dl_out_test_proposals 71 | 72 | if self.output_layout or self.output_modal_depth: 73 | self.layout_size = (opts.layout_height, opts.layout_width) 74 | if self.output_voxels: 75 | self.voxels_size = (opts.voxels_width, opts.voxels_height, opts.voxels_depth) 76 | 77 | if self.output_proposals: 78 | self.max_proposals = opts.suncg_dl_max_proposals 79 | if self.output_codes: 80 | self.max_rois = opts.max_rois 81 | self._obj_loader = suncg_parse.ObjectLoader(osp.join(opts.suncg_dir, 'object')) 82 | if not opts.suncg_dl_debug_mode: 83 | self._obj_loader.preload() 84 | if opts.filter_objects: 85 | self._meta_loader = suncg_parse.MetaLoader(osp.join(opts.suncg_dir, 'ModelCategoryMappingEdited.csv')) 86 | else: 87 | self._meta_loader = None 88 | 89 | data_tuples = [] 90 | for hx, house in enumerate(house_names): 91 | if (hx % 1000) == 0: 92 | print('Reading image names from house {}/{}'.format(hx, len(house_names))) 93 | imgs_dir = osp.join(opts.suncg_dir, 'renderings_ldr', house) 94 | view_ids = [f[0:6] for f in os.listdir(imgs_dir)] 95 | 96 | rng = np.random.RandomState([ord(c) for c in house]) 97 | rng.shuffle(view_ids) 98 | 99 | if (opts.max_views_per_house > 0) and (opts.max_views_per_house < len(view_ids)): 100 | view_ids = view_ids[0:opts.max_views_per_house] 101 | for view_id in view_ids: 102 | data_tuples.append((house, view_id)) 103 | self.n_imgs = len(data_tuples) 104 | self._data_tuples = data_tuples 105 | self._preload_cameras(house_names) 106 | 107 | def forward_img(self, index): 108 | house, view_id = self._data_tuples[index] 109 | img = scipy.misc.imread(osp.join(self._suncg_dir, 'renderings_ldr', house, view_id + '_mlt.png')) 110 | if self.output_fine_img: 111 | img_fine = scipy.misc.imresize(img, self.img_size_fine) 112 | img_fine = np.transpose(img_fine, (2,0,1)) 113 | 114 | img = scipy.misc.imresize(img, self.img_size) 115 | img = np.transpose(img, (2,0,1)) 116 | if self.output_fine_img: 117 | return img/255, img_fine/255, house, view_id 118 | else: 119 | return img/255, house, view_id 120 | 121 | def _preload_cameras(self, house_names): 122 | self._house_cameras = {} 123 | for hx, house in enumerate(house_names): 124 | if (hx % 200) == 0: 125 | print('Pre-loading cameras from house {}/{}'.format(hx, len(house_names))) 126 | cam_file = osp.join(self._suncg_dir, 'camera', house, 'room_camera.txt') 127 | camera_poses = suncg_parse.read_camera_pose(cam_file) 128 | self._house_cameras[house] = camera_poses 129 | 130 | def 
forward_codes(self, house_name, view_id): 131 | #print('Loading Codes for {}_{}'.format(house_name, view_id)) 132 | campose = self._house_cameras[house_name][int(view_id)] 133 | cam2world = suncg_parse.campose_to_extrinsic(campose) 134 | world2cam = scipy.linalg.inv(cam2world) 135 | 136 | house_data = suncg_parse.load_json( 137 | osp.join(self._suncg_dir, 'house', house_name, 'house.json')) 138 | bbox_data = sio.loadmat( 139 | osp.join(self._suncg_dir, 'bboxes_node', house_name, view_id + '_bboxes.mat')) 140 | objects_data, objects_bboxes = suncg_parse.select_ids( 141 | house_data, bbox_data, meta_loader=self._meta_loader, min_pixels=500) 142 | objects_codes = suncg_parse.codify_room_data( 143 | objects_data, world2cam, self._obj_loader) 144 | objects_bboxes -= 1 #convert from 1-indexing to 0-indexing 145 | if len(objects_codes) > self.max_rois: 146 | select_inds = np.random.permutation(len(objects_codes))[0:self.max_rois] 147 | objects_bboxes = objects_bboxes[select_inds, :] 148 | objects_codes = [objects_codes[ix] for ix in select_inds] 149 | return objects_codes, objects_bboxes 150 | 151 | def forward_proposals(self, house_name, view_id, codes_gt, bboxes_gt): 152 | proposals_data = sio.loadmat( 153 | osp.join(self._suncg_dir, 'edgebox_proposals', house_name, view_id + '_proposals.mat')) 154 | bboxes_proposals = proposals_data['proposals'][:,0:4] 155 | bboxes_proposals -= 1 #zero indexed 156 | codes, bboxes, labels = suncg_parse.extract_proposal_codes( 157 | codes_gt, bboxes_gt, bboxes_proposals, self.max_proposals) 158 | return codes, bboxes, labels 159 | 160 | def forward_test_proposals(self, house_name, view_id): 161 | proposals_data = sio.loadmat( 162 | osp.join(self._suncg_dir, 'edgebox_proposals', house_name, view_id + '_proposals.mat')) 163 | bboxes_proposals = proposals_data['proposals'][:,0:4] 164 | bboxes_proposals -= 1 #zero indexed 165 | return bboxes_proposals 166 | 167 | def forward_layout(self, house_name, view_id, bg_depth=1e4): 168 | depth_im = scipy.misc.imread(osp.join( 169 | self._suncg_dir, 'renderings_layout', house_name, view_id + '_depth.png')) 170 | depth_im = depth_im.astype(np.float)/1000.0 # depth was saved in mm 171 | depth_im += bg_depth*np.equal(depth_im,0).astype(np.float) 172 | disp_im = 1./depth_im 173 | amodal_depth = scipy.ndimage.interpolation.zoom( 174 | disp_im, (self.layout_size[0]/disp_im.shape[0], self.layout_size[1]/disp_im.shape[1]), order=0) 175 | amodal_depth = np.reshape(amodal_depth, (1, self.layout_size[0], self.layout_size[1])) 176 | return amodal_depth 177 | 178 | def forward_depth(self, house_name, view_id, bg_depth=1e4): 179 | depth_im = scipy.misc.imread(osp.join( 180 | self._suncg_dir, 'renderings_depth', house_name, view_id + '_depth.png')) 181 | depth_im = depth_im.astype(np.float)/1000.0 # depth was saved in mm 182 | depth_im += bg_depth*np.equal(depth_im,0).astype(np.float) 183 | disp_im = 1./depth_im 184 | modal_depth = scipy.ndimage.interpolation.zoom( 185 | disp_im, (self.layout_size[0]/disp_im.shape[0], self.layout_size[1]/disp_im.shape[1]), order=0) 186 | modal_depth = np.reshape(modal_depth, (1, self.layout_size[0], self.layout_size[1])) 187 | return modal_depth 188 | 189 | def forward_voxels(self, house_name, view_id): 190 | scene_voxels = sio.loadmat(osp.join( 191 | self._suncg_dir, 'scene_voxels', house_name, view_id + '_voxels.mat')) 192 | scene_voxels = render_utils.downsample( 193 | scene_voxels['sceneVox'].astype(np.float32), 194 | 64//self.voxels_size[1], use_max=True) 195 | return scene_voxels 196 | 197 | def __len__(self): 198 
| return self.n_imgs 199 | 200 | def __getitem__(self, index): 201 | if self.output_fine_img: 202 | img, img_fine, house_name, view_id = self.forward_img(index) 203 | else: 204 | img, house_name, view_id = self.forward_img(index) 205 | 206 | elem = { 207 | 'img': img, 208 | 'house_name': house_name, 209 | 'view_id': view_id, 210 | } 211 | if self.output_layout: 212 | layout = self.forward_layout(house_name, view_id) 213 | elem['layout'] = layout 214 | 215 | if self.output_voxels: 216 | voxels = self.forward_voxels(house_name, view_id) 217 | elem['voxels'] = voxels 218 | 219 | if self.output_modal_depth: 220 | depth = self.forward_depth(house_name, view_id) 221 | elem['depth'] = depth 222 | 223 | if self.output_codes: 224 | codes_gt, bboxes_gt = self.forward_codes(house_name, view_id) 225 | elem['codes'] = codes_gt 226 | elem['bboxes'] = bboxes_gt 227 | 228 | if self.output_proposals: 229 | codes_proposals, bboxes_proposals, labels_proposals = self.forward_proposals( 230 | house_name, view_id, codes_gt, bboxes_gt) 231 | if labels_proposals.size == 0: 232 | print('No proposal found: ', house_name, view_id) 233 | elem['codes_proposals'] = codes_proposals 234 | elem['bboxes_proposals'] = bboxes_proposals 235 | elem['labels_proposals'] = labels_proposals 236 | 237 | if self.output_test_proposals: 238 | bboxes_proposals = self.forward_test_proposals(house_name, view_id) 239 | if bboxes_proposals.size == 0: 240 | print('No proposal found: ', house_name, view_id) 241 | elem['bboxes_test_proposals'] = bboxes_proposals 242 | 243 | if self.output_fine_img: 244 | elem['img_fine'] = img_fine 245 | 246 | #print('House : {}, View : {}, Code Length : {}'.format(house_name, view_id, len(code))) 247 | return elem 248 | 249 | #-------- Collate Function --------# 250 | #----------------------------------# 251 | def recursive_convert_to_torch(elem): 252 | if torch.is_tensor(elem): 253 | return elem 254 | elif type(elem).__module__ == 'numpy': 255 | if elem.size == 0: 256 | return torch.zeros(elem.shape).type(torch.DoubleTensor) 257 | else: 258 | return torch.from_numpy(elem) 259 | elif isinstance(elem, int): 260 | return torch.LongTensor([elem]) 261 | elif isinstance(elem, float): 262 | return torch.DoubleTensor([elem]) 263 | elif isinstance(elem, collections.Mapping): 264 | return {key: recursive_convert_to_torch(d[key]) for key in elem} 265 | elif isinstance(elem, collections.Sequence): 266 | return [recursive_convert_to_torch(samples) for samples in elem] 267 | else: 268 | return elem 269 | 270 | def collate_fn(batch): 271 | '''SUNCG data collater. 272 | 273 | Assumes each instance is a dict. 274 | Applies different collation rules for each field. 
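For example, the code and bounding-box fields ('codes', 'bboxes', 'codes_proposals',
'bboxes_proposals', 'bboxes_test_proposals') are kept as per-example lists of tensors,
'labels_proposals' are concatenated across the batch, and every remaining field goes
through default_collate.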
275 | 276 | Args: 277 | batch: List of loaded elements via Dataset.__getitem__ 278 | ''' 279 | collated_batch = {} 280 | # iterate over keys 281 | for key in batch[0]: 282 | if key =='codes' or key=='bboxes' or key=='codes_proposals' or key=='bboxes_proposals' or key=='bboxes_test_proposals': 283 | collated_batch[key] = [recursive_convert_to_torch(elem[key]) for elem in batch] 284 | elif key == 'labels_proposals': 285 | collated_batch[key] = torch.cat([default_collate(elem[key]) for elem in batch if elem[key].size > 0]) 286 | else: 287 | collated_batch[key] = default_collate([elem[key] for elem in batch]) 288 | return collated_batch 289 | 290 | #----------- Data Loader ----------# 291 | #----------------------------------# 292 | def suncg_data_loader(house_names, opts): 293 | dset = SuncgDataset(house_names, opts) 294 | return DataLoader( 295 | dset, batch_size=opts.batch_size, 296 | shuffle=True, num_workers=opts.n_data_workers, 297 | collate_fn=collate_fn) 298 | 299 | 300 | def suncg_data_loader_benchmark(house_names, opts): 301 | dset = SuncgDataset(house_names, opts) 302 | return DataLoader( 303 | dset, batch_size=opts.batch_size, 304 | shuffle=False, num_workers=opts.n_data_workers, 305 | collate_fn=collate_fn) -------------------------------------------------------------------------------- /preprocess/suncg/matUtils/read_wobj_safe.m: -------------------------------------------------------------------------------- 1 | function OBJ=read_wobj(fullfilename) 2 | % Read the objects from a Wavefront OBJ file 3 | % 4 | % OBJ=read_wobj(filename); 5 | % 6 | % OBJ struct containing: 7 | % 8 | % OBJ.vertices : Vertices coordinates 9 | % OBJ.vertices_texture: Texture coordinates 10 | % OBJ.vertices_normal : Normal vectors 11 | % OBJ.vertices_point : Vertice data used for points and lines 12 | % OBJ.material : Parameters from external .MTL file, will contain parameters like 13 | % newmtl, Ka, Kd, Ks, illum, Ns, map_Ka, map_Kd, map_Ks, 14 | % example of an entry from the material object: 15 | % OBJ.material(i).type = newmtl 16 | % OBJ.material(i).data = 'vase_tex' 17 | % OBJ.objects : Cell object with all objects in the OBJ file, 18 | % example of a mesh object: 19 | % OBJ.objects(i).type='f' 20 | % OBJ.objects(i).data.vertices: [n x 3 double] 21 | % OBJ.objects(i).data.texture: [n x 3 double] 22 | % OBJ.objects(i).data.normal: [n x 3 double] 23 | % 24 | % Example, 25 | % OBJ=read_wobj('examples\example10.obj'); 26 | % FV.vertices=OBJ.vertices; 27 | % FV.faces=OBJ.objects(3).data.vertices; 28 | % figure, patch(FV,'facecolor',[1 0 0]); camlight 29 | % 30 | % Function is written by D.Kroon University of Twente (June 2010) 31 | 32 | verbose=false; 33 | 34 | if(exist('fullfilename','var')==0) 35 | [filename, filefolder] = uigetfile('*.obj', 'Read obj-file'); 36 | fullfilename = [filefolder filename]; 37 | end 38 | filefolder = fileparts( fullfilename); 39 | if(verbose),disp(['Reading Object file : ' fullfilename]); end 40 | 41 | 42 | % Read the DI3D OBJ textfile to a cell array 43 | file_words = file2cellarray( fullfilename); 44 | % Remove empty cells, merge lines split by "\" and convert strings with values to double 45 | [ftype fdata]= fixlines(file_words); 46 | 47 | % Vertex data 48 | vertices=[]; nv=0; 49 | vertices_texture=[]; nvt=0; 50 | vertices_point=[]; nvp=0; 51 | vertices_normal=[]; nvn=0; 52 | material=[]; 53 | 54 | % Surface data 55 | no=0; 56 | 57 | % Loop through the Wavefront object file 58 | for iline=1:length(ftype) 59 | if(mod(iline,10000)==0), 60 | if(verbose),disp(['Lines processed 
: ' num2str(iline)]); end 61 | end 62 | 63 | type=ftype{iline}; data=fdata{iline}; 64 | 65 | % Switch on data type line 66 | switch(type) 67 | case{'mtllib'} 68 | if(iscell(data)) 69 | datanew=[]; 70 | for i=1:length(data) 71 | datanew=[datanew data{i}]; 72 | if(i1), 156 | val=tvals(2); 157 | if(val<0), val=val+1+nvt; end 158 | array_texture(i)=val; 159 | end 160 | end 161 | objects(no).type='l'; 162 | objects(no).data.vertices=array_vertices; 163 | objects(no).data.texture=array_texture; 164 | case('f') 165 | no=no+1; if(mod(no,10000)==1), objects(no+10001).data=0; end 166 | array_vertices=[]; 167 | array_texture=[]; 168 | array_normal=[]; 169 | for i=1:length(data); 170 | switch class(data) 171 | case 'cell' 172 | tvals=str2double(stringsplit(data{i},'/')); 173 | case 'string' 174 | tvals=str2double(stringsplit(data,'/')); 175 | otherwise 176 | tvals=data(i); 177 | end 178 | val=tvals(1); 179 | 180 | if(val<0), val=val+1+nv; end 181 | array_vertices(i)=val; 182 | if(length(tvals)>1), 183 | if(isfinite(tvals(2))) 184 | val=tvals(2); 185 | if(val<0), val=val+1+nvt; end 186 | array_texture(i)=val; 187 | end 188 | end 189 | if(length(tvals)>2), 190 | val=tvals(3); 191 | if(val<0), val=val+1+nvn; end 192 | array_normal(i)=val; 193 | end 194 | end 195 | 196 | % A face of more than 3 indices is always split into 197 | % multiple faces of only 3 indices. 198 | objects(no).type='f'; 199 | findex=1:min (3,length(array_vertices)); 200 | 201 | objects(no).data.vertices=array_vertices(findex); 202 | if(~isempty(array_texture)),objects(no).data.texture=array_texture(findex); end 203 | if(~isempty(array_normal)),objects(no).data.normal=array_normal(findex); end 204 | for i=1:length(array_vertices)-3; 205 | no=no+1; if(mod(no,10000)==1), objects(no+10001).data=0; end 206 | findex=[1 2+i 3+i]; 207 | findex(findex>length(array_vertices))=findex(findex>length(array_vertices))-length(array_vertices); 208 | objects(no).type='f'; 209 | objects(no).data.vertices=array_vertices(findex); 210 | if(~isempty(array_texture)),objects(no).data.texture=array_texture(findex); end 211 | if(~isempty(array_normal)),objects(no).data.normal=array_normal(findex); end 212 | end 213 | case{'#','$'} 214 | % Comment 215 | tline=' %'; 216 | if(iscell(data)) 217 | for i=1:length(data), tline=[tline ' ' data{i}]; end 218 | else 219 | tline=[tline data]; 220 | end 221 | if(verbose), disp(tline); end 222 | case{''} 223 | otherwise 224 | no=no+1; 225 | if(mod(no,10000)==1), objects(no+10001).data=0; end 226 | objects(no).type=type; 227 | objects(no).data=data; 228 | end 229 | end 230 | 231 | % Initialize new object list, which will contain the "collapsed" objects 232 | objects2(no).data=0; 233 | 234 | index=0; 235 | 236 | i=0; 237 | while (ichar')'; 302 | fclose(fid); 303 | file_lines = regexp(file_text, '\n+', 'split'); 304 | file_words = regexp(file_lines, '\s+', 'split'); 305 | 306 | function [ftype fdata]=fixlines(file_words) 307 | ftype=cell(size(file_words)); 308 | fdata=cell(size(file_words)); 309 | 310 | iline=0; jline=0; 311 | while(iline thresh: 29 | radius = pred_vol[x,y,z] 30 | v_all[v_counter:v_counter+8,:] *= radius 31 | v_all[v_counter:v_counter+8,:] += (np.array([[x, y, z]]) + 0.5) 32 | v_counter += 8 33 | 34 | return v_all, f_all 35 | 36 | 37 | def voxels_to_points(pred_vol, thresh=0.5): 38 | v_counter = 0 39 | tot_points = np.greater(pred_vol, thresh).sum() 40 | v_all = np.zeros([tot_points, 3]) 41 | for x in range(pred_vol.shape[0]): 42 | for y in range(pred_vol.shape[1]): 43 | for z in range(pred_vol.shape[2]): 44 | 
if pred_vol[x,y,z] > thresh: 45 | v_all[v_counter,:] = (np.array([[x, y, z]]) + 0.5) 46 | v_counter += 1 47 | return v_all 48 | 49 | 50 | def append_obj(mf_handle, vertices, faces): 51 | for vx in range(vertices.shape[0]): 52 | mf_handle.write('v {:f} {:f} {:f}\n'.format(vertices[vx, 0], vertices[vx, 1], vertices[vx, 2])) 53 | for fx in range(faces.shape[0]): 54 | mf_handle.write('f {:d} {:d} {:d}\n'.format(faces[fx, 0], faces[fx, 1], faces[fx, 2])) 55 | return 56 | 57 | 58 | def append_mtl_obj(mf_handle, vertices, faces, mtl_ids): 59 | for vx in range(vertices.shape[0]): 60 | mf_handle.write('v {:f} {:f} {:f}\n'.format(vertices[vx, 0], vertices[vx, 1], vertices[vx, 2])) 61 | for fx in range(faces.shape[0]): 62 | mf_handle.write('usemtl m{}\n'.format(mtl_ids[fx])) 63 | mf_handle.write('f {:d} {:d} {:d}\n'.format(faces[fx, 0], faces[fx, 1], faces[fx, 2])) 64 | return 65 | 66 | 67 | def append_mtl(mtl_handle, mtl_ids, colors): 68 | for mx in range(len(mtl_ids)): 69 | mtl_handle.write('newmtl m{}\n'.format(mtl_ids[mx])) 70 | mtl_handle.write('Kd {:f} {:f} {:f}\n'.format(colors[mx, 0], colors[mx, 1], colors[mx, 2])) 71 | mtl_handle.write('Ka 0 0 0\n') 72 | return 73 | 74 | def render_mesh(mesh_file, png_dir, scale=0.5): 75 | cmd = 'python3.4 {:s}/render_script.py --obj_file {:s} --out_dir {:s} --r 2 --delta_theta 30 --sz_x {} --sz_y {} >> /dev/null 2>&1'.format(blender_dir, mesh_file, png_dir, int(640*scale), int(480*scale)) 76 | os.system(cmd) 77 | return 78 | 79 | def render_directory(mesh_dir, png_dir, scale=0.5): 80 | cmd = 'python3.4 {:s}/render_dir_script.py --obj_dir {:s} --out_dir {:s} --r 2 --delta_theta 30 --sz_x {} --sz_y {} >> /dev/null 2>&1'.format(blender_dir, mesh_dir, png_dir, int(640*scale), int(480*scale)) 81 | os.system(cmd) 82 | return 83 | 84 | class Downsample(torch.nn.Module): 85 | def __init__(self, s, use_max=False, batch_mode=False): 86 | super(Downsample, self).__init__() 87 | self.batch_mode = batch_mode 88 | if(use_max): 89 | layer = torch.nn.MaxPool3d(s, stride=s) 90 | else: 91 | layer = torch.nn.Conv3d(1, 1, s, stride=s) 92 | layer.weight.data.fill_(1./layer.weight.data.nelement()) 93 | layer.bias.data.fill_(0) 94 | self.layer = layer 95 | 96 | def forward(self, vol): 97 | if self.batch_mode: 98 | out_vol = self.layer.forward(vol) 99 | else: 100 | out_vol = self.layer.forward(torch.unsqueeze(torch.unsqueeze(vol, 0), 0))[0,0] 101 | return out_vol 102 | 103 | def downsample(vol, s, use_max=False): 104 | module = Downsample(s, use_max=use_max).type(torch.FloatTensor) 105 | vol_var = Variable(torch.from_numpy(vol), requires_grad=False).type(torch.FloatTensor) 106 | return module.forward(vol_var).data.numpy() 107 | 108 | def prediction_to_entity(pred): 109 | if torch.is_tensor(pred[0]): 110 | pred = [p.numpy() for p in pred] 111 | volume = pred[0].astype(np.float) 112 | transform = pred[1].astype(np.float) 113 | if transform.shape[0] == 4 and transform.shape[1]==4: 114 | return volume, transform 115 | else: 116 | scale_mat = np.diag(pred[1].astype(np.float)) 117 | rot_mat = transformations.quaternion_matrix(pred[2].astype(np.float))[0:3, 0:3] 118 | transform = np.eye(4) 119 | transform[0:3, 0:3] = np.matmul(rot_mat, scale_mat) 120 | transform[0:3, 3] = pred[3].astype(np.float) 121 | return volume, transform 122 | 123 | def save_parse(mesh_file, codes, thresh=0.5, use_soft_voxels=True, save_objectwise=False): 124 | mtl_file = mesh_file.replace('.obj','.mtl') 125 | fout_mtl = open(mtl_file, 'w') 126 | mtl_file = mtl_file.split('/')[-1] 127 | 128 | n_parts = len(codes) 
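# One material per object: color indices are spread over the colormap so each predicted object is rendered in a distinct color.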
129 | color_inds = np.linspace(0, 255, n_parts).astype(np.int).tolist() 130 | for p in range(n_parts): 131 | cmap = colormap[color_inds[p]] 132 | fout_mtl.write( 133 | 'newmtl m{:d}\nKd {:f} {:f} {:f}\nKa 0 0 0\n'.format(p, cmap[0], cmap[1], cmap[2])) 134 | fout_mtl.close() 135 | if not save_objectwise: 136 | fout = open(mesh_file, 'w') 137 | fout.write('mtllib {:s}\n'.format(mtl_file)) 138 | 139 | f_counter = 0 140 | for p in range(n_parts): 141 | volume, transform = prediction_to_entity(codes[p]) 142 | if save_objectwise: 143 | fout = open(mesh_file.replace('.obj', '_' + str(p) + '.obj'), 'w') 144 | fout.write('mtllib {:s}\n'.format(mtl_file)) 145 | volume = downsample(volume, volume.shape[0]//32) 146 | v, f = voxels_to_mesh(volume, thresh=thresh) 147 | v = v/32 - 0.5 148 | 149 | if v.size > 0: 150 | n_verts = v.shape[0] 151 | v_homographic = np.concatenate((v, np.ones((n_verts, 1))), axis=1).transpose() 152 | v_transformed = np.matmul(transform[0:3,:], v_homographic).transpose() 153 | fout.write('usemtl m{:d}\n'.format(p)) 154 | append_obj(fout, v_transformed, f + f_counter) 155 | 156 | if not save_objectwise: 157 | f_counter += n_verts 158 | 159 | if save_objectwise or p==(n_parts-1): 160 | fout.close() 161 | 162 | 163 | def codes_to_points(codes, thresh=0.5, objectwise=False): 164 | scene_verts = [] 165 | n_parts = len(codes) 166 | for p in range(n_parts): 167 | volume, transform = prediction_to_entity(codes[p]) 168 | volume = downsample(volume, volume.shape[0]//32) 169 | v = voxels_to_points(volume, thresh=thresh) 170 | v = v/32 - 0.5 171 | 172 | if v.size > 0: 173 | n_verts = v.shape[0] 174 | v_homographic = np.concatenate((v, np.ones((n_verts, 1))), axis=1).transpose() 175 | v_transformed = np.matmul(transform[0:3,:], v_homographic).transpose() 176 | scene_verts.append(v_transformed) 177 | 178 | if not objectwise: 179 | scene_verts = np.concatenate(scene_verts, axis=0) 180 | 181 | return scene_verts 182 | 183 | 184 | def dispmap_to_mesh(dmap, k_mat, scale_x=1, scale_y=1, min_disp=1e-2): 185 | ''' 186 | Converts an inverse depth map to a 3D mesh. 187 | 188 | Args: 189 | dmap: H X W inverse depth map 190 | k_mat : 3 X 3 intrinsic matrix 191 | scale_x: Scale the intrinsic matrix's x row by this factor e.g. scale_x=0.5 implies downsampling by a factor of 2 192 | scale_y: Scale the intrinsic matrix's y row by this factor e.g.
scale_y=0.5 implies downsampling by a factor of 2 193 | min_disp: Points with disp less than this are not rendered 194 | Returns: 195 | vs: n_pts X 3 [x,y,z] coordinates 196 | fs: mesh faces 197 | ''' 198 | H = np.shape(dmap)[0] 199 | W = np.shape(dmap)[1] 200 | dmap = dmap.reshape((H, W)) 201 | k_mat[0, :] = scale_x*k_mat[0, :] 202 | k_mat[1, :] = scale_y*k_mat[1, :] 203 | k_inv = np.linalg.inv(k_mat) 204 | num_pts = H*W 205 | pts = np.ones((3, num_pts)) 206 | ctr = 0 207 | for y in range(H): 208 | for x in range(W): 209 | pts[0, ctr] = x + 0.5 210 | pts[1, ctr] = y + 0.5 211 | pts[:, ctr] *= (1/dmap[y,x]) 212 | ctr += 1 213 | 214 | verts = np.transpose(np.matmul(k_inv, pts)) 215 | num_faces_max = H*W*2 216 | faces = np.zeros((num_faces_max, 3)) 217 | face_ctr = 0 218 | for y in range(H-1): 219 | for x in range(W-1): 220 | if (dmap[y,x] > min_disp) and (dmap[y,x+1] > min_disp) and (dmap[y+1,x+1] > min_disp): 221 | faces[face_ctr, 0] = y*W + x + 1 222 | faces[face_ctr, 1] = y*W + (x+1) + 1 223 | faces[face_ctr, 2] = (y+1)*W + (x+1) + 1 224 | face_ctr += 1 225 | 226 | if (dmap[y,x] > min_disp) and (dmap[y+1,x] > min_disp) and (dmap[y+1,x+1] > min_disp): 227 | faces[face_ctr, 0] = y*W + x + 1 228 | faces[face_ctr, 1] = (y+1)*W + x + 1 229 | faces[face_ctr, 2] = (y+1)*W + (x+1) + 1 230 | face_ctr += 1 231 | faces = faces[0:face_ctr, :] 232 | return verts, faces.astype(np.int) 233 | 234 | 235 | def dispmap_to_points(dmap, k_mat, scale_x=1, scale_y=1, min_disp=1e-2): 236 | ''' 237 | Converts an inverse depth map to a 3D point cloud. 238 | 239 | Args: 240 | dmap: H X W inverse depth map 241 | k_mat : 3 X 3 intrinsic matrix 242 | scale_x: Scale the intrinsic matrix's x row by this factor e.g. scale_x=0.5 implies downsampling by a factor of 2 243 | scale_y: Scale the intrinsic matrix's y row by this factor e.g. scale_y=0.5 implies downsampling by a factor of 2 244 | min_disp: Points with disp less than this are not rendered 245 | Returns: 246 | n_pts X 3 [x,y,z] coordinates 247 | ''' 248 | H = np.shape(dmap)[0] 249 | W = np.shape(dmap)[1] 250 | dmap = dmap.reshape((H, W)) 251 | k_mat[0, :] = scale_x*k_mat[0, :] 252 | k_mat[1, :] = scale_y*k_mat[1, :] 253 | k_inv = np.linalg.inv(k_mat) 254 | num_pts = np.sum(np.greater(dmap, min_disp)) 255 | pts = np.ones((3, num_pts)) 256 | ctr = 0 257 | for y in range(H): 258 | for x in range(W): 259 | if (dmap[y,x] > min_disp) and (ctr < num_pts): 260 | pts[0, ctr] = x + 0.5 261 | pts[1, ctr] = y + 0.5 262 | pts[:, ctr] *= (1/dmap[y,x]) 263 | ctr += 1 264 | return np.transpose(np.matmul(k_inv, pts)) 265 | 266 | 267 | def points_to_cubes(points, edge_size=0.05): 268 | ''' 269 | Converts an input point cloud to a set of cubes. 270 | 271 | Args: 272 | points: N X 3 array 273 | edge_size: cube edge size 274 | Returns: 275 | vs: vertices 276 | fs: faces 277 | ''' 278 | v_counter = 0 279 | tot_points = points.shape[0] 280 | v_all = np.tile(cube_v, [tot_points, 1]) 281 | f_all = np.tile(cube_f, [tot_points, 1]) 282 | f_offset = np.tile(np.linspace(0, 12*tot_points-1, 12*tot_points), 3).reshape(3, 12*tot_points).transpose() 283 | f_offset = (f_offset//12 * 8).astype(np.int) 284 | f_all += f_offset 285 | for px in range(points.shape[0]): 286 | v_all[v_counter:v_counter+8,:] *= edge_size 287 | v_all[v_counter:v_counter+8,:] += points[px, :] 288 | v_counter += 8 289 | 290 | return v_all, f_all 291 | --------------------------------------------------------------------------------
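A minimal usage sketch for the depth utilities above (not part of the repository source; the disparity map and intrinsics below are illustrative placeholders, and it assumes the repository root is importable as the factored3d package with its dependencies installed): dispmap_to_points back-projects an inverse depth map into camera-frame points, points_to_cubes expands each point into a small cube, and append_obj writes the result as a Wavefront OBJ file.

import numpy as np
from factored3d.renderer import utils as render_utils

# Hypothetical 24 X 32 inverse depth (disparity) map: constant disparity 0.5, i.e. depth 2.
disp = np.full((24, 32), 0.5)
# Hypothetical pinhole intrinsics for a 32 X 24 image; pass a copy since dispmap_to_points
# scales the intrinsic rows in place.
k_mat = np.array([[30., 0., 16.],
                  [0., 30., 12.],
                  [0., 0., 1.]])

points = render_utils.dispmap_to_points(disp, k_mat.copy())            # N X 3 camera-frame points
verts, faces = render_utils.points_to_cubes(points, edge_size=0.05)    # 8 verts and 12 faces per point
with open('depth_cubes.obj', 'w') as fout:
    render_utils.append_obj(fout, verts, faces)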