├── .gitignore
├── LICENSE
├── README.md
├── data
│   └── PUT_DATA_HERE
└── src
    ├── cache
    │   └── CACHE_GOES_HERE
    ├── data
    │   ├── __init__.py
    │   ├── basetypes.py
    │   ├── dataset.py
    │   ├── importers.py
    │   └── transformations.py
    ├── eval
    │   └── EVAL_GOES_HERE
    ├── main_icvl_com_refine.py
    ├── main_icvl_posereg_embedding.py
    ├── main_nyu_com_refine.py
    ├── main_nyu_posereg_embedding.py
    ├── net
    │   ├── __init__.py
    │   ├── convlayer.py
    │   ├── convpoollayer.py
    │   ├── dropoutlayer.py
    │   ├── hiddenlayer.py
    │   ├── layerparams.py
    │   ├── netbase.py
    │   ├── poollayer.py
    │   ├── poseregnet.py
    │   └── scalenet.py
    ├── test_realtimepipeline.py
    ├── trainer
    │   ├── __init__.py
    │   ├── nettrainer.py
    │   ├── optimizer.py
    │   ├── poseregnettrainer.py
    │   └── scalenettrainer.py
    └── util
        ├── CMakeLists.txt
        ├── __init__.py
        ├── cameradevice.py
        ├── depthsense.cxx
        ├── handdetector.py
        ├── handpose_evaluation.py
        ├── helpers.py
        ├── initdepthsense.cxx
        ├── initdepthsense.h
        ├── realtimehandposepipeline.py
        └── vtkpointcloud.py

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *.cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 |
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 |
60 | # Scrapy stuff:
61 | .scrapy
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 |
72 | # pyenv
73 | .python-version
74 |
75 | # celery beat schedule file
76 | celerybeat-schedule
77 |
78 | # dotenv
79 | .env
80 |
81 | # virtualenv
82 | venv/
83 | ENV/
84 |
85 | # Spyder project settings
86 | .spyderproject
87 |
88 | # Rope project settings
89 | .ropeproject
90 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # This work is superseded by [DeepPrior++](https://github.com/moberweger/deep-prior-pp)
2 |
3 | # DeepPrior - Accurate and Fast 3D Hand Pose Estimation
4 |
5 | Author: Markus Oberweger <oberweger@icg.tugraz.at>
6 |
7 | ## Requirements:
8 | * OS
9 |     * Ubuntu 14.04
10 |     * CUDA 7
11 | * via Ubuntu package manager:
12 |     * python2.7
13 |     * python-matplotlib
14 |     * python-scipy
15 |     * python-pil
16 |     * python-numpy
17 |     * python-vtk6
18 |     * python-pip
19 | * via pip install:
20 |     * scikit-learn
21 |     * progressbar
22 |     * psutil
23 |     * theano (0.8)
24 | * Camera driver
25 |     * OpenNI for Kinect
26 |     * DepthSense SDK for Creative Senz3D.
27 |
28 | For a description of our method see:
29 |
30 | M. Oberweger, P. Wohlhart, and V. Lepetit. Hands Deep in Deep Learning for Hand Pose Estimation. In Computer Vision Winter Workshop, 2015.
31 |
32 | ## Setup:
33 | * Put dataset files into ./data (e.g. the [ICVL dataset](http://www.iis.ee.ic.ac.uk/~dtang/hand.html) or the [NYU dataset](http://cims.nyu.edu/~tompson/NYU_Hand_Pose_Dataset.htm))
34 | * Go to ./src and see the main file test_realtimepipeline.py for how to use the API (a short usage sketch follows this list)
35 | * The camera interface for the Creative Senz3D is included in ./src/util. Build it with `cmake . && make`.
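A minimal, illustrative sketch of the intended flow (test_realtimepipeline.py is the authoritative example; the class and argument names below are assumptions based on the file layout in ./src, so check the source for the exact signatures):

```python
# Illustrative only; see src/test_realtimepipeline.py for the real entry point.
from util.realtimehandposepipeline import RealtimeHandposePipeline  # class name assumed from the module name

# 1. restore a pretrained pose network (see "Pretrained models" below)
# 2. open a camera device from util/cameradevice.py
# 3. hand the network and camera to the pipeline, which detects the hand in
#    each depth frame and predicts the 3D joint positions in real time
```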
36 |
37 | ## Pretrained models:
38 | [Download](https://webadmin.tugraz.at/fileadmin/user_upload/Institute/ICG/Downloads/team_lepetit/3d_hand_pose/DeepPrior_pretrained.zip) pretrained models for the ICVL and NYU datasets.
39 |
40 | ## Datasets:
41 | The ICVL dataset was captured with a time-of-flight camera, the NYU dataset with a structured-light camera. The joint annotations of the two datasets differ; see the respective papers for details:
42 |
43 | D. Tang, H. J. Chang, A. Tejani, and T.-K. Kim. Latent Regression Forest: Structured Estimation of 3D Articulated Hand Posture. In Conference on Computer Vision and Pattern Recognition, 2014.
44 |
45 | J. Tompson, M. Stein, Y. LeCun, and K. Perlin. Real-Time Continuous Pose Recovery of Human Hands Using Convolutional Networks. ACM Transactions on Graphics, 33, 2014.
46 |
--------------------------------------------------------------------------------
/data/PUT_DATA_HERE:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/src/cache/CACHE_GOES_HERE:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/src/data/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/src/data/basetypes.py:
--------------------------------------------------------------------------------
1 | """
2 | Predefined datatypes
3 |
4 | Copyright 2015 Markus Oberweger, ICG,
5 | Graz University of Technology
6 |
7 | This file is part of DeepPrior.
8 |
9 | DeepPrior is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 |
14 | DeepPrior is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | GNU General Public License for more details.
18 |
19 | You should have received a copy of the GNU General Public License
20 | along with DeepPrior. If not, see <http://www.gnu.org/licenses/>.
21 | """
22 |
23 | from collections import namedtuple
24 |
25 | __author__ = "Paul Wohlhart <wohlhart@icg.tugraz.at>, Markus Oberweger <oberweger@icg.tugraz.at>"
26 | __copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria"
27 | __credits__ = ["Paul Wohlhart", "Markus Oberweger"]
28 | __license__ = "GPL"
29 | __version__ = "1.0"
30 | __maintainer__ = "Markus Oberweger"
31 | __email__ = "oberweger@icg.tugraz.at"
32 | __status__ = "Development"
33 |
34 | ICVLFrame = namedtuple('ICVLFrame', ['dpt', 'gtorig', 'gtcrop', 'T', 'gt3Dorig', 'gt3Dcrop', 'com', 'fileName', 'subSeqName'])
35 | NamedImgSequence = namedtuple('NamedImgSequence', ['name', 'data', 'config'])
36 |
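# ---------------------------------------------------------------------------
# Editorial sketch (not part of the original file): how these namedtuples are
# typically filled. All field values below are placeholders; the real ones are
# produced by data.importers. Roughly: dpt is the depth image, gtorig/gtcrop
# are 2D joint annotations in original/cropped image coordinates, T is the 2D
# crop transformation, gt3Dorig/gt3Dcrop are the 3D joints, and com is the 3D
# center of mass used for cropping.
#
#   frame = ICVLFrame(dpt=dpt, gtorig=gtorig, gtcrop=gtcrop, T=T,
#                     gt3Dorig=gt3Dorig, gt3Dcrop=gt3Dcrop, com=com,
#                     fileName='depth_1.png', subSeqName='0')
#   seq = NamedImgSequence(name='train', data=[frame],
#                          config={'cube': (250, 250, 250)})
# ---------------------------------------------------------------------------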
21 | """ 22 | 23 | from collections import namedtuple 24 | 25 | __author__ = "Paul Wohlhart , Markus Oberweger " 26 | __copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria" 27 | __credits__ = ["Paul Wohlhart", "Markus Oberweger"] 28 | __license__ = "GPL" 29 | __version__ = "1.0" 30 | __maintainer__ = "Markus Oberweger" 31 | __email__ = "oberweger@icg.tugraz.at" 32 | __status__ = "Development" 33 | 34 | ICVLFrame = namedtuple('ICVLFrame', ['dpt', 'gtorig', 'gtcrop', 'T', 'gt3Dorig', 'gt3Dcrop', 'com', 'fileName', 'subSeqName']) 35 | NamedImgSequence = namedtuple('NamedImgSequence', ['name', 'data', 'config']) 36 | -------------------------------------------------------------------------------- /src/data/dataset.py: -------------------------------------------------------------------------------- 1 | """Provides Dataset class for handling datasets. 2 | 3 | Dataset provides interface for managing data, eg normalization, batch building. 4 | ICVLDataset, NYUDataset, MSRADataset are specific instances of different datasets. 5 | 6 | Copyright 2015 Markus Oberweger, ICG, 7 | Graz University of Technology 8 | 9 | This file is part of DeepPrior. 10 | 11 | DeepPrior is free software: you can redistribute it and/or modify 12 | it under the terms of the GNU General Public License as published by 13 | the Free Software Foundation, either version 3 of the License, or 14 | (at your option) any later version. 15 | 16 | DeepPrior is distributed in the hope that it will be useful, 17 | but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | GNU General Public License for more details. 20 | 21 | You should have received a copy of the GNU General Public License 22 | along with DeepPrior. If not, see . 23 | """ 24 | 25 | import numpy 26 | from data import transformations 27 | from data.basetypes import NamedImgSequence 28 | from data.importers import NYUImporter, ICVLImporter 29 | from util.handdetector import HandDetector 30 | from util.helpers import shuffle_many_inplace 31 | 32 | 33 | __author__ = "Paul Wohlhart , Markus Oberweger " 34 | __copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria" 35 | __credits__ = ["Paul Wohlhart", "Markus Oberweger"] 36 | __license__ = "GPL" 37 | __version__ = "1.0" 38 | __maintainer__ = "Markus Oberweger" 39 | __email__ = "oberweger@icg.tugraz.at" 40 | __status__ = "Development" 41 | 42 | 43 | class Dataset(object): 44 | """ 45 | Base class for managing data. Used to create training batches. 
46 | """ 47 | 48 | def __init__(self, imgSeqs=None): 49 | """ 50 | Constructor 51 | """ 52 | if imgSeqs is None: 53 | self._imgSeqs = [] 54 | else: 55 | self._imgSeqs = imgSeqs 56 | self._imgStacks = {} 57 | self._labelStacks = {} 58 | 59 | @property 60 | def imgSeqs(self): 61 | return self._imgSeqs 62 | 63 | def imgSeq(self, seqName): 64 | for seq in self._imgSeqs: 65 | if seq.name == seqName: 66 | return seq 67 | return [] 68 | 69 | @imgSeqs.setter 70 | def imgSeqs(self, value): 71 | self._imgSeqs = value 72 | self._imgStacks = {} 73 | 74 | def load(self): 75 | objNames = self.lmi.getObjectNames() 76 | imgSeqs = self.lmi.loadSequences(objNames) 77 | raise NotImplementedError("Not implemented!") 78 | 79 | def imgStackDepthOnly(self, seqName, normZeroOne=False): 80 | imgSeq = None 81 | for seq in self._imgSeqs: 82 | if seq.name == seqName: 83 | imgSeq = seq 84 | break 85 | if imgSeq is None: 86 | return [] 87 | 88 | if seqName not in self._imgStacks: 89 | # compute the stack from the sequence 90 | numImgs = len(imgSeq.data) 91 | data0 = numpy.asarray(imgSeq.data[0].dpt, 'float32') 92 | label0 = numpy.asarray(imgSeq.data[0].gtorig, 'float32') 93 | h, w = data0.shape 94 | j, d = label0.shape 95 | imgStack = numpy.zeros((numImgs, 1, h, w), dtype='float32') # num_imgs,stack_size,rows,cols 96 | labelStack = numpy.zeros((numImgs, j, d), dtype='float32') # num_imgs,joints,dim 97 | for i in xrange(numImgs): 98 | if normZeroOne: 99 | imgD = numpy.asarray(imgSeq.data[i].dpt.copy(), 'float32') 100 | imgD[imgD == 0] = imgSeq.data[i].com[2] + (imgSeq.config['cube'][2] / 2.) 101 | imgD -= (imgSeq.data[i].com[2] - (imgSeq.config['cube'][2] / 2.)) 102 | imgD /= imgSeq.config['cube'][2] 103 | else: 104 | imgD = numpy.asarray(imgSeq.data[i].dpt.copy(), 'float32') 105 | imgD[imgD == 0] = imgSeq.data[i].com[2] + (imgSeq.config['cube'][2] / 2.) 106 | imgD -= imgSeq.data[i].com[2] 107 | imgD /= (imgSeq.config['cube'][2] / 2.) 108 | 109 | imgStack[i] = imgD 110 | labelStack[i] = numpy.clip(numpy.asarray(imgSeq.data[i].gt3Dcrop, dtype='float32') / (imgSeq.config['cube'][2] / 2.), -1, 1) 111 | 112 | self._imgStacks[seqName] = imgStack 113 | self._labelStacks[seqName] = labelStack 114 | 115 | return self._imgStacks[seqName], self._labelStacks[seqName] 116 | 117 | 118 | class ICVLDataset(Dataset): 119 | def __init__(self, imgSeqs=None, basepath=None): 120 | """ 121 | constructor 122 | """ 123 | super(ICVLDataset, self).__init__(imgSeqs) 124 | if basepath is None: 125 | basepath = '../../data/ICVL/' 126 | 127 | self.lmi = ICVLImporter(basepath) 128 | 129 | 130 | class NYUDataset(Dataset): 131 | def __init__(self, imgSeqs=None, basepath=None): 132 | """ 133 | constructor 134 | """ 135 | super(NYUDataset, self).__init__(imgSeqs) 136 | if basepath is None: 137 | basepath = '../../data/NYU/' 138 | 139 | self.lmi = NYUImporter(basepath) 140 | 141 | -------------------------------------------------------------------------------- /src/data/transformations.py: -------------------------------------------------------------------------------- 1 | """Provides different transformation methods on images. 2 | 3 | Copyright 2015 Markus Oberweger, ICG, 4 | Graz University of Technology 5 | 6 | This file is part of DeepPrior. 7 | 8 | DeepPrior is free software: you can redistribute it and/or modify 9 | it under the terms of the GNU General Public License as published by 10 | the Free Software Foundation, either version 3 of the License, or 11 | (at your option) any later version. 
12 |
13 | DeepPrior is distributed in the hope that it will be useful,
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | GNU General Public License for more details.
17 |
18 | You should have received a copy of the GNU General Public License
19 | along with DeepPrior. If not, see <http://www.gnu.org/licenses/>.
20 | """
21 |
22 | import numpy
23 | from PIL import Image, ImageEnhance
24 | import data.basetypes
25 |
26 | __author__ = "Paul Wohlhart <wohlhart@icg.tugraz.at>, Markus Oberweger <oberweger@icg.tugraz.at>"
27 | __copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria"
28 | __credits__ = ["Paul Wohlhart", "Markus Oberweger"]
29 | __license__ = "GPL"
30 | __version__ = "1.0"
31 | __maintainer__ = "Markus Oberweger"
32 | __email__ = "oberweger@icg.tugraz.at"
33 | __status__ = "Development"
34 |
35 |
36 | def getTransformationMatrix(center, rot, trans, scale):
37 |     ca = numpy.cos(rot)
38 |     sa = numpy.sin(rot)
39 |     sc = scale
40 |     cx = center[0]
41 |     cy = center[1]
42 |     tx = trans[0]
43 |     ty = trans[1]
44 |     t = numpy.array([ca * sc, -sa * sc, sc * (ca * (-tx - cx) + sa * (cy + ty)) + cx,
45 |                      sa * sc, ca * sc, sc * (ca * (-ty - cy) + sa * (-tx - cx)) + cy])
46 |     return t
47 |
48 |
49 | def transformPoint2D(pt, M):
50 |     """
51 |     Transform point in 2D coordinates
52 |     :param pt: point coordinates
53 |     :param M: transformation matrix
54 |     :return: transformed point
55 |     """
56 |     pt2 = numpy.asmatrix(M.reshape((3, 3))) * numpy.matrix([pt[0], pt[1], 1]).T
57 |     return numpy.array([pt2[0] / pt2[2], pt2[1] / pt2[2]])
58 |
59 |
60 | def transformPoint3D(pt, M):
61 |     """
62 |     Transform point in 3D coordinates
63 |     :param pt: point coordinates
64 |     :param M: transformation matrix
65 |     :return: transformed point
66 |     """
67 |     pt3 = numpy.asmatrix(M.reshape((4, 4))) * numpy.matrix([pt[0], pt[1], pt[2], 1]).T
68 |     return numpy.array([pt3[0] / pt3[3], pt3[1] / pt3[3], pt3[2] / pt3[3]])
69 |
70 |
71 | class ImageJitterer(object):
72 |     """
73 |     Image jitterer that creates jittered images, using affine transformations, color, etc.
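    Illustrative example (editorial sketch; assumes img is a PIL image)::

        rng = numpy.random.RandomState(23455)
        jitterer = ImageJitterer(rng, sz=(128, 128))
        imgs, tMats = jitterer.getJitteredImgs(img, num=10, augmentColor=True)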
74 | """ 75 | def __init__(self, rng, sz, crop=False): 76 | """ 77 | Constructor 78 | """ 79 | 80 | self.rng = rng 81 | self.imgSize = sz 82 | self.crop = crop 83 | 84 | def getJitteredParams(self, num, center=(0.0, 0.0), maxRot=(-5.0, 5.0), maxTranslate=(-2.0, 2.0), 85 | maxScale=(-0.1, 0.1), mirror=True): 86 | 87 | if not (type(maxRot) is tuple): 88 | maxRot = (-maxRot, maxRot) 89 | if not (type(maxTranslate) is tuple): 90 | maxTranslate = (-maxTranslate, maxTranslate) 91 | if not (type(maxScale) is tuple): 92 | maxScale = (-maxScale, maxScale) 93 | 94 | alphas = self.rng.rand(num) * (maxRot[1] - maxRot[0]) + maxRot[0] 95 | alphas = numpy.deg2rad(alphas) 96 | 97 | tx = self.rng.rand(num) * (maxTranslate[1] - maxTranslate[0]) + maxTranslate[0] 98 | ty = self.rng.rand(num) * (maxTranslate[1] - maxTranslate[0]) + maxTranslate[0] 99 | 100 | sc = 2 ** -(self.rng.rand(num) * (maxScale[1] - maxScale[0]) + maxScale[0]) 101 | 102 | if mirror: 103 | mi = self.rng.randint(2, size=num) # mirror true or false 104 | else: 105 | mi = numpy.zeros(num) 106 | 107 | transformationMats = [] 108 | for i in range(num): 109 | # First is not modified 110 | if i == 0: 111 | t = numpy.array([0, 0, 0, 1, 0]) 112 | else: 113 | t = numpy.array([alphas[i], tx[i], ty[i], sc[i], mi[i]]) 114 | transformationMats.append(t) 115 | 116 | return transformationMats 117 | 118 | def transformPoint2D(self, x_pic, y_pic, M): 119 | """ 120 | Transform point 121 | :param x_pic: 122 | :param y_pic: 123 | :param M: 124 | :return: 125 | """ 126 | 127 | if M.size != 6: 128 | raise ValueError("M not valid") 129 | 130 | x = M[0] * x_pic + M[1] * y_pic + M[2] 131 | y = M[3] * x_pic + M[4] * y_pic + M[5] 132 | return x, y 133 | 134 | def transformImg(self, img, t): 135 | imgT = img.transform((int(img.size[0]*t[3]),int(img.size[1]*t[3])), Image.EXTENT, (0,0,img.size[0],img.size[1]), Image.BILINEAR) 136 | imgT = imgT.rotate(numpy.rad2deg(t[0]), Image.BILINEAR, expand=1) 137 | if t[4] == 1.: 138 | imgT = imgT.transpose(Image.FLIP_LEFT_RIGHT) 139 | 140 | # crop only valid part 141 | if self.crop: 142 | imgT = imgT.crop(self.getInscribedRectangle(t[0], (img.size[0]*t[3], img.size[1]*t[3]))) 143 | 144 | # crop from translation 145 | imgT = imgT.resize((int(self.imgSize[0]*1.1), int(self.imgSize[1]*1.1)), Image.BILINEAR) 146 | xstart = int((imgT.size[0] // 2 - t[1]) - self.imgSize[0] // 2) 147 | ystart = int((imgT.size[1] // 2 - t[2]) - self.imgSize[1] // 2) 148 | assert xstart >= 0 and ystart >= 0 149 | return imgT.crop((xstart, ystart, xstart+self.imgSize[0], ystart+self.imgSize[1])) 150 | 151 | def getJitteredImgs(self, img, num, maxRot=(-5.0, 5.0), maxTranslate=(-2.0, 2.0), maxScale=(-0.1, 0.1), augmentColor=False): 152 | """ 153 | Take img and jitter it 154 | :return: a list of all jittered images 155 | """ 156 | 157 | cx = img.size[0] / 2 158 | cy = img.size[1] / 2 159 | 160 | tMats = self.getJitteredParams(center=(cx, cy), num=num, maxRot=maxRot, maxTranslate=maxTranslate, 161 | maxScale=maxScale) 162 | imgs = [] 163 | for i in range(len(tMats)): 164 | t = tMats[i] 165 | imgT = self.transformImg(img, t) 166 | 167 | if augmentColor: 168 | # jitter colors 169 | color = ImageEnhance.Color(imgT) 170 | imgT = color.enhance(self.rng.uniform(0.7, 1)) 171 | 172 | # jitter contrast 173 | contr = ImageEnhance.Contrast(imgT) 174 | imgT = contr.enhance(self.rng.uniform(0.7, 1)) 175 | 176 | # jitter brightness 177 | bright = ImageEnhance.Brightness(imgT) 178 | imgT = bright.enhance(self.rng.uniform(0.7, 1)) 179 | 180 | # add noise 181 | im = 
numpy.asarray(imgT).astype('int') + numpy.rint(self.rng.normal(0, 4, numpy.asarray(imgT).shape)).astype('int')
182 |                 im = numpy.clip(im, 0, 255).astype('uint8')
183 |                 imgT = Image.fromarray(im)
184 |
185 |             # add image
186 |             imgs.append(imgT)
187 |
188 |         return imgs, tMats
189 |
190 |     def applyJitterImg(self, img, tMats):
191 |         imgs = []
192 |         for i in range(len(tMats)):
193 |             t = tMats[i]
194 |             imgT = self.transformImg(img, t)
195 |
196 |             # add image
197 |             imgs.append(imgT)
198 |
199 |         return imgs
200 |
201 |     def getJitteredImgSeq(self, imgSeq, num, maxRot=(-5.0, 5.0), maxTranslate=(-2.0, 2.0), maxScale=(-0.1, 0.1)):
202 |         """
203 |         Take every img in the sequence (i.e. list of Frames) and jitter it
204 |         return a list of all jittered frames
205 |
206 |         :param imgSeq: list of Frames
207 |         """
208 |
209 |         seq = []
210 |         for frame in imgSeq:
211 |             imgs = self.getJitteredImgs(frame.img, num, maxRot, maxTranslate, maxScale)[0]
212 |             for i in range(num):
213 |                 seq.append(data.basetypes.Frame(imgs[i], frame.dpt, frame.rot, frame.tra, frame.className))
214 |         return seq
215 |
216 |     def maximumInscribedRectangle(self, mask):
217 |         """
218 |         http://www.imagingshop.com/articles/automatic-cropping-non-rectangular-images
219 |         This is a very slow, general-purpose, enumerative approach
220 |         :param mask: image mask
221 |         :return: largest inscribed rectangle within mask
222 |         """
223 |
224 |         def getSize(ww, hh):
225 |             return ww * hh
226 |
227 |         (height, width) = mask.size
228 |
229 |         squares = numpy.zeros((height, width), dtype=int)
230 |
231 |         # process bottom boundary of the mask
232 |         row = (height - 1)
233 |
234 |         for col in range(0, width):
235 |             if mask.getpixel((row, col)):
236 |                 squares[row, col] = 1
237 |
238 |         # process right boundary of the mask
239 |         col = (width - 1)
240 |
241 |         for row in range(0, height):
242 |             if mask.getpixel((row, col)):
243 |                 squares[row, col] = 1
244 |
245 |         # process internal pixels of the mask
246 |         for row in range(height - 2, -1, -1):
247 |             for col in range(width - 2, -1, -1):
248 |                 if mask.getpixel((row, col)):
249 |                     a = squares[row, col + 1]
250 |                     b = squares[row + 1, col]
251 |                     c = squares[row + 1, col + 1]
252 |                     squares[row, col] = (min(min(a, b), c) + 1)
253 |
254 |         sizes = numpy.zeros((height, width), dtype=int)
255 |
256 |         maxSquare = 0
257 |
258 |         for row in range(0, height):
259 |             for col in range(0, width):
260 |                 square = squares[row, col]
261 |                 sizes[row, col] = getSize(square, square)
262 |
263 |                 if square > maxSquare:
264 |                     maxSquare = square
265 |
266 |         # find largest rectangles with width >= height
267 |         height2width = [None]*(maxSquare + 1)
268 |
269 |         widths = numpy.zeros((height, width), dtype=int)
270 |         heights = numpy.zeros((height, width), dtype=int)
271 |
272 |         for row in range(0, height):
273 |             for s in range(0, maxSquare+1):
274 |                 height2width[s] = 0
275 |
276 |             for col in range(width - 1, -1, -1):
277 |                 square = squares[row, col]
278 |
279 |                 if square > 0:
280 |                     maxSize = sizes[row, col]
281 |
282 |                     for rectHeight in range(square, 0, -1):
283 |                         rectWidth = height2width[rectHeight]
284 |                         rectWidth = max(rectWidth + 1, square)
285 |                         height2width[rectHeight] = rectWidth
286 |                         size = getSize(rectWidth, rectHeight)
287 |                         if size >= maxSize:
288 |                             maxSize = size
289 |                             widths[row, col] = rectWidth
290 |                             heights[row, col] = rectHeight
291 |
292 |                     sizes[row, col] = maxSize
293 |
294 |                     for s in range(square + 1, maxSquare+1):
295 |                         # widths larger than 'square' will not be available
296 |                         height2width[s] = 0
297 |
298 |         # find largest rectangles with width < height
299 |         width2height = [None]*(maxSquare + 1)
300 |
301 |         for col in range(0, width):
302 |             for s in range(0, maxSquare+1):
303 |                 width2height[s] = 0
304 |
305 |             for row in range(height - 1, -1, -1):
306 |                 square = squares[row, col]
307 |
308 |                 if square > 0:
309 |                     maxSize = sizes[row, col]
310 |
311 |                     for rectWidth in range(square, 0, -1):
312 |                         rectHeight = width2height[rectWidth]
313 |                         rectHeight = max(rectHeight + 1, square)
314 |                         width2height[rectWidth] = rectHeight
315 |                         size = getSize(rectWidth, rectHeight)
316 |
317 |                         if size > maxSize:
318 |                             maxSize = size
319 |                             widths[row, col] = rectWidth
320 |                             heights[row, col] = rectHeight
321 |
322 |                     sizes[row, col] = maxSize
323 |
324 |                     for s in range(square + 1, maxSquare+1):
325 |                         # heights larger than 'square' will not be available
326 |                         width2height[s] = 0
327 |
328 |         # find the largest rectangle
329 |         maxSize = 0
330 |         rectWidth = 0
331 |         rectHeight = 0
332 |         rectRow = 0
333 |         rectCol = 0
334 |
335 |         for row in range(0, height):
336 |             for col in range(0, width):
337 |                 size = sizes[row, col]
338 |                 if size > maxSize:
339 |                     maxSize = size
340 |                     rectRow = row
341 |                     rectCol = col
342 |                     rectWidth = widths[row, col]
343 |                     rectHeight = heights[row, col]
344 |
345 |         return (rectCol, rectRow, rectCol + rectWidth, rectRow + rectHeight)
346 |
347 |     def getInscribedRectangle(self, angle, rectSz):
348 |         """
349 |         From https://stackoverflow.com/questions/5789239/calculate-largest-rectangle-in-a-rotated-rectangle
350 |         :param angle: angle in radians
351 |         :param rectSz: rectangle size
352 |         :return:
353 |         """
354 |
355 |         imgSzw = rectSz[0]
356 |         imgSzh = rectSz[1]
357 |
358 |         quadrant = int(numpy.floor(angle / (numpy.pi / 2.))) & 3
359 |         sign_alpha = angle if (quadrant & 1) == 0 else numpy.pi - angle
360 |         alpha = (sign_alpha % numpy.pi + numpy.pi) % numpy.pi
361 |
362 |         bbw = imgSzw * numpy.cos(alpha) + imgSzh * numpy.sin(alpha)
363 |         bbh = imgSzw * numpy.sin(alpha) + imgSzh * numpy.cos(alpha)
364 |
365 |         gamma = numpy.arctan2(bbw, bbh) if imgSzw < imgSzh else numpy.arctan2(bbh, bbw)
366 |         delta = numpy.pi - alpha - gamma
367 |
368 |         length = imgSzh if imgSzw < imgSzh else imgSzw
369 |         d = length * numpy.cos(alpha)
370 |         a = d * numpy.sin(alpha) / numpy.sin(delta)
371 |
372 |         y = a * numpy.cos(gamma)
373 |         x = y * numpy.tan(gamma)
374 |
375 |         return (int(x), int(y), int(x + bbw - 2*x), int(y + bbh - 2*y))
--------------------------------------------------------------------------------
/src/eval/EVAL_GOES_HERE:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/src/main_icvl_com_refine.py:
--------------------------------------------------------------------------------
1 | """This is the main file for training hand detection refinement on ICVL dataset
2 |
3 | Copyright 2015 Markus Oberweger, ICG,
4 | Graz University of Technology
5 |
6 | This file is part of DeepPrior.
7 |
8 | DeepPrior is free software: you can redistribute it and/or modify
9 | it under the terms of the GNU General Public License as published by
10 | the Free Software Foundation, either version 3 of the License, or
11 | (at your option) any later version.
12 |
13 | DeepPrior is distributed in the hope that it will be useful,
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | GNU General Public License for more details.
17 | 18 | You should have received a copy of the GNU General Public License 19 | along with DeepPrior. If not, see . 20 | """ 21 | 22 | import numpy 23 | import matplotlib 24 | matplotlib.use('Agg') # plot to file 25 | import matplotlib.pyplot as plt 26 | from net.scalenet import ScaleNetParams, ScaleNet 27 | from trainer.scalenettrainer import ScaleNetTrainerParams, ScaleNetTrainer 28 | from util.handdetector import HandDetector 29 | 30 | import theano 31 | import os 32 | import cPickle 33 | import sys 34 | from data.importers import ICVLImporter 35 | from data.dataset import ICVLDataset 36 | from util.handpose_evaluation import ICVLHandposeEvaluation 37 | import cv2 38 | 39 | if __name__ == '__main__': 40 | 41 | eval_prefix = 'ICVL_COM' 42 | if not os.path.exists('./eval/'+eval_prefix+'/'): 43 | os.makedirs('./eval/'+eval_prefix+'/') 44 | 45 | floatX = theano.config.floatX # @UndefinedVariable 46 | 47 | rng = numpy.random.RandomState(23455) 48 | 49 | print("create data") 50 | 51 | di = ICVLImporter('../data/ICVL/') 52 | Seq1 = di.loadSequence('train', ['0'],shuffle=True,rng=rng,docom=True) 53 | trainSeqs = [Seq1] 54 | 55 | Seq2 = di.loadSequence('test_seq_1',docom=True) 56 | testSeqs = [Seq2] 57 | 58 | # create training data 59 | trainDataSet = ICVLDataset(trainSeqs) 60 | train_data, train_gt3D = trainDataSet.imgStackDepthOnly('train') 61 | 62 | mb = (train_data.nbytes) / (1024 * 1024) 63 | print("data size: {}Mb".format(mb)) 64 | 65 | testDataSet = ICVLDataset(testSeqs) 66 | test_data, test_gt3D = testDataSet.imgStackDepthOnly('test_seq_1') 67 | 68 | val_data = test_data 69 | val_gt3D = test_gt3D 70 | 71 | #################################### 72 | # resize data 73 | dsize = (int(train_data.shape[2]//2), int(train_data.shape[3]//2)) 74 | xstart = int(train_data.shape[2]/2-dsize[0]/2) 75 | xend = xstart + dsize[0] 76 | ystart = int(train_data.shape[3]/2-dsize[1]/2) 77 | yend = ystart + dsize[1] 78 | train_data2 = train_data[:, :, ystart:yend, xstart:xend] 79 | 80 | dsize = (int(train_data.shape[2]//4), int(train_data.shape[3]//4)) 81 | xstart = int(train_data.shape[2]/2-dsize[0]/2) 82 | xend = xstart + dsize[0] 83 | ystart = int(train_data.shape[3]/2-dsize[1]/2) 84 | yend = ystart + dsize[1] 85 | train_data4 = train_data[:, :, ystart:yend, xstart:xend] 86 | 87 | dsize = (int(train_data.shape[2]//2), int(train_data.shape[3]//2)) 88 | xstart = int(train_data.shape[2]/2-dsize[0]/2) 89 | xend = xstart + dsize[0] 90 | ystart = int(train_data.shape[3]/2-dsize[1]/2) 91 | yend = ystart + dsize[1] 92 | val_data2 = val_data[:, :, ystart:yend, xstart:xend] 93 | 94 | dsize = (int(train_data.shape[2]//4), int(train_data.shape[3]//4)) 95 | xstart = int(train_data.shape[2]/2-dsize[0]/2) 96 | xend = xstart + dsize[0] 97 | ystart = int(train_data.shape[3]/2-dsize[1]/2) 98 | yend = ystart + dsize[1] 99 | val_data4 = val_data[:, :, ystart:yend, xstart:xend] 100 | 101 | dsize = (int(train_data.shape[2]//2), int(train_data.shape[3]//2)) 102 | xstart = int(train_data.shape[2]/2-dsize[0]/2) 103 | xend = xstart + dsize[0] 104 | ystart = int(train_data.shape[3]/2-dsize[1]/2) 105 | yend = ystart + dsize[1] 106 | test_data2 = test_data[:, :, ystart:yend, xstart:xend] 107 | 108 | dsize = (int(train_data.shape[2]//4), int(train_data.shape[3]//4)) 109 | xstart = int(train_data.shape[2]/2-dsize[0]/2) 110 | xend = xstart + dsize[0] 111 | ystart = int(train_data.shape[3]/2-dsize[1]/2) 112 | yend = ystart + dsize[1] 113 | test_data4 = test_data[:, :, ystart:yend, xstart:xend] 114 | 115 | print train_gt3D.max(), 
test_gt3D.max(), train_gt3D.min(), test_gt3D.min() 116 | print train_data.max(), test_data.max(), train_data.min(), test_data.min() 117 | 118 | imgSizeW = train_data.shape[3] 119 | imgSizeH = train_data.shape[2] 120 | nChannels = train_data.shape[1] 121 | 122 | ############################################################################# 123 | print("create network") 124 | batchSize = 64 125 | poseNetParams = ScaleNetParams(type=1, nChan=nChannels, wIn=imgSizeW, hIn=imgSizeH, batchSize=batchSize, 126 | resizeFactor=2, numJoints=1, nDims=3) 127 | poseNet = ScaleNet(rng, cfgParams=poseNetParams) 128 | 129 | poseNetTrainerParams = ScaleNetTrainerParams() 130 | poseNetTrainerParams.batch_size = batchSize 131 | poseNetTrainerParams.learning_rate = 0.0005 132 | poseNetTrainerParams.weightreg_factor = 0.0001 133 | poseNetTrainerParams.lr_of_ep = lambda ep: poseNetTrainerParams.learning_rate/(1+0.1*ep) 134 | 135 | print("setup trainer") 136 | poseNetTrainer = ScaleNetTrainer(poseNet, poseNetTrainerParams, rng) 137 | poseNetTrainer.setData(train_data, train_gt3D[:, 0, :], val_data, val_gt3D[:, 0, :]) 138 | poseNetTrainer.addStaticData({'val_data_x1': val_data2, 'val_data_x2': val_data4}) 139 | poseNetTrainer.addManagedData({'train_data_x1': train_data2, 'train_data_x2': train_data4}) 140 | poseNetTrainer.compileFunctions() 141 | 142 | ################################################################### 143 | # TRAIN 144 | train_res = poseNetTrainer.train(n_epochs=100, storeFilters=True) 145 | train_costs = train_res[0] 146 | wvals = train_res[1] 147 | val_errs = train_res[2] 148 | 149 | # plot cost 150 | fig = plt.figure() 151 | plt.semilogy(train_costs) 152 | plt.show(block=False) 153 | fig.savefig('./eval/'+eval_prefix+'/'+eval_prefix+'_cost.png') 154 | 155 | fig = plt.figure() 156 | plt.semilogy(val_errs) 157 | plt.show(block=False) 158 | fig.savefig('./eval/'+eval_prefix+'/'+eval_prefix+'_errs.png') 159 | 160 | # save results 161 | poseNet.save("./eval/{}/net_{}.pkl".format(eval_prefix, eval_prefix)) 162 | # poseNet.load("./eval/{}/net_{}.pkl".format(eval_prefix,eval_prefix)) 163 | 164 | #################################################### 165 | # TEST 166 | print("Testing ...") 167 | gt3D = [j.gt3Dorig[0].reshape(1, 3) for j in testSeqs[0].data] 168 | jts = poseNet.computeOutput([test_data, test_data2, test_data4]) 169 | joints = [] 170 | for i in xrange(test_data.shape[0]): 171 | joints.append(jts[i].reshape(1, 3)*(testSeqs[0].config['cube'][2]/2.) 
+ testSeqs[0].data[i].com) 172 | 173 | hpe = ICVLHandposeEvaluation(gt3D, joints) 174 | hpe.subfolder += '/'+eval_prefix+'/' 175 | mean_error = hpe.getMeanError() 176 | max_error = hpe.getMaxError() 177 | print("Mean error: {}mm, max error: {}mm".format(mean_error, max_error)) 178 | 179 | # save results 180 | cPickle.dump(joints, open("./eval/{}/result_{}_{}.pkl".format(eval_prefix,os.path.split(__file__)[1],eval_prefix), "wb"), protocol=cPickle.HIGHEST_PROTOCOL) 181 | 182 | print "Testing baseline" 183 | 184 | ################################# 185 | # BASELINE 186 | # Load the evaluation 187 | data_baseline = di.loadBaseline('../data/ICVL/LRF_Results_seq_1.txt') 188 | 189 | hpe_base = ICVLHandposeEvaluation(gt3D, numpy.asarray(data_baseline)[:,0,:].reshape((len(gt3D),1,3))) 190 | hpe_base.subfolder += '/'+eval_prefix+'/' 191 | print("Mean error: {}mm".format(hpe_base.getMeanError())) 192 | 193 | com = [j.com for j in testSeqs[0].data] 194 | hpe_com = ICVLHandposeEvaluation(gt3D, numpy.asarray(com).reshape((len(gt3D),1,3))) 195 | hpe_com.subfolder += '/'+eval_prefix+'/' 196 | print("Mean error: {}mm".format(hpe_com.getMeanError())) 197 | 198 | -------------------------------------------------------------------------------- /src/main_icvl_posereg_embedding.py: -------------------------------------------------------------------------------- 1 | """This is the main file for training hand joint classifier on ICVL dataset 2 | 3 | Copyright 2015 Markus Oberweger, ICG, 4 | Graz University of Technology 5 | 6 | This file is part of DeepPrior. 7 | 8 | DeepPrior is free software: you can redistribute it and/or modify 9 | it under the terms of the GNU General Public License as published by 10 | the Free Software Foundation, either version 3 of the License, or 11 | (at your option) any later version. 12 | 13 | DeepPrior is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU General Public License for more details. 17 | 18 | You should have received a copy of the GNU General Public License 19 | along with DeepPrior. If not, see . 
20 | """ 21 | 22 | import numpy 23 | import matplotlib 24 | matplotlib.use('Agg') # plot to file 25 | import matplotlib.pyplot as plt 26 | import theano 27 | import os 28 | import cPickle 29 | import sys 30 | from sklearn.decomposition import PCA 31 | from trainer.poseregnettrainer import PoseRegNetTrainer, PoseRegNetTrainerParams 32 | from net.poseregnet import PoseRegNetParams, PoseRegNet 33 | from data.importers import ICVLImporter 34 | from data.dataset import ICVLDataset 35 | from util.handpose_evaluation import ICVLHandposeEvaluation 36 | from data.transformations import transformPoint2D 37 | from net.hiddenlayer import HiddenLayer, HiddenLayerParams 38 | 39 | if __name__ == '__main__': 40 | 41 | eval_prefix = 'ICVL_EMB_t0nF8mp421fD553h1024_PCA30' 42 | if not os.path.exists('./eval/'+eval_prefix+'/'): 43 | os.makedirs('./eval/'+eval_prefix+'/') 44 | 45 | floatX = theano.config.floatX # @UndefinedVariable 46 | 47 | rng = numpy.random.RandomState(23455) 48 | 49 | print("create data") 50 | 51 | di = ICVLImporter('../data/ICVL/') 52 | Seq1 = di.loadSequence('train', ['0'], shuffle=True, rng=rng) 53 | trainSeqs = [Seq1] 54 | 55 | Seq2 = di.loadSequence('test_seq_1') 56 | testSeqs = [Seq2] 57 | 58 | # create training data 59 | trainDataSet = ICVLDataset(trainSeqs) 60 | train_data, train_gt3D = trainDataSet.imgStackDepthOnly('train') 61 | 62 | mb = (train_data.nbytes) / (1024 * 1024) 63 | print("data size: {}Mb".format(mb)) 64 | 65 | valDataSet = ICVLDataset(testSeqs) 66 | val_data, val_gt3D = valDataSet.imgStackDepthOnly('test_seq_1') 67 | 68 | testDataSet = ICVLDataset(testSeqs) 69 | test_data, test_gt3D = testDataSet.imgStackDepthOnly('test_seq_1') 70 | 71 | print train_gt3D.max(), test_gt3D.max(), train_gt3D.min(), test_gt3D.min() 72 | print train_data.max(), test_data.max(), train_data.min(), test_data.min() 73 | 74 | imgSizeW = train_data.shape[3] 75 | imgSizeH = train_data.shape[2] 76 | nChannels = train_data.shape[1] 77 | 78 | #################################### 79 | # convert data to embedding 80 | pca = PCA(n_components=30) 81 | pca.fit(train_gt3D.reshape((train_gt3D.shape[0], train_gt3D.shape[1]*3))) 82 | train_gt3D_embed = pca.transform(train_gt3D.reshape((train_gt3D.shape[0], train_gt3D.shape[1]*3))) 83 | test_gt3D_embed = pca.transform(test_gt3D.reshape((test_gt3D.shape[0], test_gt3D.shape[1]*3))) 84 | val_gt3D_embed = pca.transform(val_gt3D.reshape((val_gt3D.shape[0], val_gt3D.shape[1]*3))) 85 | 86 | ############################################################################ 87 | print("create network") 88 | batchSize = 128 89 | poseNetParams = PoseRegNetParams(type=0, nChan=nChannels, wIn=imgSizeW, hIn=imgSizeH, batchSize=batchSize, numJoints=1, nDims=train_gt3D_embed.shape[1]) 90 | poseNet = PoseRegNet(rng, cfgParams=poseNetParams) 91 | 92 | poseNetTrainerParams = PoseRegNetTrainerParams() 93 | poseNetTrainerParams.batch_size = batchSize 94 | poseNetTrainerParams.learning_rate = 0.01 95 | 96 | print("setup trainer") 97 | poseNetTrainer = PoseRegNetTrainer(poseNet, poseNetTrainerParams, rng) 98 | poseNetTrainer.setData(train_data, train_gt3D_embed, val_data, val_gt3D_embed) 99 | poseNetTrainer.compileFunctions(compileDebugFcts=False) 100 | 101 | ################################################################### 102 | # 103 | # TRAIN 104 | nEpochs = 100 105 | train_res = poseNetTrainer.train(n_epochs=nEpochs, storeFilters=True) 106 | train_costs = train_res[0] 107 | wvals = train_res[1] 108 | val_errs = train_res[2] 109 | 110 | 
################################################################### 111 | # TEST 112 | # plot cost 113 | fig = plt.figure() 114 | plt.semilogy(train_costs) 115 | plt.show(block=False) 116 | fig.savefig('./eval/'+eval_prefix+'/'+eval_prefix+'_cost.png') 117 | 118 | fig = plt.figure() 119 | plt.semilogy(val_errs) 120 | plt.show(block=False) 121 | fig.savefig('./eval/'+eval_prefix+'/'+eval_prefix+'_errs.png') 122 | 123 | # save results 124 | poseNet.save("./eval/{}/net_{}.pkl".format(eval_prefix,eval_prefix)) 125 | # poseNet.load("./eval/{}/net_{}.pkl".format(eval_prefix,eval_prefix)) 126 | 127 | # add prior to network 128 | cfg = HiddenLayerParams(inputDim=(batchSize, train_gt3D_embed.shape[1]), outputDim=(batchSize, numpy.prod(train_gt3D.shape[1:])), activation=None) 129 | pcalayer = HiddenLayer(rng, poseNet.layers[-1].output, cfg, copyLayer=None, layerNum=len(poseNet.layers)) 130 | pcalayer.W.set_value(pca.components_) 131 | pcalayer.b.set_value(pca.mean_) 132 | poseNet.layers.append(pcalayer) 133 | poseNet.output = pcalayer.output 134 | poseNet.cfgParams.numJoints = train_gt3D.shape[1] 135 | poseNet.cfgParams.nDims = train_gt3D.shape[2] 136 | poseNet.cfgParams.outputDim = pcalayer.cfgParams.outputDim 137 | poseNet.save("./eval/{}/network_prior.pkl".format(eval_prefix)) 138 | 139 | ################################################################### 140 | # test 141 | print("Testing ...") 142 | gt3D = [j.gt3Dorig for j in testSeqs[0].data] 143 | jts_embed = poseNet.computeOutput(test_data) 144 | jts = jts_embed 145 | joints = [] 146 | for i in range(test_data.shape[0]): 147 | joints.append(jts[i].reshape(gt3D[0].shape[0], 3)*(testSeqs[0].config['cube'][2]/2.) + testSeqs[0].data[i].com) 148 | 149 | joints = numpy.array(joints) 150 | 151 | hpe = ICVLHandposeEvaluation(gt3D, joints) 152 | hpe.subfolder += '/'+eval_prefix+'/' 153 | mean_error = hpe.getMeanError() 154 | max_error = hpe.getMaxError() 155 | print("Train samples: {}, test samples: {}".format(train_data.shape[0], len(gt3D))) 156 | print("Mean error: {}mm, max error: {}mm".format(mean_error, max_error)) 157 | print("MD score: {}".format(hpe.getMDscore(80))) 158 | 159 | print("{}".format([hpe.getJointMeanError(j) for j in range(joints[0].shape[0])])) 160 | print("{}".format([hpe.getJointMaxError(j) for j in range(joints[0].shape[0])])) 161 | 162 | # save results 163 | cPickle.dump(joints, open("./eval/{}/result_{}_{}.pkl".format(eval_prefix, os.path.split(__file__)[1], eval_prefix), "wb"), protocol=cPickle.HIGHEST_PROTOCOL) 164 | 165 | print "Testing baseline" 166 | 167 | ################################# 168 | # BASELINE 169 | # Load the evaluation 170 | data_baseline = di.loadBaseline('../data/ICVL/LRF_Results_seq_1.txt') 171 | 172 | hpe_base = ICVLHandposeEvaluation(gt3D, data_baseline) 173 | hpe_base.subfolder += '/'+eval_prefix+'/' 174 | print("Mean error: {}mm".format(hpe_base.getMeanError())) 175 | 176 | hpe.plotEvaluation(eval_prefix, methodName='Our regr', baseline=[('Tang et al.', hpe_base)]) 177 | 178 | ind = 0 179 | for i in testSeqs[0].data: 180 | if ind % 20 != 0: 181 | ind += 1 182 | continue 183 | jt = joints[ind] 184 | jtI = di.joints3DToImg(jt) 185 | for joint in range(jt.shape[0]): 186 | t=transformPoint2D(jtI[joint], i.T) 187 | jtI[joint, 0] = t[0] 188 | jtI[joint, 1] = t[1] 189 | hpe.plotResult(i.dpt, i.gtcrop, jtI, "{}_{}".format(eval_prefix, ind)) 190 | ind+=1 191 | -------------------------------------------------------------------------------- /src/main_nyu_com_refine.py: 
-------------------------------------------------------------------------------- 1 | """This is the main file for training hand detection refinement on NYU dataset 2 | 3 | Copyright 2015 Markus Oberweger, ICG, 4 | Graz University of Technology 5 | 6 | This file is part of DeepPrior. 7 | 8 | DeepPrior is free software: you can redistribute it and/or modify 9 | it under the terms of the GNU General Public License as published by 10 | the Free Software Foundation, either version 3 of the License, or 11 | (at your option) any later version. 12 | 13 | DeepPrior is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU General Public License for more details. 17 | 18 | You should have received a copy of the GNU General Public License 19 | along with DeepPrior. If not, see . 20 | """ 21 | 22 | import numpy 23 | import matplotlib 24 | matplotlib.use('Agg') # plot to file 25 | import matplotlib.pyplot as plt 26 | from net.scalenet import ScaleNetParams, ScaleNet 27 | from trainer.scalenettrainer import ScaleNetTrainerParams, ScaleNetTrainer 28 | from util.handdetector import HandDetector 29 | 30 | import theano 31 | import os 32 | import cPickle 33 | import sys 34 | from data.importers import NYUImporter 35 | from data.dataset import NYUDataset 36 | from util.handpose_evaluation import NYUHandposeEvaluation 37 | import cv2 38 | 39 | if __name__ == '__main__': 40 | 41 | eval_prefix = 'NYU_COM' 42 | if not os.path.exists('./eval/'+eval_prefix+'/'): 43 | os.makedirs('./eval/'+eval_prefix+'/') 44 | 45 | floatX = theano.config.floatX # @UndefinedVariable 46 | 47 | rng = numpy.random.RandomState(23455) 48 | 49 | print("create data") 50 | 51 | di = NYUImporter('../data/NYU/') 52 | Seq1 = di.loadSequence('train',shuffle=True,rng=rng,docom=True) 53 | trainSeqs = [Seq1] 54 | 55 | Seq2_1 = di.loadSequence('test_1',docom=True) 56 | Seq2_2 = di.loadSequence('test_2',docom=True) 57 | testSeqs = [Seq2_1, Seq2_2] 58 | 59 | # create training data 60 | trainDataSet = NYUDataset(trainSeqs) 61 | train_data, train_gt3D = trainDataSet.imgStackDepthOnly('train') 62 | 63 | mb = (train_data.nbytes) / (1024 * 1024) 64 | print("data size: {}Mb".format(mb)) 65 | 66 | testDataSet = NYUDataset(testSeqs) 67 | test_data1, test_gt3D1 = testDataSet.imgStackDepthOnly('test_1') 68 | test_data2, test_gt3D2 = testDataSet.imgStackDepthOnly('test_2') 69 | 70 | val_data = test_data1 71 | val_gt3D = test_gt3D1 72 | 73 | #################################### 74 | # resize data 75 | dsize = (int(train_data.shape[2]//2), int(train_data.shape[3]//2)) 76 | xstart = int(train_data.shape[2]/2-dsize[0]/2) 77 | xend = xstart + dsize[0] 78 | ystart = int(train_data.shape[3]/2-dsize[1]/2) 79 | yend = ystart + dsize[1] 80 | train_data2 = train_data[:, :, ystart:yend, xstart:xend] 81 | 82 | dsize = (int(train_data.shape[2]//4), int(train_data.shape[3]//4)) 83 | xstart = int(train_data.shape[2]/2-dsize[0]/2) 84 | xend = xstart + dsize[0] 85 | ystart = int(train_data.shape[3]/2-dsize[1]/2) 86 | yend = ystart + dsize[1] 87 | train_data4 = train_data[:, :, ystart:yend, xstart:xend] 88 | 89 | dsize = (int(train_data.shape[2]//2), int(train_data.shape[3]//2)) 90 | xstart = int(train_data.shape[2]/2-dsize[0]/2) 91 | xend = xstart + dsize[0] 92 | ystart = int(train_data.shape[3]/2-dsize[1]/2) 93 | yend = ystart + dsize[1] 94 | val_data2 = val_data[:, :, ystart:yend, xstart:xend] 95 | 96 | dsize = (int(train_data.shape[2]//4), 
int(train_data.shape[3]//4)) 97 | xstart = int(train_data.shape[2]/2-dsize[0]/2) 98 | xend = xstart + dsize[0] 99 | ystart = int(train_data.shape[3]/2-dsize[1]/2) 100 | yend = ystart + dsize[1] 101 | val_data4 = val_data[:, :, ystart:yend, xstart:xend] 102 | 103 | dsize = (int(train_data.shape[2]//2), int(train_data.shape[3]//2)) 104 | xstart = int(train_data.shape[2]/2-dsize[0]/2) 105 | xend = xstart + dsize[0] 106 | ystart = int(train_data.shape[3]/2-dsize[1]/2) 107 | yend = ystart + dsize[1] 108 | test_data12 = test_data1[:, :, ystart:yend, xstart:xend] 109 | 110 | dsize = (int(train_data.shape[2]//4), int(train_data.shape[3]//4)) 111 | xstart = int(train_data.shape[2]/2-dsize[0]/2) 112 | xend = xstart + dsize[0] 113 | ystart = int(train_data.shape[3]/2-dsize[1]/2) 114 | yend = ystart + dsize[1] 115 | test_data14 = test_data1[:, :, ystart:yend, xstart:xend] 116 | 117 | dsize = (int(train_data.shape[2]//2), int(train_data.shape[3]//2)) 118 | xstart = int(train_data.shape[2]/2-dsize[0]/2) 119 | xend = xstart + dsize[0] 120 | ystart = int(train_data.shape[3]/2-dsize[1]/2) 121 | yend = ystart + dsize[1] 122 | test_data22 = test_data2[:, :, ystart:yend, xstart:xend] 123 | 124 | dsize = (int(train_data.shape[2]//4), int(train_data.shape[3]//4)) 125 | xstart = int(train_data.shape[2]/2-dsize[0]/2) 126 | xend = xstart + dsize[0] 127 | ystart = int(train_data.shape[3]/2-dsize[1]/2) 128 | yend = ystart + dsize[1] 129 | test_data24 = test_data2[:, :, ystart:yend, xstart:xend] 130 | 131 | print train_gt3D.max(), test_gt3D1.max(), train_gt3D.min(), test_gt3D1.min() 132 | print train_data.max(), test_data1.max(), train_data.min(), test_data1.min() 133 | 134 | imgSizeW = train_data.shape[3] 135 | imgSizeH = train_data.shape[2] 136 | nChannels = train_data.shape[1] 137 | 138 | ############################################################################# 139 | print("create network") 140 | batchSize = 64 141 | poseNetParams = ScaleNetParams(type=1, nChan=nChannels, wIn=imgSizeW, hIn=imgSizeH, batchSize=batchSize, 142 | resizeFactor=2, numJoints=1, nDims=3) 143 | poseNet = ScaleNet(rng, cfgParams=poseNetParams) 144 | 145 | poseNetTrainerParams = ScaleNetTrainerParams() 146 | poseNetTrainerParams.batch_size = batchSize 147 | poseNetTrainerParams.learning_rate = 0.0005 148 | poseNetTrainerParams.weightreg_factor = 0.0001 149 | poseNetTrainerParams.lr_of_ep = lambda ep: poseNetTrainerParams.learning_rate/(1+0.1*ep) 150 | 151 | print("setup trainer") 152 | poseNetTrainer = ScaleNetTrainer(poseNet, poseNetTrainerParams, rng) 153 | poseNetTrainer.setData(train_data, train_gt3D[:, 13, :], val_data, val_gt3D[:, 13, :]) 154 | poseNetTrainer.addStaticData({'val_data_x1': val_data2, 'val_data_x2': val_data4}) 155 | poseNetTrainer.addManagedData({'train_data_x1': train_data2, 'train_data_x2': train_data4}) 156 | poseNetTrainer.compileFunctions() 157 | 158 | ################################################################### 159 | # TRAIN 160 | train_res = poseNetTrainer.train(n_epochs=100, storeFilters=True) 161 | train_costs = train_res[0] 162 | wvals = train_res[1] 163 | val_errs = train_res[2] 164 | 165 | # plot cost 166 | fig = plt.figure() 167 | plt.semilogy(train_costs) 168 | plt.show(block=False) 169 | fig.savefig('./eval/'+eval_prefix+'/'+eval_prefix+'_cost.png') 170 | 171 | fig = plt.figure() 172 | plt.semilogy(val_errs) 173 | plt.show(block=False) 174 | fig.savefig('./eval/'+eval_prefix+'/'+eval_prefix+'_errs.png') 175 | 176 | # save results 177 | poseNet.save("./eval/{}/net_{}.pkl".format(eval_prefix, 
eval_prefix)) 178 | # poseNet.load("./eval/{}/net_{}.pkl".format(eval_prefix,eval_prefix)) 179 | 180 | #################################################### 181 | # TEST 182 | print("Testing ...") 183 | gt3D = [] 184 | joints = [] 185 | gt3D.extend([j.gt3Dorig[13].reshape(1,3) for j in testSeqs[0].data]) 186 | jts = poseNet.computeOutput([test_data1, test_data12, test_data14]) 187 | for i in xrange(test_data1.shape[0]): 188 | joints.append(jts[i].reshape(1, 3)*(testSeqs[0].config['cube'][2]/2.) + testSeqs[0].data[i].com) 189 | 190 | gt3D.extend([j.gt3Dorig[13].reshape(1,3) for j in testSeqs[1].data]) 191 | jts = poseNet.computeOutput([test_data2, test_data22, test_data24]) 192 | for i in range(test_data2.shape[0]): 193 | joints.append(jts[i].reshape(1, 3)*(testSeqs[1].config['cube'][2]/2.) + testSeqs[1].data[i].com) 194 | 195 | hpe = NYUHandposeEvaluation(gt3D, joints) 196 | hpe.subfolder += '/'+eval_prefix+'/' 197 | mean_error = hpe.getMeanError() 198 | max_error = hpe.getMaxError() 199 | print("Mean error: {}mm, max error: {}mm".format(mean_error, max_error)) 200 | 201 | # save results 202 | cPickle.dump(joints, open("./eval/{}/result_{}_{}.pkl".format(eval_prefix,os.path.split(__file__)[1],eval_prefix), "wb"), protocol=cPickle.HIGHEST_PROTOCOL) 203 | 204 | print "Testing baseline" 205 | 206 | ################################# 207 | # BASELINE 208 | # Load the evaluation 209 | data_baseline = di.loadBaseline('../data/NYU/test/test_predictions.mat',numpy.concatenate([numpy.asarray([j.gt3Dorig for j in testSeqs[0].data]), numpy.asarray([j.gt3Dorig for j in testSeqs[1].data])])) 210 | 211 | hpe_base = NYUHandposeEvaluation(gt3D, numpy.asarray(data_baseline)[:, 13, :].reshape((len(gt3D), 1, 3))) 212 | hpe_base.subfolder += '/'+eval_prefix+'/' 213 | print("Mean error: {}mm".format(hpe_base.getMeanError())) 214 | 215 | com = [j.com for j in testSeqs[0].data] 216 | com.extend([j.com for j in testSeqs[1].data]) 217 | hpe_com = NYUHandposeEvaluation(gt3D, numpy.asarray(com).reshape((len(gt3D),1,3))) 218 | hpe_com.subfolder += '/'+eval_prefix+'/' 219 | print("Mean error: {}mm".format(hpe_com.getMeanError())) 220 | 221 | -------------------------------------------------------------------------------- /src/main_nyu_posereg_embedding.py: -------------------------------------------------------------------------------- 1 | """This is the main file for training hand joint classifier on NYU dataset 2 | 3 | Copyright 2015 Markus Oberweger, ICG, 4 | Graz University of Technology 5 | 6 | This file is part of DeepPrior. 7 | 8 | DeepPrior is free software: you can redistribute it and/or modify 9 | it under the terms of the GNU General Public License as published by 10 | the Free Software Foundation, either version 3 of the License, or 11 | (at your option) any later version. 12 | 13 | DeepPrior is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU General Public License for more details. 17 | 18 | You should have received a copy of the GNU General Public License 19 | along with DeepPrior. If not, see . 
20 | """ 21 | 22 | import numpy 23 | import matplotlib 24 | matplotlib.use('Agg') # plot to file 25 | import matplotlib.pyplot as plt 26 | import theano 27 | import os 28 | import cPickle 29 | import sys 30 | from sklearn.decomposition import PCA 31 | from trainer.poseregnettrainer import PoseRegNetTrainer, PoseRegNetTrainerParams 32 | from net.poseregnet import PoseRegNetParams, PoseRegNet 33 | from data.importers import NYUImporter 34 | from data.dataset import NYUDataset 35 | from util.handpose_evaluation import NYUHandposeEvaluation 36 | from data.transformations import transformPoint2D 37 | from net.hiddenlayer import HiddenLayer, HiddenLayerParams 38 | 39 | if __name__ == '__main__': 40 | 41 | eval_prefix = 'NYU_EMB_t0nF8mp421fD553h1024_PCA30' 42 | if not os.path.exists('./eval/'+eval_prefix+'/'): 43 | os.makedirs('./eval/'+eval_prefix+'/') 44 | 45 | floatX = theano.config.floatX # @UndefinedVariable 46 | 47 | rng = numpy.random.RandomState(23455) 48 | 49 | print("create data") 50 | 51 | di = NYUImporter('../data/NYU/') 52 | Seq1 = di.loadSequence('train', shuffle=True, rng=rng) 53 | trainSeqs = [Seq1] 54 | 55 | Seq2_1 = di.loadSequence('test_1') 56 | Seq2_2 = di.loadSequence('test_2') 57 | testSeqs = [Seq2_1, Seq2_2] 58 | 59 | # create training data 60 | trainDataSet = NYUDataset(trainSeqs) 61 | train_data, train_gt3D = trainDataSet.imgStackDepthOnly('train') 62 | 63 | mb = (train_data.nbytes) / (1024 * 1024) 64 | print("data size: {}Mb".format(mb)) 65 | 66 | valDataSet = NYUDataset(testSeqs) 67 | val_data, val_gt3D = valDataSet.imgStackDepthOnly('test_1') 68 | 69 | testDataSet = NYUDataset(testSeqs) 70 | test_data1, test_gt3D1 = testDataSet.imgStackDepthOnly('test_1') 71 | test_data2, test_gt3D2 = testDataSet.imgStackDepthOnly('test_2') 72 | 73 | print train_gt3D.max(), test_gt3D1.max(), train_gt3D.min(), test_gt3D1.min() 74 | print train_data.max(), test_data1.max(), train_data.min(), test_data1.min() 75 | 76 | imgSizeW = train_data.shape[3] 77 | imgSizeH = train_data.shape[2] 78 | nChannels = train_data.shape[1] 79 | 80 | #################################### 81 | # convert data to embedding 82 | pca = PCA(n_components=30) 83 | pca.fit(train_gt3D.reshape((train_gt3D.shape[0], train_gt3D.shape[1]*3))) 84 | train_gt3D_embed = pca.transform(train_gt3D.reshape((train_gt3D.shape[0], train_gt3D.shape[1]*3))) 85 | test_gt3D_embed1 = pca.transform(test_gt3D1.reshape((test_gt3D1.shape[0], test_gt3D1.shape[1]*3))) 86 | test_gt3D_embed2 = pca.transform(test_gt3D2.reshape((test_gt3D2.shape[0], test_gt3D2.shape[1]*3))) 87 | val_gt3D_embed = pca.transform(val_gt3D.reshape((val_gt3D.shape[0], val_gt3D.shape[1]*3))) 88 | 89 | ############################################################################ 90 | print("create network") 91 | batchSize = 128 92 | poseNetParams = PoseRegNetParams(type=0, nChan=nChannels, wIn=imgSizeW, hIn=imgSizeH, batchSize=batchSize, numJoints=1, nDims=train_gt3D_embed.shape[1]) 93 | poseNet = PoseRegNet(rng, cfgParams=poseNetParams) 94 | 95 | poseNetTrainerParams = PoseRegNetTrainerParams() 96 | poseNetTrainerParams.batch_size = batchSize 97 | poseNetTrainerParams.learning_rate = 0.01 98 | 99 | print("setup trainer") 100 | poseNetTrainer = PoseRegNetTrainer(poseNet, poseNetTrainerParams, rng) 101 | poseNetTrainer.setData(train_data, train_gt3D_embed, val_data, val_gt3D_embed) 102 | poseNetTrainer.compileFunctions(compileDebugFcts=False) 103 | 104 | ################################################################### 105 | # 106 | # TRAIN 107 | nEpochs = 100 108 | 
train_res = poseNetTrainer.train(n_epochs=nEpochs, storeFilters=True) 109 | train_costs = train_res[0] 110 | wvals = train_res[1] 111 | val_errs = train_res[2] 112 | 113 | ################################################################### 114 | # TEST 115 | # plot cost 116 | fig = plt.figure() 117 | plt.semilogy(train_costs) 118 | plt.show(block=False) 119 | fig.savefig('./eval/'+eval_prefix+'/'+eval_prefix+'_cost.png') 120 | 121 | fig = plt.figure() 122 | plt.semilogy(val_errs) 123 | plt.show(block=False) 124 | fig.savefig('./eval/'+eval_prefix+'/'+eval_prefix+'_errs.png') 125 | 126 | # save results 127 | poseNet.save("./eval/{}/net_{}.pkl".format(eval_prefix,eval_prefix)) 128 | # poseNet.load("./eval/{}/net_{}.pkl".format(eval_prefix,eval_prefix)) 129 | 130 | # add prior to network 131 | cfg = HiddenLayerParams(inputDim=(batchSize, train_gt3D_embed.shape[1]), outputDim=(batchSize, numpy.prod(train_gt3D.shape[1:])), activation=None) 132 | pcalayer = HiddenLayer(rng, poseNet.layers[-1].output, cfg, copyLayer=None, layerNum=len(poseNet.layers)) 133 | pcalayer.W.set_value(pca.components_) 134 | pcalayer.b.set_value(pca.mean_) 135 | poseNet.layers.append(pcalayer) 136 | poseNet.output = pcalayer.output 137 | poseNet.cfgParams.numJoints = train_gt3D.shape[1] 138 | poseNet.cfgParams.nDims = train_gt3D.shape[2] 139 | poseNet.cfgParams.outputDim = pcalayer.cfgParams.outputDim 140 | poseNet.save("./eval/{}/network_prior.pkl".format(eval_prefix)) 141 | 142 | ################################################################### 143 | # test 144 | print("Testing ...") 145 | gt3D = [] 146 | joints = [] 147 | for seq in testSeqs: 148 | gt3D.extend([j.gt3Dorig for j in seq.data]) 149 | test_data, _ = testDataSet.imgStackDepthOnly(seq.name) 150 | jts_embed = poseNet.computeOutput(test_data) 151 | # Backtransform from embedding 152 | # jts = pca.inverse_transform(jts_embed) 153 | jts = jts_embed 154 | for i in range(test_data.shape[0]): 155 | joints.append(jts[i].reshape(gt3D[0].shape[0], 3)*(seq.config['cube'][2]/2.) 
+ seq.data[i].com) 156 | 157 | joints = numpy.array(joints) 158 | 159 | hpe = NYUHandposeEvaluation(gt3D, joints) 160 | hpe.subfolder += '/'+eval_prefix+'/' 161 | mean_error = hpe.getMeanError() 162 | max_error = hpe.getMaxError() 163 | print("Train samples: {}, test samples: {}".format(train_data.shape[0], len(gt3D))) 164 | print("Mean error: {}mm, max error: {}mm".format(mean_error, max_error)) 165 | print("MD score: {}".format(hpe.getMDscore(80))) 166 | 167 | print("{}".format([hpe.getJointMeanError(j) for j in range(joints[0].shape[0])])) 168 | print("{}".format([hpe.getJointMaxError(j) for j in range(joints[0].shape[0])])) 169 | 170 | # save results 171 | cPickle.dump(joints, open("./eval/{}/result_{}_{}.pkl".format(eval_prefix,os.path.split(__file__)[1],eval_prefix), "wb"), protocol=cPickle.HIGHEST_PROTOCOL) 172 | 173 | print "Testing baseline" 174 | 175 | ################################# 176 | # BASELINE 177 | # Load the evaluation 178 | data_baseline = di.loadBaseline('../data/NYU/test/test_predictions.mat', numpy.asarray(gt3D)) 179 | 180 | hpe_base = NYUHandposeEvaluation(gt3D, data_baseline) 181 | hpe_base.subfolder += '/'+eval_prefix+'/' 182 | print("Mean error: {}mm".format(hpe_base.getMeanError())) 183 | 184 | hpe.plotEvaluation(eval_prefix, methodName='Our regr',baseline=[('Tompson et al.',hpe_base)]) 185 | 186 | ind = 0 187 | for i in testSeqs[0].data: 188 | if ind % 20 != 0: 189 | ind += 1 190 | continue 191 | jt = joints[ind] 192 | jtI = di.joints3DToImg(jt) 193 | for joint in range(jt.shape[0]): 194 | t=transformPoint2D(jtI[joint], i.T) 195 | jtI[joint, 0] = t[0] 196 | jtI[joint, 1] = t[1] 197 | hpe.plotResult(i.dpt, i.gtcrop, jtI, "{}_{}".format(eval_prefix, ind)) 198 | ind+=1 199 | -------------------------------------------------------------------------------- /src/net/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/net/convlayer.py: -------------------------------------------------------------------------------- 1 | """Provides ConvLayer class for using in CNNs. 2 | 3 | ConvLayer provides interface for building convolutional layers in CNNs. 4 | ConvLayerParams is the parametrization of these ConvLayer layers. 5 | 6 | Copyright 2015 Markus Oberweger, ICG, 7 | Graz University of Technology 8 | 9 | This file is part of DeepPrior. 10 | 11 | DeepPrior is free software: you can redistribute it and/or modify 12 | it under the terms of the GNU General Public License as published by 13 | the Free Software Foundation, either version 3 of the License, or 14 | (at your option) any later version. 15 | 16 | DeepPrior is distributed in the hope that it will be useful, 17 | but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | GNU General Public License for more details. 20 | 21 | You should have received a copy of the GNU General Public License 22 | along with DeepPrior. If not, see . 
23 | """ 24 | 25 | import numpy 26 | import theano 27 | import theano.sandbox.neighbours 28 | import theano.tensor as T 29 | from theano.tensor.nnet import conv2d 30 | from net.layerparams import LayerParams 31 | from util.helpers import ReLU 32 | 33 | __author__ = "Paul Wohlhart , Markus Oberweger " 34 | __copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria" 35 | __credits__ = ["Paul Wohlhart", "Markus Oberweger"] 36 | __license__ = "GPL" 37 | __version__ = "1.0" 38 | __maintainer__ = "Markus Oberweger" 39 | __email__ = "oberweger@icg.tugraz.at" 40 | __status__ = "Development" 41 | 42 | 43 | class ConvLayerParams(LayerParams): 44 | 45 | def __init__(self, inputDim=None, nFilters=None, filterDim=None, activation=T.tanh, 46 | filter_shape=None, image_shape=None, outputDim=None, stride=(1, 1), border_mode='valid'): 47 | """ 48 | :type filter_shape: tuple or list of length 4 49 | :param filter_shape: (number of filters, num inputVar feature maps, filter height,filter width) 50 | 51 | :type image_shape: tuple or list of length 4 52 | :param image_shape: (batch size, num inputVar feature maps, image height, image width) 53 | 54 | :type stride: tuple or list of length 2 55 | :param stride: the downsampling (pooling) factor (#rows,#cols) 56 | """ 57 | 58 | super(ConvLayerParams, self).__init__(inputDim, outputDim) 59 | 60 | self._nFilters = nFilters 61 | self._filterDim = filterDim 62 | self._filter_shape = filter_shape 63 | self._image_shape = image_shape 64 | self._activation = activation 65 | self._stride = stride 66 | self._border_mode = border_mode 67 | self.update() 68 | 69 | @property 70 | def filter_shape(self): 71 | return self._filter_shape 72 | 73 | @property 74 | def image_shape(self): 75 | return self._image_shape 76 | 77 | @property 78 | def stride(self): 79 | return self._stride 80 | 81 | @stride.setter 82 | def stride(self, value): 83 | self._stride = value 84 | self.update() 85 | 86 | @property 87 | def border_mode(self): 88 | return self._border_mode 89 | 90 | @border_mode.setter 91 | def border_mode(self, value): 92 | self._border_mode = value 93 | self.update() 94 | 95 | @property 96 | def nFilters(self): 97 | return self._nFilters 98 | 99 | @nFilters.setter 100 | def nFilters(self, value): 101 | self._nFilters = value 102 | self.update() 103 | 104 | @property 105 | def filterDim(self): 106 | return self._filterDim 107 | 108 | @filterDim.setter 109 | def filterDim(self, value): 110 | self._filterDim = value 111 | self.update() 112 | 113 | @property 114 | def activation(self): 115 | return self._activation 116 | 117 | def update(self): 118 | """ 119 | calc image_shape, 120 | """ 121 | self._filter_shape = (self._nFilters, 122 | self._inputDim[1], 123 | self._filterDim[1], 124 | self._filterDim[0]) 125 | self._image_shape = self._inputDim 126 | 127 | if self._border_mode == 'valid': 128 | self._outputDim = (self._inputDim[0], # batch_size 129 | self._nFilters, # number of kernels 130 | (self._inputDim[2] - self._filterDim[0] + 1), # output H 131 | (self._inputDim[3] - self._filterDim[1] + 1)) # output W 132 | elif self._border_mode == 'full': 133 | self._outputDim = (self._inputDim[0], # batch_size 134 | self._nFilters, # number of kernels 135 | (self._inputDim[2] + self._filterDim[0] - 1), # output H 136 | (self._inputDim[3] + self._filterDim[1] - 1)) # output W 137 | elif self._border_mode == 'same': 138 | self._outputDim = (self._inputDim[0], # batch_size 139 | self._nFilters, # number of kernels 140 | self._inputDim[2], # output H 141 | 
self._inputDim[3]) # output W 142 | else: 143 | raise ValueError("Unknown border mode") 144 | 145 | # correct stride 146 | self._outputDim = list(self._outputDim) 147 | self._outputDim[2] = int(numpy.ceil(self._outputDim[2] / float(self._stride[0]))) 148 | self._outputDim[3] = int(numpy.ceil(self._outputDim[3] / float(self._stride[1]))) 149 | self._outputDim = tuple(self._outputDim) 150 | 151 | def getMemoryRequirement(self): 152 | """ 153 | Get memory requirements of weights 154 | :return: memory requirement 155 | """ 156 | return (numpy.prod(self.filter_shape) + self.filter_shape[0]) * 4 # sizeof(theano.config.floatX) 157 | 158 | def getOutputRange(self): 159 | """ 160 | Get output range of layer 161 | :return: output range as tuple 162 | """ 163 | if self._activation == T.tanh: 164 | return [-1, 1] 165 | elif self._activation == T.nnet.sigmoid: 166 | return [0, 1] 167 | elif self._activation == ReLU: 168 | return [0, numpy.inf] 169 | else: 170 | return [-numpy.inf, numpy.inf] 171 | 172 | 173 | class ConvLayer(object): 174 | """ 175 | Pool Layer of a convolutional network 176 | 177 | copy of LeNetConvPoolLayer from deeplearning.net tutorials 178 | """ 179 | 180 | def __init__(self, rng, inputVar, cfgParams, copyLayer=None, layerNum=None): 181 | """ 182 | Allocate a LeNetConvPoolLayer with shared variable internal parameters. 183 | 184 | :type rng: numpy.random.RandomState 185 | :param rng: a random number generator used to initialize weights 186 | 187 | :type inputVar: theano.tensor.dtensor4 188 | :param inputVar: symbolic image tensor, of shape image_shape 189 | 190 | :type cfgParams: ConvPoolLayerParams 191 | """ 192 | 193 | assert isinstance(cfgParams, ConvLayerParams) 194 | 195 | floatX = theano.config.floatX # @UndefinedVariable 196 | 197 | filter_shape = cfgParams.filter_shape 198 | image_shape = cfgParams.image_shape 199 | filter_stride = cfgParams.stride 200 | activation = cfgParams.activation 201 | inputDim = cfgParams.inputDim 202 | border_mode = cfgParams.border_mode 203 | 204 | self.cfgParams = cfgParams 205 | self.layerNum = layerNum 206 | 207 | assert image_shape[1] == filter_shape[1] 208 | self.inputVar = inputVar 209 | 210 | # there are "num inputVar feature maps * filter height * filter width" 211 | # inputs to each hidden unit 212 | fan_in = numpy.prod(filter_shape[1:]) 213 | # each unit in the lower layer receives a gradient from: 214 | # "num output feature maps * filter height * filter width" / filter stride 215 | fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(filter_stride)) 216 | 217 | if not (copyLayer is None): 218 | self.W = copyLayer.W 219 | else: 220 | # initialize weights with random weights 221 | if activation == ReLU: 222 | W_bound = numpy.sqrt(2. / numpy.prod(filter_shape[1:])) 223 | wInitVals = numpy.asarray(rng.normal(loc=0.0, scale=W_bound, size=filter_shape), dtype=floatX) 224 | elif activation == theano.tensor.nnet.sigmoid: 225 | W_bound = 4. * numpy.sqrt(6. / (fan_in + fan_out)) 226 | wInitVals = numpy.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape), dtype=floatX) 227 | else: 228 | W_bound = 1. / (fan_in + fan_out) 229 | wInitVals = numpy.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape), dtype=floatX) 230 | 231 | # try pca to create an orthogonal set of filters to start with 232 | w_init_orthogonal = False # True ? 
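            # (The optional branch below orthogonalizes the random init: the
            # filters are flattened to a (nFilters, prod(filter_shape[1:]))
            # matrix, an SVD yields an orthonormal basis U, and the first
            # nFilters basis vectors are reshaped back into filter_shape.
            # Disabled by default.)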
233 | if w_init_orthogonal: 234 | wInitVals = numpy.reshape(wInitVals, (filter_shape[0], numpy.prod(filter_shape[1:]))) 235 | svd = numpy.linalg.svd(wInitVals.T) 236 | U = svd[0] 237 | wInitVals = U.T[0:filter_shape[0]].T 238 | wInitVals = numpy.reshape(wInitVals.swapaxes(0, 1), filter_shape) 239 | 240 | self.W = theano.shared(wInitVals, borrow=True, name='convW{}'.format(layerNum)) 241 | 242 | # the bias is a 1D tensor -- one bias per output feature map 243 | if not (copyLayer is None): 244 | self.b = copyLayer.b 245 | else: 246 | if activation == ReLU: 247 | b_values = numpy.zeros((filter_shape[0],), dtype=floatX) # TODO ones 248 | else: 249 | b_values = numpy.zeros((filter_shape[0],), dtype=floatX) 250 | self.b = theano.shared(value=b_values, borrow=True, name='convB{}'.format(layerNum)) 251 | 252 | if border_mode == 'same': 253 | # convolve inputVar feature maps with filters 254 | conv_out = conv2d(input=inputVar, 255 | filters=self.W, 256 | filter_shape=filter_shape, 257 | input_shape=image_shape, 258 | subsample=filter_stride, 259 | border_mode='full') 260 | 261 | # perform full convolution and crop output of input size 262 | offset_2 = filter_shape[2]//2 263 | offset_3 = filter_shape[3]//2 264 | conv_out = conv_out[:, :, offset_2:offset_2+image_shape[2], offset_3:offset_3+image_shape[3]] 265 | else: 266 | # convolve inputVar feature maps with filters 267 | conv_out = conv2d(input=inputVar, 268 | filters=self.W, 269 | filter_shape=filter_shape, 270 | input_shape=image_shape, 271 | subsample=filter_stride, 272 | border_mode=border_mode) 273 | 274 | # add the bias term. Since the bias is a vector (1D array), we first reshape it to a tensor of shape 275 | # (1,n_filters,1,1). Each bias will thus be broadcasted across mini-batches and feature map width & height 276 | lin_output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x') 277 | self.output = (lin_output if activation is None 278 | else activation(lin_output)) 279 | 280 | self.output.name = 'output_layer_{}'.format(self.layerNum) 281 | 282 | # store parameters of this layer 283 | self.params = [self.W, self.b] 284 | self.weights = [self.W] 285 | 286 | def __str__(self): 287 | """ 288 | Print configuration of layer 289 | :return: configuration string 290 | """ 291 | return "inputDim {}, outputDim {}, filterDim {}, nFilters {}, activation {}, stride {}, border_mode {}".format(self.cfgParams.inputDim, self.cfgParams.outputDim, self.cfgParams.filterDim, 292 | self.cfgParams.nFilters, self.cfgParams.activation_str, 293 | self.cfgParams.stride, self.cfgParams.border_mode) 294 | -------------------------------------------------------------------------------- /src/net/convpoollayer.py: -------------------------------------------------------------------------------- 1 | """Provides ConvLayer class for using in CNNs. 2 | 3 | ConvLayer provides interface for building convolutional layers in CNNs. 4 | ConvLayerParams is the parametrization of these ConvLayer layers. 5 | 6 | Copyright 2015 Markus Oberweger, ICG, 7 | Graz University of Technology 8 | 9 | This file is part of DeepPrior. 10 | 11 | DeepPrior is free software: you can redistribute it and/or modify 12 | it under the terms of the GNU General Public License as published by 13 | the Free Software Foundation, either version 3 of the License, or 14 | (at your option) any later version. 15 | 16 | DeepPrior is distributed in the hope that it will be useful, 17 | but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 19 | GNU General Public License for more details. 20 | 21 | You should have received a copy of the GNU General Public License 22 | along with DeepPrior. If not, see . 23 | """ 24 | 25 | import numpy 26 | import theano 27 | import theano.sandbox.neighbours 28 | import theano.tensor as T 29 | from theano.tensor.signal.pool import pool_2d 30 | from theano.tensor.nnet import conv2d 31 | from net.layerparams import LayerParams 32 | from util.helpers import ReLU 33 | 34 | __author__ = "Paul Wohlhart , Markus Oberweger " 35 | __copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria" 36 | __credits__ = ["Paul Wohlhart", "Markus Oberweger"] 37 | __license__ = "GPL" 38 | __version__ = "1.0" 39 | __maintainer__ = "Markus Oberweger" 40 | __email__ = "oberweger@icg.tugraz.at" 41 | __status__ = "Development" 42 | 43 | 44 | class ConvPoolLayerParams(LayerParams): 45 | 46 | def __init__(self, inputDim=None, nFilters=None, filterDim=None, activation=T.tanh, poolsize=(1, 1), poolType=0, 47 | filter_shape=None, image_shape=None, outputDim=None, stride=(1, 1), border_mode='valid'): 48 | """ 49 | :type filter_shape: tuple or list of length 4 50 | :param filter_shape: (number of filters, num inputVar feature maps, filter height,filter width) 51 | 52 | :type image_shape: tuple or list of length 4 53 | :param image_shape: (batch size, num inputVar feature maps, image height, image width) 54 | 55 | :type poolsize: tuple or list of length 2 56 | :param poolsize: the downsampling (pooling) factor (#rows,#cols) 57 | """ 58 | 59 | super(ConvPoolLayerParams, self).__init__(inputDim, outputDim) 60 | 61 | self._nFilters = nFilters 62 | self._filterDim = filterDim 63 | self._poolsize = poolsize 64 | self._poolType = poolType 65 | self._filter_shape = filter_shape 66 | self._image_shape = image_shape 67 | self._activation = activation 68 | self._stride = stride 69 | self._border_mode = border_mode 70 | self.update() 71 | 72 | @property 73 | def filter_shape(self): 74 | return self._filter_shape 75 | 76 | @property 77 | def image_shape(self): 78 | return self._image_shape 79 | 80 | @property 81 | def stride(self): 82 | return self._stride 83 | 84 | @stride.setter 85 | def stride(self, value): 86 | self._stride = value 87 | self.update() 88 | 89 | @property 90 | def border_mode(self): 91 | return self._border_mode 92 | 93 | @border_mode.setter 94 | def border_mode(self, value): 95 | self._border_mode = value 96 | self.update() 97 | 98 | @property 99 | def nFilters(self): 100 | return self._nFilters 101 | 102 | @nFilters.setter 103 | def nFilters(self, value): 104 | self._nFilters = value 105 | self.update() 106 | 107 | @property 108 | def filterDim(self): 109 | return self._filterDim 110 | 111 | @filterDim.setter 112 | def filterDim(self, value): 113 | self._filterDim = value 114 | self.update() 115 | 116 | @property 117 | def poolsize(self): 118 | return self._poolsize 119 | 120 | @poolsize.setter 121 | def poolsize(self, value): 122 | self._poolsize = value 123 | self.update() 124 | 125 | @property 126 | def poolType(self): 127 | return self._poolType 128 | 129 | @property 130 | def activation(self): 131 | return self._activation 132 | 133 | def update(self): 134 | """ 135 | calc image_shape, 136 | """ 137 | self._filter_shape = (self._nFilters, 138 | self._inputDim[1], 139 | self._filterDim[1], 140 | self._filterDim[0]) 141 | self._image_shape = self._inputDim 142 | 143 | if self._border_mode == 'valid': 144 | self._outputDim = (self._inputDim[0], # batch_size 145 | self._nFilters, # number of 
kernels 146 | (self._inputDim[2] - self._filterDim[0] + 1), # output H 147 | (self._inputDim[3] - self._filterDim[1] + 1)) # output W 148 | elif self._border_mode == 'full': 149 | self._outputDim = (self._inputDim[0], # batch_size 150 | self._nFilters, # number of kernels 151 | (self._inputDim[2] + self._filterDim[0] - 1), # output H 152 | (self._inputDim[3] + self._filterDim[1] - 1)) # output W 153 | elif self._border_mode == 'same': 154 | self._outputDim = (self._inputDim[0], # batch_size 155 | self._nFilters, # number of kernels 156 | self._inputDim[2], # output H 157 | self._inputDim[3]) # output W 158 | else: 159 | raise ValueError("Unknown border mode") 160 | 161 | # correct stride 162 | self._outputDim = list(self._outputDim) 163 | self._outputDim[2] = int(numpy.ceil(self._outputDim[2] / float(self._stride[0]))) // self._poolsize[0] 164 | self._outputDim[3] = int(numpy.ceil(self._outputDim[3] / float(self._stride[1]))) // self._poolsize[1] 165 | self._outputDim = tuple(self._outputDim) 166 | 167 | # no pooling required 168 | if(self._poolsize[0] == 1) and (self._poolsize[1] == 1): 169 | self._poolType = -1 170 | 171 | def getMemoryRequirement(self): 172 | """ 173 | Get memory requirements of weights 174 | :return: memory requirement 175 | """ 176 | return (numpy.prod(self.filter_shape) + self.filter_shape[0]) * 4 # sizeof(theano.config.floatX) 177 | 178 | def getOutputRange(self): 179 | """ 180 | Get output range of layer 181 | :return: output range as tuple 182 | """ 183 | if self._activation == T.tanh: 184 | return [-1, 1] 185 | elif self._activation == T.nnet.sigmoid: 186 | return [0, 1] 187 | elif self._activation == ReLU: 188 | return [0, numpy.inf] 189 | else: 190 | return [-numpy.inf, numpy.inf] 191 | 192 | 193 | class ConvPoolLayer(object): 194 | """ 195 | Pool Layer of a convolutional network 196 | 197 | copy of LeNetConvPoolLayer from deeplearning.net tutorials 198 | """ 199 | 200 | def __init__(self, rng, inputVar, cfgParams, copyLayer=None, layerNum=None): 201 | """ 202 | Allocate a LeNetConvPoolLayer with shared variable internal parameters. 
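        Pooling is selected via cfgParams.poolType: 0 applies max-pooling,
        1 average pooling, and -1 skips pooling entirely; ConvPoolLayerParams
        sets poolType = -1 automatically when poolsize is (1, 1).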
203 | 204 | :type rng: numpy.random.RandomState 205 | :param rng: a random number generator used to initialize weights 206 | 207 | :type inputVar: theano.tensor.dtensor4 208 | :param inputVar: symbolic image tensor, of shape image_shape 209 | 210 | :type cfgParams: ConvPoolLayerParams 211 | """ 212 | 213 | assert isinstance(cfgParams, ConvPoolLayerParams) 214 | 215 | floatX = theano.config.floatX # @UndefinedVariable 216 | 217 | filter_shape = cfgParams.filter_shape 218 | image_shape = cfgParams.image_shape 219 | filter_stride = cfgParams.stride 220 | poolsize = cfgParams.poolsize 221 | poolType = cfgParams.poolType 222 | activation = cfgParams.activation 223 | inputDim = cfgParams.inputDim 224 | border_mode = cfgParams.border_mode 225 | 226 | self.cfgParams = cfgParams 227 | self.layerNum = layerNum 228 | 229 | assert image_shape[1] == filter_shape[1] 230 | self.inputVar = inputVar 231 | 232 | # there are "num inputVar feature maps * filter height * filter width" 233 | # inputs to each hidden unit 234 | fan_in = numpy.prod(filter_shape[1:]) 235 | # each unit in the lower layer receives a gradient from: 236 | # "num output feature maps * filter height * filter width" / pooling size 237 | fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize) / numpy.prod(filter_stride)) 238 | 239 | if not (copyLayer is None): 240 | self.W = copyLayer.W 241 | else: 242 | # initialize weights with random weights 243 | if activation == ReLU: 244 | W_bound = numpy.sqrt(2. / numpy.prod(filter_shape[1:])) 245 | wInitVals = numpy.asarray(rng.normal(loc=0.0, scale=W_bound, size=filter_shape), dtype=floatX) 246 | elif activation == theano.tensor.nnet.sigmoid: 247 | W_bound = 4. * numpy.sqrt(6. / (fan_in + fan_out)) 248 | wInitVals = numpy.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape), dtype=floatX) 249 | else: 250 | W_bound = 1. / (fan_in + fan_out) 251 | wInitVals = numpy.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape), dtype=floatX) 252 | 253 | # try pca to create an orthogonal set of filters to start with 254 | w_init_orthogonal = False # True ? 
255 | if w_init_orthogonal: 256 | wInitVals = numpy.reshape(wInitVals, (filter_shape[0], numpy.prod(filter_shape[1:]))) 257 | svd = numpy.linalg.svd(wInitVals.T) 258 | U = svd[0] 259 | wInitVals = U.T[0:filter_shape[0]].T 260 | wInitVals = numpy.reshape(wInitVals.swapaxes(0, 1), filter_shape) 261 | 262 | self.W = theano.shared(wInitVals, borrow=True, name='convW{}'.format(layerNum)) 263 | 264 | # the bias is a 1D tensor -- one bias per output feature map 265 | if not (copyLayer is None): 266 | self.b = copyLayer.b 267 | else: 268 | if activation == ReLU: 269 | b_values = numpy.zeros((filter_shape[0],), dtype=floatX) # TODO ones 270 | else: 271 | b_values = numpy.zeros((filter_shape[0],), dtype=floatX) 272 | self.b = theano.shared(value=b_values, borrow=True, name='convB{}'.format(layerNum)) 273 | 274 | if border_mode == 'same': 275 | # convolve inputVar feature maps with filters 276 | conv_out = conv2d(input=inputVar, 277 | filters=self.W, 278 | filter_shape=filter_shape, 279 | input_shape=image_shape, 280 | subsample=filter_stride, 281 | border_mode='full') 282 | 283 | # perform full convolution and crop output of input size 284 | offset_2 = filter_shape[2]//2 285 | offset_3 = filter_shape[3]//2 286 | conv_out = conv_out[:, :, offset_2:offset_2+image_shape[2], offset_3:offset_3+image_shape[3]] 287 | else: 288 | # convolve inputVar feature maps with filters 289 | # TODO THIS SHOULD DO THEANO 290 | # conv_out = theano.sandbox.cuda.dnn.dnn_conv(inputVar, self.W, border_mode=border_mode, subsample=filter_stride, conv_mode='conv', direction_hint=None, workmem=None) 291 | conv_out = conv2d(input=inputVar, 292 | filters=self.W, 293 | filter_shape=filter_shape, 294 | input_shape=image_shape, 295 | subsample=filter_stride, 296 | border_mode=border_mode) 297 | 298 | # downsample each feature map individually, using maxpooling 299 | if poolType == 0: 300 | # using maxpooling 301 | pooled_out = pool_2d(input=conv_out, ds=poolsize, ignore_border=True) 302 | elif poolType == 1: 303 | # using average pooling 304 | pooled_out = theano.sandbox.neighbours.images2neibs(ten4=conv_out, neib_shape=poolsize, mode='ignore_borders').mean(axis=-1) 305 | new_shape = T.cast(T.join(0, conv_out.shape[:-2], T.as_tensor([conv_out.shape[2]//poolsize[0]]), 306 | T.as_tensor([conv_out.shape[3]//poolsize[1]])), 'int64') 307 | pooled_out = T.reshape(pooled_out, new_shape, ndim=4) 308 | elif poolType == -1: 309 | # no pooling at all 310 | pooled_out = conv_out 311 | 312 | # add the bias term. Since the bias is a vector (1D array), we first reshape it to a tensor of shape 313 | # (1,n_filters,1,1). 
Each bias will thus be broadcasted across mini-batches and feature map width & height 314 | lin_output = pooled_out + self.b.dimshuffle('x', 0, 'x', 'x') 315 | self.output = (lin_output if activation is None 316 | else activation(lin_output)) 317 | 318 | self.output.name = 'output_layer_{}'.format(self.layerNum) 319 | 320 | # store parameters of this layer 321 | self.params = [self.W, self.b] 322 | self.weights = [self.W] 323 | 324 | def __str__(self): 325 | """ 326 | Print configuration of layer 327 | :return: configuration string 328 | """ 329 | return "inputDim {}, outputDim {}, filterDim {}, nFilters {}, activation {}, stride {}, border_mode {}, pool_type {}, pool_size {}".format(self.cfgParams.inputDim, self.cfgParams.outputDim, self.cfgParams.filterDim, 330 | self.cfgParams.nFilters, self.cfgParams.activation_str, self.cfgParams.stride, self.cfgParams.border_mode, self.cfgParams.poolType, self.cfgParams.poolsize) 331 | -------------------------------------------------------------------------------- /src/net/dropoutlayer.py: -------------------------------------------------------------------------------- 1 | """Provides DropoutLayer class for using in CNNs. 2 | 3 | DropoutLayer provides interface for building dropout layers in CNNs. 4 | DropoutLayerParams is the parametrization of these DropoutLayer layers. 5 | 6 | Copyright 2015 Markus Oberweger, ICG, 7 | Graz University of Technology 8 | 9 | This file is part of DeepPrior. 10 | 11 | DeepPrior is free software: you can redistribute it and/or modify 12 | it under the terms of the GNU General Public License as published by 13 | the Free Software Foundation, either version 3 of the License, or 14 | (at your option) any later version. 15 | 16 | DeepPrior is distributed in the hope that it will be useful, 17 | but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | GNU General Public License for more details. 20 | 21 | You should have received a copy of the GNU General Public License 22 | along with DeepPrior. If not, see . 23 | """ 24 | 25 | import numpy 26 | import theano 27 | import theano.tensor as T 28 | from net.layerparams import LayerParams 29 | 30 | __author__ = "Markus Oberweger " 31 | __copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria" 32 | __credits__ = ["Paul Wohlhart", "Markus Oberweger"] 33 | __license__ = "GPL" 34 | __version__ = "1.0" 35 | __maintainer__ = "Markus Oberweger" 36 | __email__ = "oberweger@icg.tugraz.at" 37 | __status__ = "Development" 38 | 39 | 40 | class DropoutLayerParams(LayerParams): 41 | def __init__(self, inputDim=None, outputDim=None, p=0.3): 42 | """ 43 | :type inputDim: int 44 | :param inputDim: dimensionality of input 45 | 46 | :type outputDim: int 47 | :param outputDim: number of hidden units 48 | 49 | :type p: float 50 | :param p: Probability for dropping a unit of the layer 51 | """ 52 | 53 | super(DropoutLayerParams, self).__init__(inputDim=inputDim, outputDim=outputDim) 54 | 55 | self._p = p 56 | 57 | @property 58 | def p(self): 59 | return self._p 60 | 61 | @p.setter 62 | def p(self, value): 63 | self._p = value 64 | 65 | 66 | class DropoutLayer(object): 67 | def __init__(self, rng, inputVar, cfgParams, copyLayer=None, layerNum=None): 68 | """ 69 | Dropout layer of a MLP: units are fully-connected and connections are 70 | dropped randomly during training. 
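        With flag_on == 1 (training) the output is mask * inputVar, where the
        binary mask keeps each unit with probability 1 - p; with flag_on == 0
        (testing) the output is (1 - p) * inputVar, i.e. the expectation of
        the training-time output, so no extra weight rescaling is required.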
71 | 72 | :type rng: numpy.random.RandomState 73 | :param rng: a random number generator used to initialize mask 74 | 75 | :type inputVar: theano.tensor.fmatrix 76 | :param inputVar: a symbolic tensor of shape (n_examples, n_in) 77 | 78 | :type cfgParams: DropoutLayerParams 79 | """ 80 | 81 | self.inputVar = inputVar 82 | self.cfgParams = cfgParams 83 | self.layerNum = layerNum 84 | 85 | # see https://github.com/uoguelph-mlrg/theano_alexnet/blob/master/alex_net.py 86 | self.prob_drop = cfgParams.p 87 | self.prob_keep = 1.0 - cfgParams.p 88 | self.flag_on = theano.shared(numpy.cast[theano.config.floatX](1.0), name='flag_on') 89 | 90 | # mask_rng = theano.tensor.shared_randomstreams.RandomStreams(rng.randint(999999)) 91 | # faster rng on GPU 92 | from theano.sandbox.rng_mrg import MRG_RandomStreams 93 | mask_rng = MRG_RandomStreams(rng.randint(999999)) 94 | self.mask = mask_rng.binomial(n=1, p=self.prob_keep, size=self.inputVar.shape) 95 | self.output = self.flag_on * T.cast(self.mask, theano.config.floatX) * self.inputVar + (1.0 - self.flag_on) * self.prob_keep * self.inputVar 96 | self.output.name = 'output_layer_{}'.format(self.layerNum) 97 | 98 | # no params and weights 99 | self.params = [] 100 | self.weights = [] 101 | 102 | def enableDropout(self): 103 | """ 104 | Enable dropout 105 | :return: None 106 | """ 107 | self.flag_on.set_value(1.0) 108 | 109 | def disableDropout(self): 110 | """ 111 | Disable dropout 112 | :return: None 113 | """ 114 | self.flag_on.set_value(0.0) 115 | 116 | def dropoutEnabled(self): 117 | """ 118 | Check if dropout is enabled 119 | :return: True if enabled 120 | """ 121 | return self.flag_on.get_value() == 1.0 122 | 123 | def __str__(self): 124 | """ 125 | Print configuration of layer 126 | :return: configuration string 127 | """ 128 | return "inputDim {}, outputDim {}, p {}".format(self.cfgParams.inputDim, self.cfgParams.outputDim, self.cfgParams.p) -------------------------------------------------------------------------------- /src/net/hiddenlayer.py: -------------------------------------------------------------------------------- 1 | """Provides HiddenLayer class for using in CNNs. 2 | 3 | HiddenLayer provides interface for building hidden (fully connected) layers in CNNs. 4 | HiddenLayerParams is the parametrization of these HiddenLayer layers. 5 | 6 | Copyright 2015 Markus Oberweger, ICG, 7 | Graz University of Technology 8 | 9 | This file is part of DeepPrior. 10 | 11 | DeepPrior is free software: you can redistribute it and/or modify 12 | it under the terms of the GNU General Public License as published by 13 | the Free Software Foundation, either version 3 of the License, or 14 | (at your option) any later version. 15 | 16 | DeepPrior is distributed in the hope that it will be useful, 17 | but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | GNU General Public License for more details. 20 | 21 | You should have received a copy of the GNU General Public License 22 | along with DeepPrior. If not, see . 
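A minimal usage sketch (illustrative only; the dimensions are assumed):

    import numpy
    import theano.tensor as T
    from net.hiddenlayer import HiddenLayer, HiddenLayerParams

    rng = numpy.random.RandomState(23455)
    x = T.matrix('x')  # (n_examples, n_in)
    cfg = HiddenLayerParams(inputDim=(128, 1024), outputDim=(128, 30),
                            activation=T.tanh)
    layer = HiddenLayer(rng, inputVar=x, cfgParams=cfg, layerNum=0)
    # layer.output computes activation(T.dot(x, layer.W) + layer.b)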
23 | """ 24 | 25 | import inspect 26 | import numpy 27 | import theano 28 | import theano.tensor as T 29 | from net.layerparams import LayerParams 30 | from util.helpers import ReLU 31 | 32 | __author__ = "Paul Wohlhart , Markus Oberweger " 33 | __copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria" 34 | __credits__ = ["Paul Wohlhart", "Markus Oberweger"] 35 | __license__ = "GPL" 36 | __version__ = "1.0" 37 | __maintainer__ = "Markus Oberweger" 38 | __email__ = "oberweger@icg.tugraz.at" 39 | __status__ = "Development" 40 | 41 | 42 | class HiddenLayerParams(LayerParams): 43 | def __init__(self, inputDim=None, outputDim=None, activation=None): 44 | """ 45 | :type inputDim: tuple of [int] 46 | :param inputDim: dimensionality of input 47 | 48 | :type outputDim: tuple of [int] 49 | :param outputDim: number of hidden units 50 | 51 | :type activation: theano.Op or function 52 | :param activation: Non linearity to be applied in the hidden layer 53 | """ 54 | 55 | super(HiddenLayerParams, self).__init__(inputDim, outputDim) 56 | 57 | self._activation = activation 58 | 59 | @property 60 | def activation(self): 61 | return self._activation 62 | 63 | @activation.setter 64 | def activation(self, value): 65 | self._activation = value 66 | 67 | def getMemoryRequirement(self): 68 | """ 69 | Get memory requirements of weights 70 | :return: memory requirement 71 | """ 72 | return ((self.inputDim[1] * self.outputDim[1]) + self.outputDim[1]) * 4 # sizeof(theano.config.floatX) 73 | 74 | def getOutputRange(self): 75 | """ 76 | Get output range of layer 77 | :return: output range as tuple 78 | """ 79 | if self._activation == T.tanh: 80 | return [-1, 1] 81 | elif self._activation == T.nnet.sigmoid: 82 | return [0, 1] 83 | elif self._activation == ReLU: 84 | return [0, numpy.inf] 85 | else: 86 | return [-numpy.inf, numpy.inf] 87 | 88 | 89 | class HiddenLayer(object): 90 | def __init__(self, rng, inputVar, cfgParams, copyLayer=None, layerNum=None): 91 | """ 92 | Typical hidden layer of a MLP: units are fully-connected. 93 | Weight matrix W is of shape (n_in,n_out) 94 | and the bias vector b is of shape (n_out,). 95 | 96 | Hidden unit activation is given by: activation(dot(inputVar,W) + b) 97 | 98 | :type rng: numpy.random.RandomState 99 | :param rng: a random number generator used to initialize weights 100 | 101 | :type inputVar: theano.tensor.dmatrix 102 | :param inputVar: a symbolic tensor of shape (n_examples, n_in) 103 | 104 | :type cfgParams: HiddenLayerParams 105 | """ 106 | 107 | assert isinstance(cfgParams, HiddenLayerParams) 108 | 109 | self.inputVar = inputVar 110 | self.cfgParams = cfgParams 111 | self.layerNum = layerNum 112 | 113 | n_in = cfgParams.inputDim[1] 114 | n_out = cfgParams.outputDim[1] 115 | activation = cfgParams.activation 116 | 117 | # `W` is initialized with `W_values` which is uniformely sampled from sqrt(-6./(n_in+n_hidden)) and 118 | # sqrt(6./(n_in+n_hidden)) for tanh activation function the output of uniform if converted using asarray 119 | # to dtype theano.config.floatX so that the code is runable on GPU. 120 | # Note : optimal initialization of weights is dependent on the activation function used (among other things). 121 | # For example, results presented in [Xavier10] suggest that you should use 4 times larger initial weights for 122 | # sigmoid compared to tanh. We have no info for other function, so we use the same as tanh. 
123 | floatX = theano.config.floatX # @UndefinedVariable 124 | 125 | if copyLayer is None: 126 | if activation == ReLU: 127 | W_values = numpy.asarray(rng.normal(loc=0.0, scale=0.01, size=(n_in, n_out)), dtype=floatX) 128 | elif activation == theano.tensor.nnet.sigmoid: 129 | W_values = 4. * numpy.asarray(rng.uniform(low=-numpy.sqrt(6. / (n_in + n_out)), 130 | high=numpy.sqrt(6. / (n_in + n_out)), 131 | size=(n_in, n_out)), dtype=floatX) 132 | else: # activation == T.tanh 133 | W_values = numpy.asarray(rng.uniform(low=-numpy.sqrt(6. / (n_in + n_out)), 134 | high=numpy.sqrt(6. / (n_in + n_out)), 135 | size=(n_in, n_out)), dtype=floatX) 136 | 137 | self.W = theano.shared(value=W_values, name='W{}'.format(layerNum), borrow=True) 138 | 139 | if activation == ReLU: 140 | b_values = numpy.zeros((n_out,), dtype=floatX) 141 | else: 142 | b_values = numpy.zeros((n_out,), dtype=floatX) 143 | self.b = theano.shared(value=b_values, name='b{}'.format(layerNum), borrow=True) 144 | 145 | else: 146 | self.W = copyLayer.W 147 | self.b = copyLayer.b 148 | 149 | lin_output = T.dot(inputVar, self.W) + self.b 150 | if activation is None: 151 | self.output = lin_output 152 | self.output.name = 'output_layer_{}'.format(self.layerNum) 153 | self.params = [self.W, self.b] 154 | else: 155 | if inspect.isfunction(activation) and len(inspect.getargspec(activation).args) == 2: 156 | c_values = numpy.ones((n_out,), dtype=floatX)*0.5 157 | self.c = theano.shared(value=c_values, name='c{}'.format(layerNum), borrow=True) 158 | self.output = activation(lin_output, self.c) 159 | self.output.name = 'output_layer_{}'.format(self.layerNum) 160 | self.params = [self.W, self.b, self.c] 161 | else: 162 | self.output = activation(lin_output) 163 | self.output.name = 'output_layer_{}'.format(self.layerNum) 164 | self.params = [self.W, self.b] 165 | 166 | # parameters of the model 167 | self.weights = [self.W] 168 | 169 | def __str__(self): 170 | """ 171 | Print configuration of layer 172 | :return: configuration string 173 | """ 174 | return "inputDim {}, outputDim {}, activation {}".format(self.cfgParams.inputDim, self.cfgParams.outputDim, 175 | self.cfgParams.activation_str) 176 | -------------------------------------------------------------------------------- /src/net/layerparams.py: -------------------------------------------------------------------------------- 1 | """Provides LayerParams class used for parametrizing other layers. 2 | 3 | Copyright 2015 Markus Oberweger, ICG, 4 | Graz University of Technology 5 | 6 | This file is part of DeepPrior. 7 | 8 | DeepPrior is free software: you can redistribute it and/or modify 9 | it under the terms of the GNU General Public License as published by 10 | the Free Software Foundation, either version 3 of the License, or 11 | (at your option) any later version. 12 | 13 | DeepPrior is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU General Public License for more details. 17 | 18 | You should have received a copy of the GNU General Public License 19 | along with DeepPrior. If not, see .
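Derived parameter classes override update(), which the inputDim and
outputDim setters invoke, so changing one dimension recomputes all
dependent shapes (e.g. filter_shape and outputDim in ConvLayerParams).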
20 | """ 21 | 22 | import inspect 23 | 24 | __author__ = "Paul Wohlhart " 25 | __copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria" 26 | __credits__ = ["Paul Wohlhart", "Markus Oberweger"] 27 | __license__ = "GPL" 28 | __version__ = "1.0" 29 | __maintainer__ = "Markus Oberweger" 30 | __email__ = "oberweger@icg.tugraz.at" 31 | __status__ = "Development" 32 | 33 | 34 | class LayerParams(object): 35 | """ 36 | Parametrization of different layers for CNNs 37 | """ 38 | 39 | def __init__(self, inputDim, outputDim): 40 | """ 41 | Constructor 42 | """ 43 | self._inputDim = inputDim 44 | self._outputDim = outputDim 45 | 46 | @property 47 | def outputDim(self): 48 | return self._outputDim 49 | 50 | @outputDim.setter 51 | def outputDim(self, value): 52 | self._outputDim = value 53 | self.update() 54 | 55 | @property 56 | def inputDim(self): 57 | return self._inputDim 58 | 59 | @inputDim.setter 60 | def inputDim(self, value): 61 | self._inputDim = value 62 | self.update() 63 | 64 | def update(self): 65 | """ 66 | Default. Override in derived 67 | """ 68 | pass 69 | 70 | @property 71 | def activation_str(self): 72 | """ 73 | Get printable string from activation function. 74 | :return: string 75 | """ 76 | if hasattr(self, 'activation'): 77 | if self.activation is None: 78 | return str(None) 79 | elif inspect.isclass(self.activation): 80 | return self.activation.__class__.__name__ 81 | elif inspect.isfunction(self.activation): 82 | return self.activation.__name__ 83 | else: 84 | return str(self.activation) 85 | else: 86 | return '' 87 | -------------------------------------------------------------------------------- /src/net/netbase.py: -------------------------------------------------------------------------------- 1 | """Provides NetBase class for generating networks from configurations. 2 | 3 | NetBase provides interface for building CNNs. 4 | It should be inherited by all network classes in order to provide 5 | basic functionality, ie computing outputs, creating computational 6 | graph, managing dropout, etc. 7 | NetBaseParams is the parametrization of these NetBase networks. 8 | 9 | Copyright 2015 Markus Oberweger, ICG, 10 | Graz University of Technology 11 | 12 | This file is part of DeepPrior. 13 | 14 | DeepPrior is free software: you can redistribute it and/or modify 15 | it under the terms of the GNU General Public License as published by 16 | the Free Software Foundation, either version 3 of the License, or 17 | (at your option) any later version. 18 | 19 | DeepPrior is distributed in the hope that it will be useful, 20 | but WITHOUT ANY WARRANTY; without even the implied warranty of 21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 22 | GNU General Public License for more details. 23 | 24 | You should have received a copy of the GNU General Public License 25 | along with DeepPrior. If not, see . 
26 | """ 27 | 28 | import difflib 29 | import gzip 30 | import time 31 | import numpy 32 | import cPickle 33 | import re 34 | import theano 35 | import theano.tensor as T 36 | from net.convpoollayer import ConvPoolLayer, ConvPoolLayerParams 37 | from net.convlayer import ConvLayer, ConvLayerParams 38 | from net.hiddenlayer import HiddenLayer, HiddenLayerParams 39 | from net.poollayer import PoolLayer, PoolLayerParams 40 | from net.dropoutlayer import DropoutLayer, DropoutLayerParams 41 | 42 | __author__ = "Markus Oberweger " 43 | __copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria" 44 | __credits__ = ["Paul Wohlhart", "Markus Oberweger"] 45 | __license__ = "GPL" 46 | __version__ = "1.0" 47 | __maintainer__ = "Markus Oberweger" 48 | __email__ = "oberweger@icg.tugraz.at" 49 | __status__ = "Development" 50 | 51 | 52 | class NetBaseParams(object): 53 | def __init__(self): 54 | """ 55 | Init the parametrization 56 | """ 57 | 58 | self.numInputs = 1 59 | self.numOutputs = 1 60 | self.layers = [] 61 | self.inputDim = None 62 | self.outputDim = None 63 | 64 | def getMemoryRequirement(self): 65 | """ 66 | Get memory requirements of weights 67 | :return: memory requirement 68 | """ 69 | mem = 0 70 | for l in self.layers: 71 | mem += l.getMemoryRequirement() 72 | return mem 73 | 74 | 75 | class NetBase(object): 76 | def __init__(self, rng, inputVar, cfgParams, twin=None): 77 | """ 78 | Initialize object by constructing the layers 79 | :param rng: random number generator 80 | :param inputVar: input variable 81 | :param cfgParams: parameters 82 | :param twin: determine to copy layer @deprecated 83 | :return: None 84 | """ 85 | 86 | self._params_filter = [] 87 | self._weights_filter = [] 88 | 89 | self.inputVar = inputVar 90 | self.cfgParams = cfgParams 91 | self.rng = rng 92 | 93 | # create network 94 | self.layers = [] 95 | i = 0 96 | for layerParam in cfgParams.layers: 97 | # first input is inputVar, otherwise input is output of last one 98 | if i == 0: 99 | inp = inputVar 100 | else: 101 | # flatten output from conv to hidden layer and reshape from hidden to conv layer 102 | if (len(self.layers[-1].cfgParams.outputDim) == 4) and (len(layerParam.inputDim) == 2): 103 | inp = self.layers[-1].output.flatten(2) 104 | inp.name = "input_layer_{}".format(i) # name this node as it is different from previous output 105 | elif (len(layerParam.inputDim) == 4) and (len(self.layers[-1].cfgParams.outputDim) == 2): 106 | inp = T.reshape(self.layers[-1].output, layerParam.inputDim, ndim=4) 107 | inp.name = "input_layer_{}".format(i) # name this node as it is different from previous output 108 | else: 109 | inp = self.layers[-1].output 110 | 111 | id = layerParam.__class__.__name__[:-6] 112 | constructor = globals()[id] 113 | self.layers.append(constructor(rng, 114 | inputVar=inp, 115 | cfgParams=layerParam, 116 | copyLayer=None if (twin is None) else twin.layers[i], 117 | layerNum=i)) 118 | 119 | i += 1 120 | 121 | # assemble externally visible parameters 122 | self.output = self.layers[-1].output 123 | 124 | # TODO test 125 | # Ngyuen Widrow initialization 126 | # for l in range(len(self.layers)): 127 | # if isinstance(self.layers[l], HiddenLayer) or isinstance(self.layers[l], HiddenLayerInv): 128 | # if l > 0: 129 | # self.resetWeightsNW(rng, self.layers[l-1].cfgParams.getOutputRange(), self.layers[l], self.layers[l].cfgParams.getOutputRange()) 130 | # else: 131 | # self.resetWeightsNW(rng, [-1, 1], self.layers[l], self.layers[l].cfgParams.getOutputRange()) 132 | 133 | def __str__(self): 
134 | """ 135 | prints the parameters of the layers of the network 136 | :return: configuration string 137 | """ 138 | 139 | cfg = "Network configuration:\n" 140 | i = 0 141 | for l in self.layers: 142 | cfg += "Layer {}: {} with {} \n".format(i, l.__class__.__name__, l) 143 | i += 1 144 | 145 | return cfg 146 | 147 | @property 148 | def params(self): 149 | """ 150 | Get a list of the learnable theano parameters for this network. 151 | :return: list of theano variables 152 | """ 153 | 154 | # remove filtered params 155 | if not hasattr(self, '_params_filter'): 156 | self._params_filter = [] 157 | prms = [p for l in self.layers for p in l.params if p.name not in self._params_filter] 158 | 159 | # only unique variables, remove shared weights from list 160 | return dict((obj.auto_name, obj) for obj in prms).values() 161 | 162 | @property 163 | def params_filter(self): 164 | return self._params_filter 165 | 166 | @params_filter.setter 167 | def params_filter(self, bl): 168 | names = [p.name for l in self.layers for p in l.params] 169 | for b in bl: 170 | if b not in names: 171 | raise UserWarning("Param {} not in model!".format(b)) 172 | self._params_filter = bl 173 | 174 | @property 175 | def weights(self): 176 | """ 177 | Get a list of the weights for this network. 178 | :return: list of theano variables 179 | """ 180 | 181 | # remove filtered weights 182 | if not hasattr(self, '_weights_filter'): 183 | self._weights_filter = [] 184 | prms = [p for l in self.layers for p in l.weights if p.name not in self._weights_filter] 185 | 186 | # only unique variables, remove shared weights from list 187 | return dict((obj.auto_name, obj) for obj in prms).values() 188 | 189 | @property 190 | def weights_filter(self): 191 | return self._weights_filter 192 | 193 | @weights_filter.setter 194 | def weights_filter(self, bl): 195 | names = [p.name for l in self.layers for p in l.weights] 196 | for b in bl: 197 | if b not in names: 198 | raise UserWarning("Weight {} not in model!".format(b)) 199 | self._weights_filter = bl 200 | 201 | def computeOutput(self, inputs, timeit=False): 202 | """ 203 | compute the output of the network for given input 204 | :param inputs: input data 205 | :param timeit: print the timing information 206 | :return: output of the network 207 | """ 208 | 209 | # Convert input data 210 | if not isinstance(inputs, list): 211 | inputs = [inputs] 212 | 213 | # All data must be same 214 | assert all(i.shape[0] == inputs[0].shape[0] for i in inputs[1:]) 215 | 216 | if self.dropoutEnabled(): 217 | print("WARNING: dropout is enabled in at least one layer for testing, DISABLING") 218 | self.disableDropout() 219 | 220 | floatX = theano.config.floatX # @UndefinedVariable 221 | batch_size = self.cfgParams.batch_size 222 | nSamp = inputs[0].shape[0] 223 | 224 | padSize = int(batch_size * numpy.ceil(nSamp / float(batch_size))) 225 | 226 | out = [] 227 | if isinstance(self.output, list): 228 | for i in range(len(self.output)): 229 | outSize = list(self.cfgParams.outputDim[i]) 230 | outSize[0] = padSize 231 | out.append(numpy.zeros(tuple(outSize), dtype=floatX)) 232 | else: 233 | outSize = list(self.cfgParams.outputDim) 234 | outSize[0] = padSize 235 | out.append(numpy.zeros(tuple(outSize), dtype=floatX)) 236 | 237 | index = T.lscalar('index') 238 | 239 | if not hasattr(self, 'compute_output'): 240 | self.input_data = [] 241 | self.input_givens = dict() 242 | input_pad = [] 243 | if inputs[0].shape[0] < batch_size: 244 | for k in range(len(inputs)): 245 | shape = list(inputs[k].shape) 246 | shape[0] = 
batch_size 247 | input_pad.append(numpy.zeros(tuple(shape), dtype=floatX)) 248 | input_pad[k][0:inputs[k].shape[0]] = inputs[k][0:inputs[k].shape[0]] 249 | input_pad[k][inputs[k].shape[0]:] = inputs[k][-1] 250 | else: 251 | for k in range(len(inputs)): 252 | input_pad.append(inputs[k]) 253 | for i in range(len(inputs)): 254 | if len(inputs) == 1 and not isinstance(self.inputVar, list): 255 | self.input_data.append(theano.shared(input_pad[i][0:batch_size], self.inputVar.name, borrow=True)) 256 | self.input_givens[self.inputVar] = self.input_data[i][index * batch_size:(index + 1) * batch_size] 257 | else: 258 | assert isinstance(self.inputVar, list) 259 | self.input_data.append(theano.shared(input_pad[i][0:batch_size], self.inputVar[i].name, borrow=True)) 260 | self.input_givens[self.inputVar[i]] = self.input_data[i][index * batch_size:(index + 1) * batch_size] 261 | print("compiling compute_output() ...") 262 | self.compute_output = theano.function(inputs=[index], outputs=self.output, givens=self.input_givens, 263 | mode='FAST_RUN', on_unused_input='warn') 264 | print("done") 265 | 266 | # iterate to save memory 267 | n_test_batches = padSize / batch_size 268 | start = time.time() 269 | for i in range(n_test_batches): 270 | # pad last batch to batch size 271 | if i == n_test_batches-1: 272 | input_pad = [] 273 | for k in range(len(inputs)): 274 | shape = list(inputs[k].shape) 275 | shape[0] = batch_size 276 | input_pad.append(numpy.zeros(tuple(shape), dtype=floatX)) 277 | input_pad[k][0:inputs[k].shape[0]-i*batch_size] = inputs[k][i*batch_size:] 278 | input_pad[k][inputs[k].shape[0]-i*batch_size:] = inputs[k][-1] 279 | for k in range(len(inputs)): 280 | self.input_data[k].set_value(input_pad[k], borrow=True) 281 | else: 282 | for k in range(len(inputs)): 283 | self.input_data[k].set_value(inputs[k][i * batch_size:(i + 1) * batch_size], borrow=True) 284 | o = self.compute_output(0) 285 | if isinstance(self.output, list): 286 | for k in range(len(self.output)): 287 | out[k][i * batch_size:(i + 1) * batch_size] = o[k] 288 | else: 289 | out[0][i * batch_size:(i + 1) * batch_size] = o.reshape(self.cfgParams.outputDim) 290 | end = time.time() 291 | if timeit: 292 | print("{} in {}s, {}ms per frame".format(padSize, end - start, (end - start)*1000./padSize)) 293 | if isinstance(self.output, list): 294 | for k in range(len(self.output)): 295 | out[k] = out[k][0:nSamp] 296 | return out 297 | else: 298 | return out[0][0:nSamp] 299 | 300 | def enableDropout(self): 301 | """ 302 | Enables dropout in all dropout layers, ie for training 303 | :return: None 304 | """ 305 | for layer in self.layers: 306 | if isinstance(layer, DropoutLayer): 307 | layer.enableDropout() 308 | 309 | def disableDropout(self): 310 | """ 311 | Disables dropout in all dropout layers, ie for classification 312 | :return: None 313 | """ 314 | for layer in self.layers: 315 | if isinstance(layer, DropoutLayer): 316 | layer.disableDropout() 317 | 318 | def dropoutEnabled(self): 319 | """ 320 | Disables dropout in all dropout layers, ie for classification 321 | :return: None 322 | """ 323 | for layer in self.layers: 324 | if isinstance(layer, DropoutLayer): 325 | if layer.dropoutEnabled(): 326 | return True 327 | 328 | return False 329 | 330 | def hasDropout(self): 331 | """ 332 | Checks if network has dropout layers 333 | :return: True if there are dropout layers 334 | """ 335 | for layer in self.layers: 336 | if isinstance(layer, DropoutLayer): 337 | return True 338 | 339 | return False 340 | 341 | @property 342 | def 
weightVals(self): 343 | """ 344 | Returns list of the weight values 345 | :return: list of weight values 346 | """ 347 | return self.recGetWeightVals(self.params) 348 | 349 | @weightVals.setter 350 | def weightVals(self, value): 351 | """ 352 | Set weights with given values 353 | :param value: values for weights 354 | :return: None 355 | """ 356 | self.recSetWeightVals(self.params, value) 357 | 358 | def recSetWeightVals(self, param, value): 359 | """ 360 | Set weights with given values 361 | :param param: layer parameters listing the layers weights 362 | :param value: values for weights 363 | :return: None 364 | """ 365 | if isinstance(value, list): 366 | assert isinstance(param, list), "tried to assign a list of weights to params, which is not a list {}".format(type(param)) 367 | assert len(param) == len(value), "tried to assign unequal list of weights {} != {}".format(len(param), len(value)) 368 | for i in xrange(len(value)): 369 | self.recSetWeightVals(param[i], value[i]) 370 | else: 371 | param.set_value(value) 372 | 373 | def recGetWeightVals(self, param): 374 | """ 375 | Returns list of the weight values 376 | :param param: layer parameters listing the layers weights 377 | :return: list of weight values 378 | """ 379 | w = [] 380 | if isinstance(param, list): 381 | for p in param: 382 | w.append(self.recGetWeightVals(p)) 383 | else: 384 | w = param.get_value() 385 | return w 386 | 387 | def save(self, filename): 388 | """ 389 | Save the state of this network to a pickle file on disk. 390 | :param filename: Save the parameters of this network to a pickle file at the named path. If this name ends in 391 | ".gz" then the output will automatically be gzipped; otherwise the output will be a "raw" pickle. 392 | :return: None 393 | """ 394 | 395 | state = dict([('class', self.__class__.__name__), ('network', self.__str__())]) 396 | for layer in self.layers: 397 | key = '{}-values'.format(layer.layerNum) 398 | state[key] = [p.get_value() for p in layer.params] 399 | opener = gzip.open if filename.lower().endswith('.gz') else open 400 | handle = opener(filename, 'wb') 401 | cPickle.dump(state, handle, -1) 402 | handle.close() 403 | print 'Saved model parameter to {}'.format(filename) 404 | 405 | def load(self, filename): 406 | """ 407 | Load the parameters for this network from disk. 408 | :param filename: Load the parameters of this network from a pickle file at the named path. If this name ends in 409 | ".gz" then the input will automatically be gunzipped; otherwise the input will be treated as a "raw" pickle. 410 | :return: None 411 | """ 412 | 413 | opener = gzip.open if filename.lower().endswith('.gz') else open 414 | handle = opener(filename, 'rb') 415 | saved = cPickle.load(handle) 416 | handle.close() 417 | if saved['network'] != self.__str__(): 418 | print "Possibly not matching network configuration!" 419 | differences = list(difflib.Differ().compare(saved['network'].splitlines(), self.__str__().splitlines())) 420 | print "Differences are:" 421 | print "\n".join(differences) 422 | for layer in self.layers: 423 | if len(layer.params) != len(saved['{}-values'.format(layer.layerNum)]): 424 | print "Warning: Layer parameters for layer {} do not match. 
Trying to fit on shape!".format(layer.layerNum) 425 | n_assigned = 0 426 | for p in layer.params: 427 | for v in saved['{}-values'.format(layer.layerNum)]: 428 | if p.get_value().shape == v.shape: 429 | p.set_value(v) 430 | n_assigned += 1 431 | 432 | if n_assigned != len(layer.params): 433 | raise ImportError("Could not load all necessary variables!") 434 | else: 435 | print "Found fitting parameters!" 436 | else: 437 | prms = layer.params 438 | for p, v in zip(prms, saved['{}-values'.format(layer.layerNum)]): 439 | if p.get_value().shape == v.shape: 440 | p.set_value(v) 441 | else: 442 | print "WARNING: Skipping parameter for {}! Shape {} does not fit {}.".format(p.name, p.get_value().shape, v.shape) 443 | print 'Loaded model parameters from {}'.format(filename) -------------------------------------------------------------------------------- /src/net/poollayer.py: -------------------------------------------------------------------------------- 1 | """Provides PoolLayer class for using in CNNs. 2 | 3 | PoolLayer provides interface for building pooling layers in CNNs. 4 | PoolLayerParams is the parametrization of these PoolLayer layers. 5 | 6 | Copyright 2015 Markus Oberweger, ICG, 7 | Graz University of Technology 8 | 9 | This file is part of DeepPrior. 10 | 11 | DeepPrior is free software: you can redistribute it and/or modify 12 | it under the terms of the GNU General Public License as published by 13 | the Free Software Foundation, either version 3 of the License, or 14 | (at your option) any later version. 15 | 16 | DeepPrior is distributed in the hope that it will be useful, 17 | but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | GNU General Public License for more details. 20 | 21 | You should have received a copy of the GNU General Public License 22 | along with DeepPrior. If not, see . 
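Supported poolType values: 0 for max-pooling, 1 for average pooling,
3 for plain subsampling (strided slicing), and -1 for no pooling;
PoolLayerParams forces poolType = -1 whenever poolsize is (1, 1).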
23 | """ 24 | 25 | import numpy 26 | import cPickle 27 | import theano 28 | import theano.sandbox.neighbours 29 | import theano.tensor as T 30 | from theano.tensor.signal.pool import pool_2d 31 | from net.layerparams import LayerParams 32 | 33 | __author__ = "Markus Oberweger " 34 | __copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria" 35 | __credits__ = ["Paul Wohlhart", "Markus Oberweger"] 36 | __license__ = "GPL" 37 | __version__ = "1.0" 38 | __maintainer__ = "Markus Oberweger" 39 | __email__ = "oberweger@icg.tugraz.at" 40 | __status__ = "Development" 41 | 42 | 43 | class PoolLayerParams(LayerParams): 44 | 45 | def __init__(self, inputDim=None, poolsize=None, redDim=None, outputDim=None, activation=None, poolType=0): 46 | """ 47 | 48 | :type poolsize: tuple or list of length 2 49 | :param poolsize: the downsampling (pooling) factor (#rows,#cols) 50 | """ 51 | 52 | super(PoolLayerParams, self).__init__(inputDim, outputDim) 53 | 54 | self._poolsize = poolsize 55 | self._redDim = redDim 56 | self._activation = activation 57 | self._poolType = poolType 58 | self.update() 59 | 60 | @property 61 | def poolsize(self): 62 | return self._poolsize 63 | 64 | @poolsize.setter 65 | def poolsize(self, value): 66 | self._poolsize = value 67 | self.update() 68 | 69 | @property 70 | def activation(self): 71 | return self._activation 72 | 73 | @property 74 | def poolType(self): 75 | return self._poolType 76 | 77 | def update(self): 78 | """ 79 | calc image_shape, 80 | """ 81 | self._outputDim = (self._inputDim[0], # batch_size 82 | self._redDim*self._inputDim[1] if self._redDim is not None else self._inputDim[1], # number of kernels 83 | self._inputDim[2]//self._poolsize[0], # output H 84 | self._inputDim[3]//self._poolsize[1]) # output W 85 | 86 | if(self._poolsize[0] == 1) and (self._poolsize[1] == 1): 87 | self._poolType = -1 88 | 89 | 90 | class PoolLayer(object): 91 | """ 92 | Pool Layer of a convolutional network 93 | """ 94 | 95 | def __init__(self, rng, inputVar, cfgParams, copyLayer=None, layerNum=None): 96 | """ 97 | Allocate a PoolLayer with shared variable internal parameters. 
98 | 99 | :type rng: numpy.random.RandomState 100 | :param rng: a random number generator used to initialize weights 101 | 102 | :type inputVar: theano.tensor.dtensor4 103 | :param inputVar: symbolic image tensor, of shape image_shape 104 | 105 | :type cfgParams: PoolLayerParams 106 | """ 107 | 108 | floatX = theano.config.floatX # @UndefinedVariable 109 | 110 | outputDim = cfgParams.outputDim 111 | poolsize = cfgParams.poolsize 112 | inputDim = cfgParams.inputDim 113 | activation = cfgParams.activation 114 | poolType = cfgParams.poolType 115 | 116 | self.cfgParams = cfgParams 117 | self.layerNum = layerNum 118 | 119 | self.inputVar = inputVar 120 | 121 | if inputVar.type.ndim != 4: 122 | raise TypeError() 123 | 124 | self.params = [] 125 | self.weights = [] 126 | 127 | # downsample each feature map individually, using maxpooling 128 | if poolType == 0: 129 | # use maxpooling 130 | pooled_out = pool_2d(input=self.inputVar, ds=poolsize, ignore_border=True) 131 | elif poolType == 1: 132 | # use average pooling 133 | pooled_out = theano.sandbox.neighbours.images2neibs(ten4=self.inputVar, neib_shape=poolsize, mode='ignore_borders').mean(axis=-1) 134 | new_shape = T.cast(T.join(0, self.inputVar.shape[:-2], T.as_tensor([self.inputVar.shape[2]//poolsize[0]]), T.as_tensor([self.inputVar.shape[3]//poolsize[1]])), 'int64') 135 | pooled_out = T.reshape(pooled_out, new_shape, ndim=4) 136 | elif poolType == 3: 137 | # use subsampling and ignore border 138 | pooled_out = self.inputVar[:, :, :(inputDim[2]//poolsize[0])*poolsize[0], :(inputDim[3]//poolsize[1])*poolsize[1]][:, :, ::poolsize[0], ::poolsize[1]] 139 | elif poolType == -1: 140 | # no pooling at all 141 | pooled_out = self.inputVar 142 | else: 143 | raise ValueError("Unknown pool type!") 144 | 145 | self.output = (pooled_out if activation is None 146 | else activation(pooled_out)) 147 | 148 | self.output.name = 'output_layer_{}'.format(self.layerNum) 149 | 150 | def __str__(self): 151 | """ 152 | Print configuration of layer 153 | :return: configuration string 154 | """ 155 | return "poolsize {}, pooltype {}, activation {}".format(self.cfgParams.poolsize, self.cfgParams.poolType, 156 | self.cfgParams.activation_str) 157 | -------------------------------------------------------------------------------- /src/net/poseregnet.py: -------------------------------------------------------------------------------- 1 | """Provides PoseRegNet class that implements deep CNNs. 2 | 3 | PoseRegNet provides interface for building the CNN. 4 | PoseRegNetParams is the parametrization of these CNNs. 5 | 6 | Copyright 2015 Markus Oberweger, ICG, 7 | Graz University of Technology 8 | 9 | This file is part of DeepPrior. 10 | 11 | DeepPrior is free software: you can redistribute it and/or modify 12 | it under the terms of the GNU General Public License as published by 13 | the Free Software Foundation, either version 3 of the License, or 14 | (at your option) any later version. 15 | 16 | DeepPrior is distributed in the hope that it will be useful, 17 | but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | GNU General Public License for more details. 20 | 21 | You should have received a copy of the GNU General Public License 22 | along with DeepPrior. If not, see . 
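Two configurations are implemented: type 0 regresses all numJoints * nDims
joint coordinates directly, while type 11 adds a 30-dimensional linear
bottleneck (the low-dimensional pose embedding) before the final linear
layer. A minimal parametrization sketch (joint count assumed):

    cfg = PoseRegNetParams(type=11, nChan=1, wIn=128, hIn=128,
                           batchSize=128, numJoints=14, nDims=3)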
23 | """ 24 | 25 | import theano.tensor as T 26 | from net.convpoollayer import ConvPoolLayer, ConvPoolLayerParams 27 | from net.hiddenlayer import HiddenLayer, HiddenLayerParams 28 | from net.dropoutlayer import DropoutLayer, DropoutLayerParams 29 | from net.netbase import NetBase, NetBaseParams 30 | from net.poollayer import PoolLayerParams 31 | from util.helpers import ReLU 32 | 33 | __author__ = "Markus Oberweger " 34 | __copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria" 35 | __credits__ = ["Markus Oberweger"] 36 | __license__ = "GPL" 37 | __version__ = "1.0" 38 | __maintainer__ = "Markus Oberweger" 39 | __email__ = "oberweger@icg.tugraz.at" 40 | __status__ = "Development" 41 | 42 | 43 | class PoseRegNetParams(NetBaseParams): 44 | def __init__(self, type=0, nChan=1, wIn=128, hIn=128, batchSize=128, numJoints=16, nDims=3): 45 | """ 46 | Init the parametrization 47 | 48 | :type type: int 49 | :param type: type of descr network 50 | """ 51 | 52 | super(PoseRegNetParams, self).__init__() 53 | 54 | self.batch_size = batchSize 55 | self.numJoints = numJoints 56 | self.nDims = nDims 57 | self.inputDim = (batchSize, nChan, hIn, wIn) 58 | 59 | if type == 0: 60 | # Try DeepPose CNN similar configuration 61 | self.layers.append(ConvPoolLayerParams(inputDim=(batchSize, nChan, hIn, wIn), # w,h,nChannel 62 | nFilters=8, 63 | filterDim=(5, 5), 64 | poolsize=(4, 4), 65 | activation=ReLU)) 66 | 67 | self.layers.append(ConvPoolLayerParams(inputDim=self.layers[-1].outputDim, 68 | nFilters=8, 69 | filterDim=(5, 5), 70 | poolsize=(2, 2), 71 | activation=ReLU)) 72 | 73 | self.layers.append(ConvPoolLayerParams(inputDim=self.layers[-1].outputDim, 74 | nFilters=8, 75 | filterDim=(3, 3), 76 | poolsize=(1, 1), 77 | activation=ReLU)) 78 | 79 | l3out = self.layers[-1].outputDim 80 | self.layers.append(HiddenLayerParams(inputDim=(l3out[0], l3out[1] * l3out[2] * l3out[3]), 81 | outputDim=(batchSize, 1024), 82 | activation=ReLU)) 83 | 84 | self.layers.append(DropoutLayerParams(inputDim=self.layers[-1].outputDim, 85 | outputDim=self.layers[-1].outputDim)) 86 | 87 | self.layers.append(HiddenLayerParams(inputDim=self.layers[-1].outputDim, 88 | outputDim=(batchSize, 1024), 89 | activation=ReLU)) 90 | 91 | self.layers.append(DropoutLayerParams(inputDim=self.layers[-1].outputDim, 92 | outputDim=self.layers[-1].outputDim)) 93 | 94 | self.layers.append(HiddenLayerParams(inputDim=self.layers[-1].outputDim, 95 | outputDim=(batchSize, numJoints * nDims), 96 | activation=None)) 97 | 98 | self.outputDim = self.layers[-1].outputDim 99 | elif type == 11: 100 | # Try DeepPose CNN similar configuration 101 | self.layers.append(ConvPoolLayerParams(inputDim=(batchSize, nChan, hIn, wIn), # w,h,nChannel 102 | nFilters=8, 103 | filterDim=(5, 5), 104 | poolsize=(4, 4), 105 | activation=ReLU)) 106 | 107 | self.layers.append(ConvPoolLayerParams(inputDim=self.layers[-1].outputDim, 108 | nFilters=8, 109 | filterDim=(5, 5), 110 | poolsize=(2, 2), 111 | activation=ReLU)) 112 | 113 | self.layers.append(ConvPoolLayerParams(inputDim=self.layers[-1].outputDim, 114 | nFilters=8, 115 | filterDim=(3, 3), 116 | poolsize=(1, 1), 117 | activation=ReLU)) 118 | 119 | l3out = self.layers[-1].outputDim 120 | self.layers.append(HiddenLayerParams(inputDim=(l3out[0], l3out[1] * l3out[2] * l3out[3]), 121 | outputDim=(batchSize, 1024), 122 | activation=ReLU)) 123 | 124 | self.layers.append(DropoutLayerParams(inputDim=self.layers[-1].outputDim, 125 | outputDim=self.layers[-1].outputDim)) 126 | 127 | 
self.layers.append(HiddenLayerParams(inputDim=self.layers[-1].outputDim,
128 |                                                  outputDim=(batchSize, 1024),
129 |                                                  activation=ReLU))
130 | 
131 |             self.layers.append(DropoutLayerParams(inputDim=self.layers[-1].outputDim,
132 |                                                   outputDim=self.layers[-1].outputDim))
133 | 
134 |             self.layers.append(HiddenLayerParams(inputDim=self.layers[-1].outputDim,
135 |                                                  outputDim=(batchSize, 30),  # low-dimensional embedding enforcing the pose prior
136 |                                                  activation=None))
137 | 
138 |             self.layers.append(HiddenLayerParams(inputDim=self.layers[-1].outputDim,
139 |                                                  outputDim=(batchSize, numJoints * nDims),
140 |                                                  activation=None))
141 | 
142 |             self.outputDim = self.layers[-1].outputDim
143 |         else:
144 |             raise NotImplementedError("not implemented")
145 | 
146 | 
147 | class PoseRegNet(NetBase):
148 |     def __init__(self, rng, inputVar=None, cfgParams=None):
149 |         """
150 |         Build the pose regression network from the given parametrization
151 |         :type cfgParams: PoseRegNetParams
152 |         """
153 | 
154 |         if cfgParams is None:
155 |             raise Exception("Cannot create a Net without config parameters (ie. cfgParams==None)")
156 | 
157 |         if inputVar is None:
158 |             inputVar = T.tensor4('x')  # input variable
159 |         elif isinstance(inputVar, str):
160 |             inputVar = T.tensor4(inputVar)  # input variable
161 | 
162 |         # create structure
163 |         super(PoseRegNet, self).__init__(rng, inputVar, cfgParams)
164 | 
--------------------------------------------------------------------------------
/src/net/scalenet.py:
--------------------------------------------------------------------------------
1 | """This file contains the base classes for different networks
2 | 
3 | Created on 01.09.2014
4 | 
5 | @author: Markus Oberweger <oberweger@icg.tugraz.at>
6 | """
7 | from __future__ import print_function
8 | 
9 | import theano
10 | import theano.tensor as T
11 | import time
12 | from net.convpoollayer import ConvPoolLayer, ConvPoolLayerParams
13 | from net.hiddenlayer import HiddenLayer, HiddenLayerParams
14 | from net.dropoutlayer import DropoutLayer, DropoutLayerParams
15 | from net.netbase import NetBase, NetBaseParams
16 | import numpy
17 | from util.helpers import ReLU
18 | 
19 | 
20 | class ScaleNetParams(NetBaseParams):
21 |     def __init__(self, type=0, nChan=1, wIn=128, hIn=128, batchSize=128, numJoints=16, nDims=3, resizeFactor=2):
22 |         '''
23 |         Init the parametrization
24 | 
25 |         :type type: int
26 |         :param type: type of network architecture
27 |         '''
28 | 
29 |         super(ScaleNetParams, self).__init__()
30 | 
31 |         self.batch_size = batchSize
32 |         self.numJoints = numJoints
33 |         self.nDims = nDims
34 | 
35 |         if type == 1:
36 |             self.numInputs = 3
37 |             self.inpConv = 3
38 |             self.inputDim = [(batchSize, nChan, hIn, wIn), (batchSize, nChan, hIn//resizeFactor, wIn//resizeFactor), (batchSize, nChan, hIn//resizeFactor**2, wIn//resizeFactor**2)]
39 |             # Try small configuration
40 |             self.layers.append(ConvPoolLayerParams(inputDim=(batchSize, nChan, hIn, wIn),  # w,h,nChannel
41 |                                                    nFilters=8,
42 |                                                    filterDim=(5, 5),
43 |                                                    poolsize=(4, 4),
44 |                                                    activation=ReLU))
45 | 
46 |             self.layers.append(ConvPoolLayerParams(inputDim=self.layers[-1].outputDim,
47 |                                                    nFilters=8,
48 |                                                    filterDim=(5, 5),
49 |                                                    poolsize=(2, 2),
50 |                                                    activation=ReLU))
51 | 
52 |             self.layers.append(ConvPoolLayerParams(inputDim=self.layers[-1].outputDim,
53 |                                                    nFilters=8,
54 |                                                    filterDim=(3, 3),
55 |                                                    poolsize=(1, 1),
56 |                                                    activation=ReLU))
57 | 
58 |             self.layers.append(ConvPoolLayerParams(inputDim=(batchSize, nChan, hIn//resizeFactor, wIn//resizeFactor),  # w,h,nChannel
59 |                                                    nFilters=8,
60 |                                                    filterDim=(5, 5),
61 |                                                    poolsize=(2, 2),
62 |                                                    activation=ReLU))
63 | 
64 |             self.layers.append(ConvPoolLayerParams(inputDim=self.layers[-1].outputDim,
65 |                                                    nFilters=8,
66 | 
filterDim=(5, 5), 67 | poolsize=(2, 2), 68 | activation=ReLU)) 69 | 70 | self.layers.append(ConvPoolLayerParams(inputDim=self.layers[-1].outputDim, 71 | nFilters=8, 72 | filterDim=(3, 3), 73 | poolsize=(1, 1), 74 | activation=ReLU)) 75 | 76 | self.layers.append(ConvPoolLayerParams(inputDim=(batchSize, nChan, hIn//resizeFactor**2, wIn//resizeFactor**2), # w,h,nChannel 77 | nFilters=8, 78 | filterDim=(5, 5), 79 | poolsize=(2, 2), 80 | activation=ReLU)) 81 | 82 | self.layers.append(ConvPoolLayerParams(inputDim=self.layers[-1].outputDim, 83 | nFilters=8, 84 | filterDim=(5, 5), 85 | poolsize=(1, 1), 86 | activation=ReLU)) 87 | 88 | self.layers.append(ConvPoolLayerParams(inputDim=self.layers[-1].outputDim, 89 | nFilters=8, 90 | filterDim=(3, 3), 91 | poolsize=(1, 1), 92 | activation=ReLU)) 93 | lout = 0 94 | for j in range(self.numInputs): 95 | idx = (j+1)*self.inpConv-1 96 | lout += self.layers[idx].outputDim[1]*self.layers[idx].outputDim[2]*self.layers[idx].outputDim[3] 97 | 98 | self.layers.append(HiddenLayerParams(inputDim=(batchSize, lout), 99 | outputDim=(batchSize, 1024), 100 | activation=ReLU)) 101 | 102 | self.layers.append(HiddenLayerParams(inputDim=self.layers[-1].outputDim, 103 | outputDim=(batchSize, 1024), 104 | activation=ReLU)) 105 | 106 | self.layers.append(HiddenLayerParams(inputDim=self.layers[-1].outputDim, 107 | outputDim=(batchSize, numJoints * nDims), 108 | activation=None)) # last one is linear for regression 109 | 110 | self.outputDim = self.layers[-1].outputDim 111 | else: 112 | raise NotImplementedError("not implemented") 113 | 114 | 115 | class ScaleNet(NetBase): 116 | def __init__(self, rng, inputVar=None, cfgParams=None, twin=None): 117 | ''' 118 | 119 | :type cfgParams: DescriptorNetParams 120 | ''' 121 | 122 | if cfgParams is None: 123 | raise Exception("Cannot create a Net without config parameters (ie. cfgParams==None)") 124 | 125 | if inputVar is None: 126 | self.inputVar = [] 127 | for i in range(cfgParams.numInputs): 128 | self.inputVar.append(T.tensor4('x{}'.format(i))) 129 | else: 130 | raise Exception("Do not give inputVar, created inline") 131 | 132 | # create structure 133 | 134 | self.cfgParams = cfgParams 135 | 136 | # create network 137 | self.layers = [] 138 | i = 0 139 | inI = 0 140 | for layerParam in cfgParams.layers: 141 | # first input is inputVar, otherwise input is output of last one 142 | if (i % self.cfgParams.inpConv) == 0 and i < self.cfgParams.numInputs*self.cfgParams.inpConv: 143 | inp = self.inputVar[inI] 144 | inI += 1 145 | else: 146 | # flatten output from conv to hidden layer 147 | if i == self.cfgParams.numInputs*self.cfgParams.inpConv: 148 | cList = [] 149 | for j in range(self.cfgParams.numInputs): 150 | idx = (j+1)*self.cfgParams.inpConv-1 151 | cList.append(self.layers[idx].output.flatten(2)) 152 | inp = T.concatenate(cList, axis=1) 153 | else: 154 | inp = self.layers[-1].output 155 | 156 | id = layerParam.__class__.__name__[:-6] 157 | constructor = globals()[id] 158 | self.layers.append(constructor(rng, 159 | inputVar=inp, 160 | cfgParams=layerParam, 161 | copyLayer=(None if (twin is None) else twin.layers[i]), 162 | layerNum=i)) 163 | 164 | i += 1 165 | 166 | # assemble externally visible parameters 167 | self.output = self.layers[-1].output 168 | -------------------------------------------------------------------------------- /src/test_realtimepipeline.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the main file for testing realtime performance. 
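(Aside: a construction sketch for the `ScaleNet` defined in scalenet.py above. Nothing here is from the repository itself; the shapes simply follow from `resizeFactor=2` and match the centre-of-mass refinement network used in the test script below.)

```python
import numpy
from net.scalenet import ScaleNet, ScaleNetParams

params = ScaleNetParams(type=1, nChan=1, wIn=128, hIn=128, batchSize=1,
                        resizeFactor=2, numJoints=1, nDims=3)
net = ScaleNet(numpy.random.RandomState(23455), cfgParams=params)
# Three input branches at full, half and quarter resolution:
# params.inputDim == [(1, 1, 128, 128), (1, 1, 64, 64), (1, 1, 32, 32)]
# net.inputVar is the matching list of tensor4 variables x0, x1, x2,
# whose conv outputs are flattened and concatenated into the hidden layers.
```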
3 | 4 | Copyright 2015 Markus Oberweger, ICG, 5 | Graz University of Technology 6 | 7 | This file is part of DeepPrior. 8 | 9 | DeepPrior is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | DeepPrior is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | along with DeepPrior. If not, see . 21 | """ 22 | 23 | import glob 24 | import numpy 25 | from data.dataset import NYUDataset, ICVLDataset 26 | from net.poseregnet import PoseRegNetParams, PoseRegNet 27 | from net.scalenet import ScaleNetParams, ScaleNet 28 | from util.realtimehandposepipeline import RealtimeHandposePipeline 29 | from data.importers import ICVLImporter, NYUImporter, DepthImporter 30 | 31 | 32 | __author__ = "Markus Oberweger " 33 | __copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria" 34 | __credits__ = ["Markus Oberweger"] 35 | __license__ = "GPL" 36 | __version__ = "1.0" 37 | __maintainer__ = "Markus Oberweger" 38 | __email__ = "oberweger@icg.tugraz.at" 39 | __status__ = "Development" 40 | 41 | if __name__ == '__main__': 42 | 43 | # di = ICVLImporter('../data/ICVL/') 44 | # Seq2 = di.loadSequence('test_seq_1') 45 | # testSeqs = [Seq2] 46 | # 47 | # testDataSet = ICVLDataset(testSeqs) 48 | # test_data, test_gt3D = testDataSet.imgStackDepthOnly('test_seq_1') 49 | 50 | di = NYUImporter('../data/NYU/') 51 | Seq2 = di.loadSequence('test_1') 52 | testSeqs = [Seq2] 53 | 54 | testDataSet = NYUDataset(testSeqs) 55 | test_data, test_gt3D = testDataSet.imgStackDepthOnly('test_1') 56 | 57 | # load trained network 58 | # poseNetParams = PoseRegNetParams(type=11, nChan=1, wIn=128, hIn=128, batchSize=1, numJoints=16, nDims=3) 59 | # poseNet = PoseRegNet(numpy.random.RandomState(23455), cfgParams=poseNetParams) 60 | # poseNet.load("./ICVL_network_prior.pkl") 61 | poseNetParams = PoseRegNetParams(type=11, nChan=1, wIn=128, hIn=128, batchSize=1, numJoints=14, nDims=3) 62 | poseNet = PoseRegNet(numpy.random.RandomState(23455), cfgParams=poseNetParams) 63 | poseNet.load("./NYU_network_prior.pkl") 64 | # comrefNetParams = ScaleNetParams(type=1, nChan=1, wIn=128, hIn=128, batchSize=1, resizeFactor=2, numJoints=1, nDims=3) 65 | # comrefNet = ScaleNet(numpy.random.RandomState(23455), cfgParams=comrefNetParams) 66 | # comrefNet.load("./net_ICVL_COM.pkl") 67 | comrefNetParams = ScaleNetParams(type=1, nChan=1, wIn=128, hIn=128, batchSize=1, resizeFactor=2, numJoints=1, nDims=3) 68 | comrefNet = ScaleNet(numpy.random.RandomState(23455), cfgParams=comrefNetParams) 69 | comrefNet.load("./net_NYU_COM.pkl") 70 | config = {'fx': 588., 'fy': 587., 'cube': (300, 300, 300)} 71 | # config = {'fx': 241.42, 'fy': 241.42, 'cube': (250, 250, 250)} 72 | # config = {'fx': 224.5, 'fy': 230.5, 'cube': (300, 300, 300)} # Creative Gesture Camera 73 | # di = ICVLImporter("./capture/") 74 | # di.fx = 224.5 75 | # di.fy = 230.5 76 | # di.ux = 160. 77 | # di.uy = 120. 
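(Aside on the `config` dictionaries above: they carry the depth camera focal lengths in pixels and the side lengths, in mm, of the 3D cube cropped around the hand. A sketch of the corresponding intrinsic camera matrix; the principal point values here are assumed, not taken from the code.)

```python
import numpy

config = {'fx': 588., 'fy': 587., 'cube': (300, 300, 300)}  # NYU / Kinect
ux, uy = 320., 240.  # assumed principal point for a 640x480 depth map
K = numpy.array([[config['fx'], 0., ux],
                 [0., config['fy'], uy],
                 [0., 0., 1.]], dtype=numpy.float32)
# 'cube' is the metric extent (mm) of the crop volume around the hand.
```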
78 | rtp = RealtimeHandposePipeline(poseNet, config, di, comrefNet) 79 | 80 | # use filenames 81 | filenames = [] 82 | for i in testSeqs[0].data: 83 | filenames.append(i.fileName) 84 | # filenames = sorted(glob.glob('./capture2/*.png')) 85 | rtp.processFiles(filenames) # Threaded 86 | 87 | # use depth camera 88 | # from util.cameradevice import CreativeCameraDevice 89 | # dev = CreativeCameraDevice() 90 | # dev.start() 91 | # rtp.processVideo(dev) # Threaded 92 | # dev.stop() 93 | -------------------------------------------------------------------------------- /src/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/trainer/optimizer.py: -------------------------------------------------------------------------------- 1 | """Basis for different optimization algorithms. 2 | 3 | Optimizer provides interface for creating the update rules for gradient based optimization. 4 | It includes SGD, NAG, RMSProp, etc. 5 | 6 | Copyright 2015 Markus Oberweger, ICG, 7 | Graz University of Technology 8 | 9 | This file is part of DeepPrior. 10 | 11 | DeepPrior is free software: you can redistribute it and/or modify 12 | it under the terms of the GNU General Public License as published by 13 | the Free Software Foundation, either version 3 of the License, or 14 | (at your option) any later version. 15 | 16 | DeepPrior is distributed in the hope that it will be useful, 17 | but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | GNU General Public License for more details. 20 | 21 | You should have received a copy of the GNU General Public License 22 | along with DeepPrior. If not, see . 23 | """ 24 | 25 | import theano 26 | import theano.tensor as T 27 | import numpy 28 | 29 | __author__ = "Markus Oberweger " 30 | __copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria" 31 | __credits__ = ["Paul Wohlhart", "Markus Oberweger"] 32 | __license__ = "GPL" 33 | __version__ = "1.0" 34 | __maintainer__ = "Markus Oberweger" 35 | __email__ = "oberweger@icg.tugraz.at" 36 | __status__ = "Development" 37 | 38 | 39 | class Optimizer(object): 40 | """ 41 | Class with different optimizers of the loss function 42 | """ 43 | 44 | def __init__(self, grads, params): 45 | """ 46 | Initialize object 47 | :param grads: gradients of the loss function 48 | :param params: model parameters that should be updated 49 | """ 50 | self.grads = grads 51 | self.params = params 52 | 53 | def RMSProp(self, learning_rate=0.01, decay=0.9, epsilon=1.0 / 100.): 54 | """ 55 | RMSProp of Tieleman et al. 
56 |         :param learning_rate: learning rate
57 |         :param decay: decay rate of gradient history
58 |         :param epsilon: lower bound on the RMS of the gradient history, guards against division by zero
59 |         :return: list of (variable, update expression) tuples
60 |         """
61 | 
62 |         updates = []
63 | 
64 |         for param_i, grad_i in zip(self.params, self.grads):
65 |             # Accumulate decayed history of squared gradients
66 |             msg = theano.shared(numpy.zeros(param_i.get_value().shape, dtype=theano.config.floatX))
67 |             new_mean_squared_grad = (decay * msg + (1 - decay) * T.sqr(grad_i))
68 | 
69 |             # Compute update, scaling the step by the RMS of the history
70 |             rms_grad_t = T.sqrt(new_mean_squared_grad)
71 |             rms_grad_t = T.maximum(rms_grad_t, epsilon)
72 |             delta_x_t = -learning_rate * grad_i / rms_grad_t
73 | 
74 |             # Apply update
75 |             updates.append((param_i, param_i + delta_x_t))
76 |             updates.append((msg, new_mean_squared_grad))
77 | 
78 |         return updates
79 | 
80 | 
--------------------------------------------------------------------------------
/src/trainer/poseregnettrainer.py:
--------------------------------------------------------------------------------
1 | """Network trainer for regression networks.
2 | 
3 | PoseRegNetTrainer provides interface for training regressors for
4 | estimating the hand pose.
5 | PoseRegNetTrainerParams is the parametrization of the PoseRegNetTrainer.
6 | 
7 | Copyright 2015 Markus Oberweger, ICG,
8 | Graz University of Technology
9 | 
10 | This file is part of DeepPrior.
11 | 
12 | DeepPrior is free software: you can redistribute it and/or modify
13 | it under the terms of the GNU General Public License as published by
14 | the Free Software Foundation, either version 3 of the License, or
15 | (at your option) any later version.
16 | 
17 | DeepPrior is distributed in the hope that it will be useful,
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | GNU General Public License for more details.
21 | 
22 | You should have received a copy of the GNU General Public License
23 | along with DeepPrior. If not, see <http://www.gnu.org/licenses/>.
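(Aside, referring back to `Optimizer.RMSProp` in optimizer.py above: the rule keeps a decayed mean of squared gradients and scales each step by its square root, floored at epsilon. A toy scalar walk-through with assumed values:)

```python
import numpy

# Toy scalar version of Optimizer.RMSProp; all values assumed.
learning_rate, decay, epsilon = 0.01, 0.9, 1.0 / 100.
param, grad, msg = 1.0, 0.5, 0.0   # parameter, gradient, squared-grad history

msg = decay * msg + (1 - decay) * grad ** 2   # 0.025
rms = max(numpy.sqrt(msg), epsilon)           # ~0.158, floored at epsilon
param += -learning_rate * grad / rms          # step is scaled per parameter
```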
24 | """ 25 | 26 | import theano 27 | import theano.tensor as T 28 | from net.poollayer import PoolLayer 29 | from net.poseregnet import PoseRegNet 30 | from trainer.nettrainer import NetTrainerParams, NetTrainer 31 | from trainer.optimizer import Optimizer 32 | 33 | __author__ = "Markus Oberweger " 34 | __copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria" 35 | __credits__ = ["Paul Wohlhart", "Markus Oberweger"] 36 | __license__ = "GPL" 37 | __version__ = "1.0" 38 | __maintainer__ = "Markus Oberweger" 39 | __email__ = "oberweger@icg.tugraz.at" 40 | __status__ = "Development" 41 | 42 | 43 | class PoseRegNetTrainerParams(NetTrainerParams): 44 | def __init__(self): 45 | super(PoseRegNetTrainerParams, self).__init__() 46 | 47 | 48 | class PoseRegNetTrainer(NetTrainer): 49 | """ 50 | classdocs 51 | """ 52 | 53 | def __init__(self, poseNet=None, cfgParams=None, rng=None): 54 | """ 55 | Constructor 56 | 57 | :param poseNet: initialized DescriptorNet 58 | :param cfgParams: initialized PoseRegNetTrainerParams 59 | """ 60 | super(PoseRegNetTrainer, self).__init__(cfgParams, 3) 61 | self.poseNet = poseNet 62 | self.cfgParams = cfgParams 63 | self.rng = rng 64 | 65 | if not isinstance(cfgParams, PoseRegNetTrainerParams): 66 | raise ValueError("cfgParams must be an instance of PoseRegNetTrainerParams") 67 | 68 | self.setupFunctions() 69 | 70 | def setupFunctions(self): 71 | floatX = theano.config.floatX # @UndefinedVariable 72 | 73 | dnParams = self.poseNet.cfgParams 74 | 75 | # params 76 | self.learning_rate = T.scalar('learning_rate', dtype=floatX) 77 | self.momentum = T.scalar('momentum', dtype=floatX) 78 | 79 | # input 80 | self.index = T.lscalar() # index to a [mini]batch 81 | self.x = self.poseNet.inputVar 82 | 83 | # targets 84 | if self.poseNet.cfgParams.numJoints == 1 and self.poseNet.cfgParams.nDims == 1: 85 | y = T.vector('y',dtype=floatX) # R^D 86 | elif self.poseNet.cfgParams.numJoints == 1: 87 | y = T.matrix('y',dtype=floatX) # R^Dx3 88 | else: 89 | y = T.tensor3('y',dtype=floatX) # R^Dx16x3 90 | 91 | # L2 error 92 | if self.poseNet.cfgParams.numJoints == 1 and self.poseNet.cfgParams.nDims == 1: 93 | cost = T.sqr(T.reshape(self.poseNet.output,(self.cfgParams.batch_size,self.poseNet.cfgParams.nDims))-y).mean(axis=1) 94 | elif self.poseNet.cfgParams.numJoints == 1: 95 | cost = T.sqr(T.reshape(self.poseNet.output,(self.cfgParams.batch_size,self.poseNet.cfgParams.nDims))-y).sum(axis=1) 96 | else: 97 | cost = T.sqr(T.reshape(self.poseNet.output,(self.cfgParams.batch_size,self.poseNet.cfgParams.numJoints,self.poseNet.cfgParams.nDims))-y).sum(axis=2).mean(axis=1) # error is sum of all joints 98 | 99 | self.cost = cost.mean() # The cost to minimize 100 | 101 | # weight vector length for regularization (weight decay) 102 | totalWeightVectorLength = 0 103 | for W in self.poseNet.weights: 104 | totalWeightVectorLength += self.cfgParams.weightreg_factor * (W ** 2).sum() 105 | 106 | if not self.poseNet.hasDropout(): 107 | self.cost += totalWeightVectorLength # + weight vector norm 108 | 109 | # create a list of gradients for all model parameters 110 | self.params = self.poseNet.params 111 | self.grads = T.grad(self.cost, self.params) 112 | 113 | # euclidean mean errors over all joints 114 | if self.poseNet.cfgParams.numJoints == 1 and self.poseNet.cfgParams.nDims == 1: 115 | errors = T.sqrt(T.sqr(T.reshape(self.poseNet.output,(self.cfgParams.batch_size,self.poseNet.cfgParams.nDims))-y)).mean(axis=1) 116 | elif self.poseNet.cfgParams.numJoints == 1: 117 | errors = 
T.sqrt(T.sqr(T.reshape(self.poseNet.output,(self.cfgParams.batch_size,self.poseNet.cfgParams.nDims))-y).sum(axis=1)) 118 | else: 119 | errors = T.sqrt(T.sqr(T.reshape(self.poseNet.output,(self.cfgParams.batch_size,self.poseNet.cfgParams.numJoints,self.poseNet.cfgParams.nDims))-y).sum(axis=2)).mean(axis=1) 120 | 121 | # mean error over full set 122 | self.errors = errors.mean() 123 | 124 | # store stuff 125 | self.y = y 126 | 127 | def compileFunctions(self, compileDebugFcts=False): 128 | # TRAIN 129 | self.setupTrain() 130 | 131 | # DEBUG 132 | self.compileDebugFcts = compileDebugFcts 133 | if compileDebugFcts: 134 | self.setupDebugFunctions() 135 | 136 | # VALIDATE 137 | self.setupValidate() 138 | 139 | def setupTrain(self): 140 | # train_model is a function that updates the model parameters by SGD 141 | opt = Optimizer(self.grads, self.params) 142 | updates = opt.RMSProp(self.learning_rate, 0.9, 1.0/100.) 143 | 144 | batch_size = self.cfgParams.batch_size 145 | givens_train = {self.x: self.train_data_x[self.index * batch_size:(self.index + 1) * batch_size]} 146 | givens_train[self.y] = self.train_data_y[self.index * batch_size:(self.index + 1) * batch_size] 147 | 148 | print("compiling train_model() ... ") 149 | self.train_model = theano.function(inputs=[self.index, self.learning_rate], 150 | outputs=self.cost, 151 | updates=updates, 152 | givens=givens_train) 153 | print("done.") 154 | 155 | print("compiling test_model_on_train() ... ") 156 | batch_size = self.cfgParams.batch_size 157 | givens_test_on_train = {self.x: self.train_data_x[self.index * batch_size:(self.index + 1) * batch_size]} 158 | givens_test_on_train[self.y] = self.train_data_y[self.index * batch_size:(self.index + 1) * batch_size] 159 | 160 | self.test_model_on_train = theano.function(inputs=[self.index], 161 | outputs=self.errors, 162 | givens=givens_test_on_train) 163 | print("done.") 164 | 165 | def setupValidate(self): 166 | 167 | batch_size = self.cfgParams.batch_size 168 | givens_val = {self.x: self.val_data_x[self.index * batch_size:(self.index + 1) * batch_size]} 169 | givens_val[self.y] = self.val_data_y[self.index * batch_size:(self.index + 1) * batch_size] 170 | 171 | print("compiling validation_error() ... ") 172 | self.validation_error = theano.function(inputs=[self.index], 173 | outputs=self.errors, 174 | givens=givens_val) 175 | print("done.") 176 | 177 | print("compiling validation_cost() ... ") 178 | self.validation_cost = theano.function(inputs=[self.index], 179 | outputs=self.cost, 180 | givens=givens_val) 181 | print("done.") 182 | 183 | # debug and so 184 | print("compiling compute_val_descr() ... ") 185 | givens_val_descr = {self.x: self.val_data_x[self.index * batch_size:(self.index + 1) * batch_size]} 186 | self.compute_val_descr = theano.function(inputs=[self.index], 187 | outputs=self.poseNet.output, 188 | givens=givens_val_descr) 189 | print("done.") 190 | 191 | def setupDebugFunctions(self): 192 | batch_size = self.cfgParams.batch_size 193 | 194 | print("compiling compute_train_descr() ... 
") 195 | givens_train_descr = {self.x: self.train_data_x[self.index * batch_size:(self.index + 1) * batch_size]} 196 | self.compute_train_descr = theano.function(inputs=[self.index], 197 | outputs=self.poseNet.output, 198 | givens=givens_train_descr) 199 | print("done.") 200 | -------------------------------------------------------------------------------- /src/trainer/scalenettrainer.py: -------------------------------------------------------------------------------- 1 | """Network trainer for multiscale regression networks. 2 | 3 | ScaleNetTrainer provides interface for training regressors for 4 | estimating the hand pose. 5 | ScaleNetTrainerParams is the parametrization of the ScaleNetTrainer. 6 | 7 | Copyright 2015 Markus Oberweger, ICG, 8 | Graz University of Technology 9 | 10 | This file is part of DeepPrior. 11 | 12 | DeepPrior is free software: you can redistribute it and/or modify 13 | it under the terms of the GNU General Public License as published by 14 | the Free Software Foundation, either version 3 of the License, or 15 | (at your option) any later version. 16 | 17 | DeepPrior is distributed in the hope that it will be useful, 18 | but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 | GNU General Public License for more details. 21 | 22 | You should have received a copy of the GNU General Public License 23 | along with DeepPrior. If not, see . 24 | """ 25 | 26 | import numpy 27 | 28 | import theano 29 | import theano.tensor as T 30 | 31 | from trainer.nettrainer import NetTrainerParams, NetTrainer 32 | from trainer.optimizer import Optimizer 33 | 34 | 35 | __author__ = "Markus Oberweger " 36 | __copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria" 37 | __credits__ = ["Markus Oberweger"] 38 | __license__ = "GPL" 39 | __version__ = "1.0" 40 | __maintainer__ = "Markus Oberweger" 41 | __email__ = "oberweger@icg.tugraz.at" 42 | __status__ = "Development" 43 | 44 | 45 | class ScaleNetTrainerParams(NetTrainerParams): 46 | def __init__(self): 47 | super(ScaleNetTrainerParams, self).__init__() 48 | 49 | 50 | class ScaleNetTrainer(NetTrainer): 51 | """ 52 | classdocs 53 | """ 54 | 55 | def __init__(self, poseNet=None, cfgParams=None, rng=None): 56 | """ 57 | Constructor 58 | 59 | :param poseNet: initialized DescriptorNet 60 | :param cfgParams: initialized PoseRegNetTrainerParams 61 | """ 62 | 63 | # use lazy init instead 64 | if poseNet is None: 65 | return 66 | 67 | super(ScaleNetTrainer, self).__init__(cfgParams, 8) 68 | self.poseNet = poseNet 69 | self.cfgParams = cfgParams 70 | self.rng = rng 71 | 72 | if not isinstance(cfgParams, ScaleNetTrainerParams): 73 | raise ValueError("cfgParams must be an instance of ScaleNetTrainerParams") 74 | 75 | self.setupFunctions() 76 | 77 | def setupFunctions(self): 78 | floatX = theano.config.floatX # @UndefinedVariable 79 | 80 | dnParams = self.poseNet.cfgParams 81 | 82 | # params 83 | self.learning_rate = T.scalar('learning_rate', dtype=floatX) 84 | self.momentum = T.scalar('momentum', dtype=floatX) 85 | 86 | # input 87 | self.index = T.lscalar() # index to a [mini]batch 88 | self.x = [] 89 | for i in range(self.poseNet.cfgParams.numInputs): 90 | self.x.append(self.poseNet.inputVar[i]) 91 | 92 | # targets 93 | if self.poseNet.cfgParams.numJoints == 1 and self.poseNet.cfgParams.nDims == 1: 94 | y = T.vector('y', dtype=floatX) # R^D 95 | elif self.poseNet.cfgParams.numJoints == 1: 96 | y = T.matrix('y', dtype=floatX) # R^Dx3 97 | else: 98 | y = 
T.tensor3('y', dtype=floatX) # R^Dx16x3 99 | 100 | # L2 error 101 | if self.poseNet.cfgParams.numJoints == 1 and self.poseNet.cfgParams.nDims == 1: 102 | cost = T.sqr(T.reshape(self.poseNet.output,(self.cfgParams.batch_size,self.poseNet.cfgParams.nDims)) - y) 103 | elif self.poseNet.cfgParams.numJoints == 1: 104 | cost = T.sqr(T.reshape(self.poseNet.output,(self.cfgParams.batch_size,self.poseNet.cfgParams.nDims))-y).sum(axis=1) 105 | else: 106 | cost = T.sqr(T.reshape(self.poseNet.output,(self.cfgParams.batch_size,self.poseNet.cfgParams.numJoints,self.poseNet.cfgParams.nDims))-y).sum(axis=2).mean(axis=1) # error is sum of all joints 107 | 108 | self.cost = cost.mean() # The cost to minimize 109 | 110 | # weight vector length for regularization (weight decay) 111 | totalWeightVectorLength = 0 112 | for W in self.poseNet.weights: 113 | totalWeightVectorLength += self.cfgParams.weightreg_factor * (W ** 2).sum() 114 | 115 | if not self.poseNet.hasDropout(): 116 | self.cost += totalWeightVectorLength # + weight vector norm 117 | 118 | # create a list of gradients for all model parameters 119 | self.params = self.poseNet.params 120 | self.grads = T.grad(self.cost, self.params) 121 | 122 | # euclidean mean errors over all joints 123 | if self.poseNet.cfgParams.numJoints == 1 and self.poseNet.cfgParams.nDims == 1: 124 | errors = T.sqrt(T.sqr(T.reshape(self.poseNet.output,(self.cfgParams.batch_size,self.poseNet.cfgParams.nDims))-y)) 125 | elif self.poseNet.cfgParams.numJoints == 1: 126 | errors = T.sqrt(T.sqr(T.reshape(self.poseNet.output,(self.cfgParams.batch_size,self.poseNet.cfgParams.nDims))-y).sum(axis=1)) 127 | else: 128 | errors = T.sqrt(T.sqr(T.reshape(self.poseNet.output,(self.cfgParams.batch_size,self.poseNet.cfgParams.numJoints,self.poseNet.cfgParams.nDims))-y).sum(axis=2)).mean(axis=1) 129 | 130 | # mean error over full set 131 | self.errors = errors.mean() 132 | 133 | # store stuff 134 | self.y = y 135 | 136 | def compileFunctions(self, compileDebugFcts=False): 137 | # TRAIN 138 | self.setupTrain() 139 | 140 | self.compileDebugFcts = compileDebugFcts 141 | if compileDebugFcts: 142 | self.setupDebugFunctions() 143 | 144 | # VALIDATE 145 | self.setupValidate() 146 | 147 | def setupTrain(self): 148 | # train_model is a function that updates the model parameters by SGD 149 | 150 | opt = Optimizer(self.grads, self.params) 151 | self.updates = opt.RMSProp(self.learning_rate, 0.9, 1.0/100.) 152 | 153 | batch_size = self.cfgParams.batch_size 154 | givens_train = {self.x[0]: self.train_data_x[self.index * batch_size:(self.index + 1) * batch_size]} 155 | for i in range(1, self.poseNet.cfgParams.numInputs): 156 | givens_train[self.x[i]] = getattr(self, 'train_data_x'+str(i))[self.index * batch_size:(self.index + 1) * batch_size] 157 | givens_train[self.y] = self.train_data_y[self.index * batch_size:(self.index + 1) * batch_size] 158 | 159 | print("compiling train_model() ... ") 160 | self.train_model = theano.function(inputs=[self.index, self.learning_rate], 161 | outputs=self.cost, 162 | updates=self.updates, 163 | givens=givens_train) 164 | 165 | print("done.") 166 | 167 | print("compiling test_model_on_train() ... 
") 168 | batch_size = self.cfgParams.batch_size 169 | givens_test_on_train = {self.x[0]: self.train_data_x[self.index * batch_size:(self.index + 1) * batch_size]} 170 | for i in range(1, self.poseNet.cfgParams.numInputs): 171 | givens_test_on_train[self.x[i]] = getattr(self, 'train_data_x'+str(i))[self.index * batch_size:(self.index + 1) * batch_size] 172 | givens_test_on_train[self.y] = self.train_data_y[self.index * batch_size:(self.index + 1) * batch_size] 173 | 174 | self.test_model_on_train = theano.function(inputs=[self.index], 175 | outputs=self.errors, 176 | givens=givens_test_on_train) 177 | print("done.") 178 | 179 | def setupValidate(self): 180 | 181 | batch_size = self.cfgParams.batch_size 182 | givens_val = {self.x[0]: self.val_data_x[self.index * batch_size:(self.index + 1) * batch_size]} 183 | for i in range(1, self.poseNet.cfgParams.numInputs): 184 | givens_val[self.x[i]] = getattr(self, 'val_data_x'+str(i))[self.index * batch_size:(self.index + 1) * batch_size] 185 | givens_val[self.y] = self.val_data_y[self.index * batch_size:(self.index + 1) * batch_size] 186 | 187 | givens_val_cost = {self.x[0]: self.val_data_x[self.index * batch_size:(self.index + 1) * batch_size]} 188 | for i in range(1, self.poseNet.cfgParams.numInputs): 189 | givens_val_cost[self.x[i]] = getattr(self, 'val_data_x'+str(i))[self.index * batch_size:(self.index + 1) * batch_size] 190 | givens_val_cost[self.y] = self.val_data_y[self.index * batch_size:(self.index + 1) * batch_size] 191 | print("compiling validation_cost() ... ") 192 | self.validation_cost = theano.function(inputs=[self.index], 193 | outputs=self.cost, 194 | givens=givens_val_cost) 195 | print("done.") 196 | 197 | print("compiling validation_error() ... ") 198 | self.validation_error = theano.function(inputs=[self.index], 199 | outputs=self.errors, 200 | givens=givens_val) 201 | print("done.") 202 | 203 | # debug and so 204 | print("compiling compute_val_descr() ... ") 205 | 206 | givens_val_descr = {self.x[0]: self.val_data_x[self.index * batch_size:(self.index + 1) * batch_size]} 207 | for i in range(1, self.poseNet.cfgParams.numInputs): 208 | givens_val_descr[self.x[i]] = getattr(self, 'val_data_x'+str(i))[self.index * batch_size:(self.index + 1) * batch_size] 209 | self.compute_val_descr = theano.function(inputs=[self.index], 210 | outputs=self.poseNet.output, 211 | givens=givens_val_descr) 212 | print("done.") 213 | 214 | def setupDebugFunctions(self): 215 | batch_size = self.cfgParams.batch_size 216 | 217 | print("compiling compute_train_descr() ... 
") 218 | givens_train_descr = {self.x[0]: self.train_data_x[self.index * batch_size:(self.index + 1) * batch_size]} 219 | for i in range(1, self.poseNet.cfgParams.numInputs): 220 | givens_train_descr[self.x[i]] = getattr(self, 'train_data_x'+str(i))[self.index * batch_size:(self.index + 1) * batch_size] 221 | self.compute_train_descr = theano.function(inputs=[self.index], 222 | outputs=self.poseNet.output, 223 | givens=givens_train_descr) 224 | print("done.") 225 | 226 | -------------------------------------------------------------------------------- /src/util/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(EXE_NAME "_dscapture") 2 | set(DEPTHSENSE_SDK "/opt/softkinetic/DepthSenseSDK") 3 | set(CMAKE_CXX_FLAGS "-O0 -fPIC") 4 | cmake_minimum_required(VERSION 2.8 FATAL_ERROR) 5 | project(${EXE_NAME}) 6 | include_directories("${DEPTHSENSE_SDK}/include" ${LIBPERCEPT} "/usr/include/python2.7") 7 | link_directories("${DEPTHSENSE_SDK}/lib" "/usr/local/lib") 8 | find_package(PythonLibs) 9 | add_library(${EXE_NAME} SHARED depthsense.cxx initdepthsense.cxx) 10 | target_link_libraries(${EXE_NAME} 11 | python2.7 12 | DepthSense 13 | DepthSensePlugins) 14 | -------------------------------------------------------------------------------- /src/util/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/util/cameradevice.py: -------------------------------------------------------------------------------- 1 | """Basis for depth camera devices. 2 | 3 | CameraDevice provides interface for managing depth cameras. 4 | It can be used to retrieve basic information and read 5 | depth and color frames. 6 | 7 | Copyright 2015 Markus Oberweger, ICG, 8 | Graz University of Technology 9 | 10 | This file is part of DeepPrior. 11 | 12 | DeepPrior is free software: you can redistribute it and/or modify 13 | it under the terms of the GNU General Public License as published by 14 | the Free Software Foundation, either version 3 of the License, or 15 | (at your option) any later version. 16 | 17 | DeepPrior is distributed in the hope that it will be useful, 18 | but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 | GNU General Public License for more details. 21 | 22 | You should have received a copy of the GNU General Public License 23 | along with DeepPrior. If not, see . 
24 | """ 25 | 26 | import numpy 27 | import cv2 28 | import scipy.misc 29 | import lib_dscapture as dsc 30 | import openni 31 | 32 | __author__ = "Markus Oberweger " 33 | __copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria" 34 | __credits__ = ["Markus Oberweger"] 35 | __license__ = "GPL" 36 | __version__ = "1.0" 37 | __maintainer__ = "Markus Oberweger" 38 | __email__ = "oberweger@icg.tugraz.at" 39 | __status__ = "Development" 40 | 41 | 42 | class CameraDevice(object): 43 | """ 44 | Abstract class that handles all camera devices 45 | """ 46 | 47 | def __init__(self, mirror=False): 48 | """ 49 | Initialize device 50 | :param mirror: mirror all images 51 | :return: None 52 | """ 53 | 54 | self.mirror = mirror 55 | 56 | def start(self): 57 | """ 58 | Start device 59 | :return: None 60 | """ 61 | raise NotImplementedError("!") 62 | 63 | def stop(self): 64 | """ 65 | Stop device 66 | :return: None 67 | """ 68 | raise NotImplementedError("!") 69 | 70 | def saveDepth(self, data, file_name): 71 | """ 72 | Save data to file, we need special treatment because we have 16bit depth 73 | :param data: data 74 | :param file_name: file name 75 | :return: None 76 | """ 77 | 78 | im = scipy.misc.toimage(data.astype('uint16'), high=numpy.max(data), low=numpy.min(data), mode='I') 79 | im.save(file_name+'.png') 80 | # read with: b = scipy.misc.imread('my16bit.png') 81 | 82 | def getDepth(self): 83 | """ 84 | Return a median smoothed depth image 85 | :return: depth data as numpy array 86 | """ 87 | raise NotImplementedError("!") 88 | 89 | def getColor(self): 90 | """ 91 | Return a bit color image 92 | :return: color image as numpy array 93 | """ 94 | raise NotImplementedError("!") 95 | 96 | def getGrayScale(self): 97 | """ 98 | Return a grayscale image 99 | :return: grayscale image as numpy array 100 | """ 101 | raise NotImplementedError("!") 102 | 103 | def getLastColorNum(self): 104 | """ 105 | Get frame number of last color frame 106 | :return: frame number 107 | """ 108 | raise NotImplementedError("!") 109 | 110 | def getLastDepthNum(self): 111 | """ 112 | Get frame number of last depth frame 113 | :return: frame number 114 | """ 115 | raise NotImplementedError("!") 116 | 117 | def getDepthIntrinsics(self): 118 | """ 119 | Get intrinsic matrix of depth camera 120 | :return: 3x3 intrinsic camera matrix 121 | """ 122 | raise NotImplementedError("!") 123 | 124 | 125 | class CreativeCameraDevice(CameraDevice): 126 | """ DepthSense camera class, for Creative Gesture Camera, DS325, etc.""" 127 | 128 | def __init__(self, mirror=False): 129 | """ 130 | Initialize device 131 | :param mirror: mirror image 132 | """ 133 | 134 | super(CreativeCameraDevice, self).__init__(mirror) 135 | 136 | def start(self): 137 | """ 138 | Start device 139 | :return: None 140 | """ 141 | dsc.start() 142 | 143 | def stop(self): 144 | """ 145 | Stop device 146 | :return: None 147 | """ 148 | dsc.stop() 149 | 150 | def getDepth(self): 151 | """ 152 | Return a median smoothed depth image 153 | :return: depth data as numpy array 154 | """ 155 | 156 | if self.mirror: 157 | depth = dsc.getDepthMap()[:, ::-1] 158 | else: 159 | depth = dsc.getDepthMap() 160 | depth = cv2.medianBlur(depth, 3) 161 | return (numpy.count_nonzero(depth) != 0), numpy.asarray(depth, numpy.float32) 162 | 163 | def getColor(self): 164 | """ 165 | Return a bit color image 166 | :return: color image as numpy array 167 | """ 168 | 169 | if self.mirror: 170 | image = dsc.getColorMap()[:, ::-1, :] 171 | else: 172 | image = dsc.getColorMap() 173 | 
return (numpy.count_nonzero(image) != 0), image
174 | 
175 |     def getGrayScale(self):
176 |         """
177 |         Return a grayscale image
178 |         :return: grayscale image as numpy array
179 |         """
180 | 
181 |         if self.mirror:
182 |             image = dsc.getColorMap()[:, ::-1, :]
183 |         else:
184 |             image = dsc.getColorMap()
185 |         grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
186 |         return (numpy.count_nonzero(grey) != 0), grey.transpose()
187 | 
188 |     def getLastColorNum(self):
189 |         """
190 |         Get frame number of last color frame
191 |         :return: frame number
192 |         """
193 |         return dsc.getLastColorNum()
194 | 
195 |     def getLastDepthNum(self):
196 |         """
197 |         Get frame number of last depth frame
198 |         :return: frame number
199 |         """
200 |         return dsc.getLastDepthNum()
201 | 
202 |     def getDepthIntrinsics(self):
203 |         """
204 |         Get intrinsic matrix of depth camera
205 |         :return: 3x3 intrinsic camera matrix
206 |         """
207 | 
208 |         return dsc.getDepthIntrinsics()
209 | 
210 | 
211 | class DepthSenseCameraDevice(CameraDevice):
212 |     """
213 |     Class for OpenNI based devices, e.g. Kinect, Asus Xtion
214 |     """
215 | 
216 |     def __init__(self, mirror=False):
217 |         """
218 |         Initialize device
219 |         :param mirror: mirror image
220 |         """
221 | 
222 |         super(DepthSenseCameraDevice, self).__init__(mirror)
223 | 
224 |     def start(self):
225 |         """
226 |         Start device
227 |         :return: None
228 |         """
229 |         self.ctx = openni.Context()
230 |         self.ctx.init()
231 | 
232 |         # Create a depth generator
233 |         self.depth = openni.DepthGenerator()
234 |         self.depth.create(self.ctx)
235 | 
236 |         # Set it to VGA maps at 30 FPS
237 |         self.depth.set_resolution_preset(openni.RES_VGA)
238 |         self.depth.fps = 30
239 | 
240 |         # Start generating
241 |         self.ctx.start_generating_all()
242 | 
243 |     def stop(self):
244 |         """
245 |         Stop device
246 |         :return: None
247 |         """
248 | 
249 |         self.ctx.stop_generating_all()
250 |         self.ctx.shutdown()
251 | 
252 |     def getDepth(self):
253 |         """
254 |         Return the current depth image
255 |         :return: (status, depth data as numpy array)
256 |         """
257 | 
258 |         # Grab the next depth frame
259 |         try:
260 |             # Wait for new data to be available
261 |             self.ctx.wait_one_update_all(self.depth)
262 |         except openni.OpenNIError, err:
263 |             print "Failed updating data:", err
264 |             return False, None
265 |         dpt = numpy.asarray(self.depth.get_tuple_depth_map(), dtype='float32').reshape(self.depth.map.height, self.depth.map.width)
266 | 
267 |         return True, dpt
--------------------------------------------------------------------------------
/src/util/depthsense.cxx:
--------------------------------------------------------------------------------
1 | /*
2 |  * DepthSense SDK for Python and SimpleCV
3 |  * -----------------------------------------------------------------------------
4 |  * file: depthsense.cxx
5 |  * author: Abdi Dahir
6 |  * modified: May 9 2014
7 |  * vim: set fenc=utf-8:ts=4:sw=4:expandtab:
8 |  *
9 |  * Python hooks happen here. This is the main file.
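(Aside: a minimal capture sketch for the camera interface in cameradevice.py above. It assumes a connected Creative depth camera and the compiled `lib_dscapture` module; it is illustrative only, not part of the repository.)

```python
from util.cameradevice import CreativeCameraDevice

dev = CreativeCameraDevice(mirror=True)
dev.start()
ok, depth = dev.getDepth()          # (valid flag, median-smoothed float32 map)
if ok:
    dev.saveDepth(depth, 'frame0000')  # writes 16-bit PNG 'frame0000.png'
dev.stop()
```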
10 |  * -----------------------------------------------------------------------------
11 |  */
12 | 
13 | // Python Module includes (the angle-bracket header names in this file were lost in the dump and are reconstructed below)
14 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
15 | #include <Python.h>
16 | #include <numpy/arrayobject.h>
17 | 
18 | // MS completely untested
19 | #ifdef _MSC_VER
20 | #include <windows.h>
21 | #endif
22 | 
23 | // C includes
24 | #include <stdio.h>
25 | #include <stdlib.h>
26 | #include <string.h>
27 | #include <unistd.h>
28 | #include <stdint.h>
29 | #include <time.h>
30 | 
31 | // C++ includes
32 | #include <exception>
33 | #include <iostream>
34 | #include <fstream>
35 | //#include <thread>
36 | 
37 | // Application includes
38 | #include "initdepthsense.h"
39 | 
40 | // internal map copies
41 | uint8_t colourMapClone[640*480*3];
42 | int16_t depthMapClone[320*240];
43 | int16_t vertexMapClone[320*240*3];
44 | float accelMapClone[3];
45 | float uvMapClone[320*240*2];
46 | float vertexFMapClone[320*240*3];
47 | uint8_t syncMapClone[320*240*3];
48 | 
49 | uint8_t depthColouredMapClone[320*240*3];
50 | 
51 | using namespace std;
52 | 
53 | void buildSyncMap()  // project each depth pixel into the colour image via the UV map
54 | {
55 |     int ci, cj;
56 |     uint8_t colx;
57 |     uint8_t coly;
58 |     uint8_t colz;
59 |     float uvx;
60 |     float uvy;
61 | 
62 |     for(int i=0; i < dH; i++) {
63 |         for(int j=0; j < dW; j++) {
64 |             uvx = uvMapClone[i*dW*2 + j*2 + 0];
65 |             uvy = uvMapClone[i*dW*2 + j*2 + 1];
66 |             colx = 0;
67 |             coly = 0;
68 |             colz = 0;
69 | 
70 |             if((uvx > 0 && uvx < 1 && uvy > 0 && uvy < 1) &&
71 |                 (depthMapClone[i*dW + j] < 32000)){
72 |                 ci = (int) (uvy * ((float) cH));
73 |                 cj = (int) (uvx * ((float) cW));
74 |                 colx = colourMapClone[ci*cW*3 + cj*3 + 0];
75 |                 coly = colourMapClone[ci*cW*3 + cj*3 + 1];
76 |                 colz = colourMapClone[ci*cW*3 + cj*3 + 2];
77 |             }
78 | 
79 |             syncMapClone[i*dW*3 + j*3 + 0] = colx;
80 |             syncMapClone[i*dW*3 + j*3 + 1] = coly;
81 |             syncMapClone[i*dW*3 + j*3 + 2] = colz;
82 | 
83 |         }
84 |     }
85 | }
86 | 
87 | // Python Callbacks
88 | static PyObject *getColour(PyObject *self, PyObject *args)
89 | {
90 |     npy_intp dims[3] = {cH, cW, 3};
91 | 
92 |     memcpy(colourMapClone, colourFullMap, cshmsz*3);
93 |     return PyArray_SimpleNewFromData(3, dims, NPY_UINT8, colourMapClone);
94 | }
95 | 
96 | static PyObject *getDepth(PyObject *self, PyObject *args)
97 | {
98 |     npy_intp dims[2] = {dH, dW};
99 | 
100 |     memcpy(depthMapClone, depthFullMap, dshmsz);
101 |     return PyArray_SimpleNewFromData(2, dims, NPY_INT16, depthMapClone);
102 | }
103 | 
104 | static PyObject *getAccel(PyObject *self, PyObject *args)
105 | {
106 |     npy_intp dims[1] = {3};
107 | 
108 |     memcpy(accelMapClone, accelFullMap, 3*sizeof(float));
109 |     return PyArray_SimpleNewFromData(1, dims, NPY_FLOAT32, accelMapClone);
110 | }
111 | 
112 | static PyObject *getVertex(PyObject *self, PyObject *args)
113 | {
114 |     npy_intp dims[3] = {dH, dW, 3};
115 |     memcpy(vertexMapClone, vertexFullMap, vshmsz*3);
116 |     return PyArray_SimpleNewFromData(3, dims, NPY_INT16, vertexMapClone);
117 | }
118 | 
119 | static PyObject *getVertexFP(PyObject *self, PyObject *args)
120 | {
121 |     npy_intp dims[3] = {dH, dW, 3};
122 |     memcpy(vertexFMapClone, vertexFFullMap, ushmsz*3);
123 |     return PyArray_SimpleNewFromData(3, dims, NPY_FLOAT32, vertexFMapClone);
124 | }
125 | 
126 | static PyObject *getUV(PyObject *self, PyObject *args)
127 | {
128 |     npy_intp dims[3] = {dH, dW, 2};
129 |     memcpy(uvMapClone, uvFullMap, ushmsz*2);
130 |     return PyArray_SimpleNewFromData(3, dims, NPY_FLOAT32, uvMapClone);
131 | }
132 | 
133 | static PyObject *getSync(PyObject *self, PyObject *args)
134 | {
135 |     npy_intp dims[3] = {dH, dW, 3};
136 | 
137 |     memcpy(uvMapClone, uvFullMap, ushmsz*2);
138 |     memcpy(colourMapClone, colourFullMap, cshmsz*3);
139 |     memcpy(depthMapClone, 
depthFullMap, dshmsz); 140 | 141 | buildSyncMap(); 142 | return PyArray_SimpleNewFromData(3, dims, NPY_UINT8, syncMapClone); 143 | } 144 | 145 | static PyObject *getDepthIntrinsics(PyObject *self, PyObject *args) 146 | { 147 | npy_intp dims[2] = {3, 3}; 148 | 149 | return PyArray_SimpleNewFromData(2, dims, NPY_FLOAT32, g_dIntrinsics); 150 | } 151 | 152 | 153 | static PyObject *initDS(PyObject *self, PyObject *args) 154 | { 155 | initds(); 156 | return Py_None; 157 | } 158 | 159 | static PyObject *killDS(PyObject *self, PyObject *args) 160 | { 161 | killds(); 162 | return Py_None; 163 | } 164 | 165 | 166 | static PyObject *last_ctime(PyObject *self, PyObject *args) 167 | { 168 | return Py_BuildValue("i", 0); 169 | } 170 | 171 | 172 | static PyObject *last_cframe(PyObject *self, PyObject *args) 173 | { 174 | return Py_BuildValue("i", g_cFrames); 175 | } 176 | 177 | 178 | static PyObject *last_dtime(PyObject *self, PyObject *args) 179 | { 180 | return Py_BuildValue("i", 0); 181 | } 182 | 183 | 184 | static PyObject *last_dframe(PyObject *self, PyObject *args) 185 | { 186 | return Py_BuildValue("i", g_dFrames); 187 | } 188 | 189 | 190 | static PyMethodDef DepthSenseMethods[] = { 191 | // GET MAPS 192 | {"getDepthMap", getDepth, METH_VARARGS, "Get Depth Map"}, 193 | {"getColourMap", getColour, METH_VARARGS, "Get Colour Map"}, 194 | {"getVertices", getVertex, METH_VARARGS, "Get Vertex Map"}, 195 | {"getVerticesFP", getVertexFP, METH_VARARGS, "Get Floating Point Vertex Map"}, 196 | {"getUVMap", getUV, METH_VARARGS, "Get UV Map"}, 197 | {"getSyncMap", getSync, METH_VARARGS, "Get Colour Overlay Map"}, 198 | {"getAcceleration", getAccel, METH_VARARGS, "Get Acceleration"}, 199 | {"getLastColorNum", last_cframe, METH_VARARGS, "Get last color frame number"}, 200 | {"getLastColorTime", last_ctime, METH_VARARGS, "Get last color frame time"}, 201 | {"getLastDepthNum", last_dframe, METH_VARARGS, "Get last depth frame number"}, 202 | {"getLastDepthTime", last_dtime, METH_VARARGS, "Get last depth frame time"}, 203 | {"getDepthIntrinsics", getDepthIntrinsics, METH_VARARGS, "Get intrinsics of depth camera"}, 204 | // CREATE MODULE 205 | {"start", initDS, METH_VARARGS, "Init DepthSense"}, 206 | {"stop", killDS, METH_VARARGS, "Kill DepthSense"}, 207 | {NULL, NULL, 0, NULL} /* Sentinel */ 208 | }; 209 | 210 | 211 | PyMODINIT_FUNC initlib_dscapture(void) 212 | { 213 | (void) Py_InitModule("lib_dscapture", DepthSenseMethods); 214 | // Clean up forked process, attach it to the python exit hook 215 | (void) Py_AtExit(killds); 216 | import_array(); 217 | } 218 | 219 | int main(int argc, char* argv[]) 220 | { 221 | 222 | /* Pass argv[0] to the Python interpreter */ 223 | Py_SetProgramName((char *)"DepthSense"); 224 | 225 | /* Initialize the Python interpreter. Required. */ 226 | Py_Initialize(); 227 | 228 | /* Add a static module */ 229 | initlib_dscapture(); 230 | 231 | //initds(); //for testing 232 | 233 | return 0; 234 | } 235 | -------------------------------------------------------------------------------- /src/util/helpers.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the file for diverse helper functions. 3 | 4 | Copyright 2015 Markus Oberweger, ICG, 5 | Graz University of Technology 6 | 7 | This file is part of DeepPrior. 
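(Aside on `buildSyncMap()` in depthsense.cxx above: each depth pixel is projected into the colour image through its normalized UV coordinates. The indexing, restated as a small Python sketch; the UV values are assumed, the map sizes come from the buffer declarations:)

```python
# Mirrors the indexing in buildSyncMap(); uvx/uvy are assumed sample values.
dH, dW = 240, 320     # depth map size (depthMapClone is 320*240)
cH, cW = 480, 640     # colour map size (colourMapClone is 640*480*3)
uvx, uvy = 0.25, 0.5  # normalized coords of one depth pixel

if 0 < uvx < 1 and 0 < uvy < 1:
    ci = int(uvy * cH)                 # colour row
    cj = int(uvx * cW)                 # colour column
    offset = ci * cW * 3 + cj * 3      # byte offset of the RGB triple
```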
8 | 
9 | DeepPrior is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | DeepPrior is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with DeepPrior. If not, see <http://www.gnu.org/licenses/>.
21 | """
22 | 
23 | import numpy
24 | 
25 | __author__ = "Markus Oberweger <oberweger@icg.tugraz.at>"
26 | __copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria"
27 | __credits__ = ["Markus Oberweger"]
28 | __license__ = "GPL"
29 | __version__ = "1.0"
30 | __maintainer__ = "Markus Oberweger"
31 | __email__ = "oberweger@icg.tugraz.at"
32 | __status__ = "Development"
33 | 
34 | 
35 | def ReLU(x):
36 |     """
37 |     Rectified linear unit
38 |     :param x: input value
39 |     :return: max(x, 0)
40 |     """
41 |     import theano.tensor as T
42 |     return T.nnet.relu(x, 0)
43 | 
44 | 
45 | def LeakyReLU(a=0.33):
46 |     """
47 |     Leaky rectified linear unit with configurable negative slope
48 |     :param a: slope for negative inputs
49 |     :return: activation function computing max(x, a*x)
50 |     """
51 |     import theano.tensor as T
52 | 
53 |     def inner(x):
54 |         return T.nnet.relu(x, a)
55 |     return inner
56 | 
57 | 
58 | def InvReLU(x):
59 |     """
60 |     Inverted rectified linear unit, i.e. ReLU of the negated input
61 |     :param x: input value
62 |     :return: max(-x, 0)
63 |     """
64 |     import theano.tensor as T
65 |     x *= -1.
66 |     return T.switch(x < 0, 0, x)
67 | 
68 | 
69 | def TruncLin(x):
70 |     """
71 |     Truncated linear unit
72 |     :param x: input value
73 |     :return: max(min(x,1),-1)
74 |     """
75 |     import theano.tensor as T
76 |     return T.switch(x < -1, -1, T.switch(x > 1, 1, x))
77 | 
78 | 
79 | def TruncReLU(x):
80 |     """
81 |     Truncated rectified linear unit
82 |     :param x: input value
83 |     :return: max(min(x,1),0)
84 |     """
85 |     import theano.tensor as T
86 |     return T.switch(x < 0, 0, T.switch(x > 1, 1, x))
87 | 
88 | 
89 | def SlopeLin(slope):
90 |     """
91 |     Linear unit with different slopes
92 |     :param slope: slope of negative quadrant
93 |     :return: x if x > 0 else x/slope
94 |     """
95 |     import theano.tensor as T
96 | 
97 |     def inner(x):
98 |         return T.switch(T.gt(x, 0), x, T.true_div(x, slope))
99 |     return inner
100 | 
101 | 
102 | def SlopeLinInv(slope):
103 |     """
104 |     Linear unit with different slopes, inverse scaling of SlopeLin
105 |     :param slope: slope of negative quadrant
106 |     :return: x if x > 0 else x*slope
107 |     """
108 |     import theano.tensor as T
109 | 
110 |     def inner(x):
111 |         return T.switch(T.gt(x, 0), x, T.mul(x, slope))
112 |     return inner
113 | 
114 | 
115 | def SlopeLin2(x, slope):
116 |     """
117 |     Linear unit with different slopes
118 |     :param slope: slope of negative quadrant
119 |     :return: x if x > 0 else x/slope
120 |     """
121 | 
122 |     import theano.tensor as T
123 |     return T.switch(T.gt(x, 0), x, T.true_div(x, slope))
124 | 
125 | 
126 | def huber(delta):
127 |     """
128 |     Huber loss, robust at 0
129 |     :param delta: delta parameter, boundary between the quadratic and linear branch
130 |     :return: function computing the elementwise Huber loss
131 |     """
132 |     import theano.tensor as T
133 | 
134 |     def inner(target, output):
135 |         d = target - output
136 |         a = .5 * d**2
137 |         b = delta * (T.abs_(d) - delta / 2.)
138 |         l = T.switch(T.abs_(d) <= delta, a, b)
139 |         return l
140 |     return inner
141 | 
142 | 
143 | def cartesian(arrays, out=None):
144 |     """
145 |     Generate a cartesian product of input arrays.
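(Aside: `huber()` above returns an elementwise loss *function* rather than a value. A usage sketch, assuming Theano is installed; the numbers are chosen to exercise both branches:)

```python
import numpy
import theano
import theano.tensor as T
from util.helpers import huber

t, o = T.vector('t'), T.vector('o')
loss = huber(delta=1.0)(t, o).mean()
f = theano.function([t, o], loss)

d = numpy.asarray([0.5, 3.0], dtype=theano.config.floatX)
z = numpy.zeros(2, dtype=theano.config.floatX)
f(d, z)  # 0.5*0.5**2 = 0.125 (quadratic), 1.0*(3.0-0.5) = 2.5 (linear); mean 1.3125
```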
146 | 147 | Parameters 148 | ---------- 149 | arrays : list of array-like 150 | 1-D arrays to form the cartesian product of. 151 | out : ndarray 152 | Array to place the cartesian product in. 153 | 154 | Returns 155 | ------- 156 | out : ndarray 157 | 2-D array of shape (M, len(arrays)) containing cartesian products 158 | formed of input arrays. 159 | 160 | Examples 161 | -------- 162 | >>> cartesian(([1, 2, 3], [4, 5], [6, 7])) 163 | array([[1, 4, 6], 164 | [1, 4, 7], 165 | [1, 5, 6], 166 | [1, 5, 7], 167 | [2, 4, 6], 168 | [2, 4, 7], 169 | [2, 5, 6], 170 | [2, 5, 7], 171 | [3, 4, 6], 172 | [3, 4, 7], 173 | [3, 5, 6], 174 | [3, 5, 7]]) 175 | 176 | """ 177 | 178 | # https://stackoverflow.com/questions/1208118/using-numpy-to-build-an-array-of-all-combinations-of-two-arrays 179 | arrays = [numpy.asarray(x) for x in arrays] 180 | dtype = arrays[0].dtype 181 | 182 | n = numpy.prod([x.size for x in arrays]) 183 | if out is None: 184 | out = numpy.zeros([n, len(arrays)], dtype=dtype) 185 | 186 | m = n / arrays[0].size 187 | out[:, 0] = numpy.repeat(arrays[0], m) 188 | if arrays[1:]: 189 | cartesian(arrays[1:], out=out[0:m, 1:]) 190 | for j in xrange(1, arrays[0].size): 191 | out[j*m:(j+1)*m, 1:] = out[0:m, 1:] 192 | return out 193 | 194 | 195 | def shuffle_many_inplace(arrays, random_state=None): 196 | """ 197 | Shuffle given list of array consistently along first dimension 198 | :param arrays: list of arrays 199 | :param random_state: random state or seed 200 | :return: None 201 | """ 202 | 203 | if random_state is None: 204 | rng = numpy.random.mtrand._rand 205 | elif isinstance(random_state, numpy.random.RandomState): 206 | rng = random_state 207 | else: 208 | raise ValueError("random_state must be None or numpy RandomState") 209 | 210 | assert all(i.shape[0] == arrays[0].shape[0] for i in arrays[1:]) 211 | 212 | # Fisher-Yates Shuffle 213 | for oi in reversed(range(1, arrays[0].shape[0])): 214 | ni = rng.randint(oi+1) 215 | for a in arrays: 216 | a[[oi, ni]] = a[[ni, oi]] 217 | 218 | 219 | def gaussian_kernel(kernel_shape, sigma=None): 220 | """ 221 | Get 2D Gaussian kernel 222 | :param kernel_shape: kernel size 223 | :param sigma: sigma of Gaussian distribution 224 | :return: 2D Gaussian kernel 225 | """ 226 | kern = numpy.zeros((kernel_shape, kernel_shape), dtype='float32') 227 | 228 | # get sigma from kernel size 229 | if sigma is None: 230 | sigma = 0.3*((kernel_shape-1.)*0.5 - 1.) + 0.8 231 | 232 | def gauss(x, y, s): 233 | Z = 2. * numpy.pi * s ** 2. 234 | return 1. / Z * numpy.exp(-(x ** 2. + y ** 2.) / (2. * s ** 2.)) 235 | 236 | mid = numpy.floor(kernel_shape / 2.) 237 | for i in xrange(0, kernel_shape): 238 | for j in xrange(0, kernel_shape): 239 | kern[i, j] = gauss(i - mid, j - mid, sigma) 240 | 241 | return kern / kern.sum() 242 | -------------------------------------------------------------------------------- /src/util/initdepthsense.cxx: -------------------------------------------------------------------------------- 1 | /* 2 | * DepthSense SDK for Python and SimpleCV 3 | * ----------------------------------------------------------------------------- 4 | * file: depthsense.cxx 5 | * author: Abdi Dahir 6 | * modified: May 9 2014 7 | * vim: set fenc=utf-8:ts=4:sw=4:expandtab: 8 | * 9 | * DepthSense hooks happen here. Initializes camera and buffers. 
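(Aside: `shuffle_many_inplace()` in helpers.py above applies one Fisher-Yates permutation consistently across several arrays, which is how data and ground truth stay aligned. A short usage sketch with assumed arrays:)

```python
import numpy
from util.helpers import shuffle_many_inplace

rng = numpy.random.RandomState(23455)
data = numpy.arange(8).reshape(4, 2)   # e.g. stacked depth crops
labels = numpy.arange(4)               # matching ground-truth entries
shuffle_many_inplace([data, labels], random_state=rng)
# rows of `data` and entries of `labels` remain aligned after shuffling
```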
--------------------------------------------------------------------------------
/src/util/initdepthsense.cxx:
--------------------------------------------------------------------------------
/*
 * DepthSense SDK for Python and SimpleCV
 * -----------------------------------------------------------------------------
 * file: initdepthsense.cxx
 * author: Abdi Dahir
 * modified: May 9 2014
 * vim: set fenc=utf-8:ts=4:sw=4:expandtab:
 *
 * DepthSense hooks happen here. Initializes camera and buffers.
 * -----------------------------------------------------------------------------
 */

// MSVC build completely untested
#ifdef _MSC_VER
#include <windows.h>
#endif

// C includes
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/mman.h>

// C++ includes
#include <iostream>
#include <vector>
#include <exception>
#include <string>
//#include <thread>

// DepthSense SDK includes
#include <DepthSense.hxx>

// Application includes
#include "initdepthsense.h"

using namespace DepthSense;
using namespace std;

// DepthSense node inits
static Context g_context;
static DepthNode g_dnode;
static ColorNode g_cnode;
static AudioNode g_anode;

static bool g_bDeviceFound = false;

// frame counters
uint32_t g_aFrames = 0;
uint32_t g_cFrames = 0;
uint32_t g_dFrames = 0;

// shared mem
int16_t *depthMap;
int16_t *depthFullMap;

int16_t *vertexMap;
int16_t *vertexFullMap;

uint8_t *colourMap;
uint8_t *colourFullMap;

float *uvMap;
float *uvFullMap;

float *vertexFMap;
float *vertexFFullMap;

float *accelMap;
float *accelFullMap;

// proc mem
int16_t * normalMap;
int16_t * dxMap;
int16_t * dyMap;
int16_t * diffMap;
int16_t * diffResult;
int16_t * normalResult;

// intrinsics
float g_dIntrinsics[9] = {1.,0.,0., 0.,1.,0., 0.,0.,1.};

// clean up
int child_pid = 0;

// Can't do an atomic update, but we can at least swap the buffer pointers
static void uptrSwap (uint8_t **pa, uint8_t **pb){
    uint8_t *temp = *pa;
    *pa = *pb;
    *pb = temp;
}

static void fptrSwap (float **pa, float **pb){
    float *temp = *pa;
    *pa = *pb;
    *pb = temp;
}

static void iptrSwap (int16_t **pa, int16_t **pb){
    int16_t *temp = *pa;
    *pa = *pb;
    *pb = temp;
}

/*----------------------------------------------------------------------------*/
// New audio sample event handler
static void onNewAudioSample(AudioNode node, AudioNode::NewSampleReceivedData data)
{
    //printf("A#%u: %d\n",g_aFrames,data.audioData.size());
    g_aFrames++;
}

/*----------------------------------------------------------------------------*/
// New color sample event handler
static void onNewColorSample(ColorNode node, ColorNode::NewSampleReceivedData data)
{
    //printf("C#%u: %d\n",g_cFrames,data.colorMap.size());
    memcpy(colourMap, data.colorMap, 3*cshmsz);
    uptrSwap(&colourMap, &colourFullMap);
    g_cFrames++;
}

/*----------------------------------------------------------------------------*/
// New depth sample event handler
static void onNewDepthSample(DepthNode node, DepthNode::NewSampleReceivedData data)
{
    // Depth
    memcpy(depthMap, data.depthMap, dshmsz);
    iptrSwap(&depthMap, &depthFullMap);

    // Vertices
    Vertex vertex;
    FPVertex fvertex;
    for(int i=0; i < dH; i++) {
        for(int j=0; j < dW; j++) {
            vertex = data.vertices[i*dW + j];
            fvertex = data.verticesFloatingPoint[i*dW + j];

            vertexMap[i*dW*3 + j*3 + 0] = vertex.x;
            vertexMap[i*dW*3 + j*3 + 1] = vertex.y;
            vertexMap[i*dW*3 + j*3 + 2] = vertex.z;

            vertexFMap[i*dW*3 + j*3 + 0] = fvertex.x;
            vertexFMap[i*dW*3 + j*3 + 1] = fvertex.y;
            vertexFMap[i*dW*3 + j*3 + 2] = fvertex.z;
            //cout << vertex.x << vertex.y << vertex.z << endl;
            //cout << fvertex.x << fvertex.y << fvertex.z << endl;
        }
    }

    iptrSwap(&vertexMap, &vertexFullMap);
    fptrSwap(&vertexFMap, &vertexFFullMap);

    // uv
    UV uv;
    for(int i=0; i < dH; i++) {
        for(int j=0; j < dW; j++) {
            uv = data.uvMap[i*dW + j];
            uvMap[i*dW*2 + j*2 + 0] = uv.u;
            uvMap[i*dW*2 + j*2 + 1] = uv.v;
            //cout << uv.u << uv.v << endl;
        }
    }

    fptrSwap(&uvMap, &uvFullMap);

    // Acceleration
    accelMap[0] = data.acceleration.x;
    accelMap[1] = data.acceleration.y;
    accelMap[2] = data.acceleration.z;

    fptrSwap(&accelMap, &accelFullMap);

    g_dFrames++;

    // intrinsic parameters
    StereoCameraParameters stereo_param;
    stereo_param = data.stereoCameraParameters;
    g_dIntrinsics[0] = stereo_param.depthIntrinsics.fx; //the focal length along the x axis, expressed in pixel units
    g_dIntrinsics[2] = stereo_param.depthIntrinsics.cx; //the central point along the x axis, expressed in pixel units
    g_dIntrinsics[4] = stereo_param.depthIntrinsics.fy; //the focal length along the y axis, expressed in pixel units
    g_dIntrinsics[5] = stereo_param.depthIntrinsics.cy; //the central point along the y axis, expressed in pixel units
}
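The handler publishes the depth camera's intrinsics as a row-major 3x3 camera matrix K = [fx 0 cx; 0 fy cy; 0 0 1]. As a sketch of how such a matrix projects a camera-space vertex to pixel coordinates (Python, with made-up values standing in for the buffer; not part of this file):

import numpy

# hypothetical values in place of g_dIntrinsics read back from the extension
K = numpy.array([224.5, 0., 160.,
                 0., 230.5, 120.,
                 0., 0., 1.]).reshape(3, 3)

x, y, z = 30., -20., 500.         # vertex in camera coordinates
u = K[0, 0] * x / z + K[0, 2]     # pixel column: fx*x/z + cx
v = K[1, 1] * y / z + K[1, 2]     # pixel row:    fy*y/z + cy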
/*----------------------------------------------------------------------------*/
static void configureAudioNode()
{
    g_anode.newSampleReceivedEvent().connect(&onNewAudioSample);

    AudioNode::Configuration config = g_anode.getConfiguration();
    config.sampleRate = 44100;

    try
    {
        g_context.requestControl(g_anode,0);

        g_anode.setConfiguration(config);

        g_anode.setInputMixerLevel(0.5f);
    }
    catch (ArgumentException& e)
    {
        printf("Argument Exception: %s\n",e.what());
    }
    catch (UnauthorizedAccessException& e)
    {
        printf("Unauthorized Access Exception: %s\n",e.what());
    }
    catch (ConfigurationException& e)
    {
        printf("Configuration Exception: %s\n",e.what());
    }
    catch (StreamingException& e)
    {
        printf("Streaming Exception: %s\n",e.what());
    }
    catch (TimeoutException&)
    {
        printf("TimeoutException\n");
    }
}

/*----------------------------------------------------------------------------*/
static void configureDepthNode()
{
    g_dnode.newSampleReceivedEvent().connect(&onNewDepthSample);

    DepthNode::Configuration config = g_dnode.getConfiguration();
    config.frameFormat = FRAME_FORMAT_QVGA;
    config.framerate = 30;
    config.mode = DepthNode::CAMERA_MODE_CLOSE_MODE;
    config.saturation = true;

    try
    {
        g_context.requestControl(g_dnode,0);
        g_dnode.setConfidenceThreshold(100);

        g_dnode.setEnableDepthMap(true);
        g_dnode.setEnableVertices(true);
        g_dnode.setEnableVerticesFloatingPoint(true);
        g_dnode.setEnableAccelerometer(true);
        g_dnode.setEnableUvMap(true);

        g_dnode.setConfiguration(config);
    }
    catch (ArgumentException& e)
    {
        printf("Argument Exception: %s\n",e.what());
    }
    catch (UnauthorizedAccessException& e)
    {
        printf("Unauthorized Access Exception: %s\n",e.what());
    }
    catch (IOException& e)
    {
        printf("IO Exception: %s\n",e.what());
    }
    catch (InvalidOperationException& e)
    {
        printf("Invalid Operation Exception: %s\n",e.what());
    }
    catch (ConfigurationException& e)
    {
        printf("Configuration Exception: %s\n",e.what());
    }
    catch (StreamingException& e)
    {
        printf("Streaming Exception: %s\n",e.what());
    }
    catch (TimeoutException&)
    {
        printf("TimeoutException\n");
    }
}
printf("IO Exception: %s\n",e.what()); 265 | } 266 | catch (InvalidOperationException& e) 267 | { 268 | printf("Invalid Operation Exception: %s\n",e.what()); 269 | } 270 | catch (ConfigurationException& e) 271 | { 272 | printf("Configuration Exception: %s\n",e.what()); 273 | } 274 | catch (StreamingException& e) 275 | { 276 | printf("Streaming Exception: %s\n",e.what()); 277 | } 278 | catch (TimeoutException&) 279 | { 280 | printf("TimeoutException\n"); 281 | } 282 | 283 | } 284 | 285 | /*----------------------------------------------------------------------------*/ 286 | static void configureColorNode() 287 | { 288 | 289 | // connect new color sample handler 290 | g_cnode.newSampleReceivedEvent().connect(&onNewColorSample); 291 | 292 | ColorNode::Configuration config = g_cnode.getConfiguration(); 293 | config.frameFormat = FRAME_FORMAT_VGA; 294 | config.compression = COMPRESSION_TYPE_MJPEG; 295 | config.powerLineFrequency = POWER_LINE_FREQUENCY_50HZ; 296 | config.framerate = 30; 297 | 298 | g_cnode.setEnableColorMap(true); 299 | 300 | try 301 | { 302 | g_context.requestControl(g_cnode,0); 303 | 304 | g_cnode.setConfiguration(config); 305 | g_cnode.setBrightness(0); 306 | g_cnode.setContrast(5); 307 | g_cnode.setSaturation(5); 308 | g_cnode.setHue(0); 309 | g_cnode.setGamma(3); 310 | g_cnode.setWhiteBalance(4650); 311 | g_cnode.setSharpness(5); 312 | g_cnode.setWhiteBalanceAuto(true); 313 | 314 | 315 | } 316 | catch (ArgumentException& e) 317 | { 318 | printf("Argument Exception: %s\n",e.what()); 319 | } 320 | catch (UnauthorizedAccessException& e) 321 | { 322 | printf("Unauthorized Access Exception: %s\n",e.what()); 323 | } 324 | catch (IOException& e) 325 | { 326 | printf("IO Exception: %s\n",e.what()); 327 | } 328 | catch (InvalidOperationException& e) 329 | { 330 | printf("Invalid Operation Exception: %s\n",e.what()); 331 | } 332 | catch (ConfigurationException& e) 333 | { 334 | printf("Configuration Exception: %s\n",e.what()); 335 | } 336 | catch (StreamingException& e) 337 | { 338 | printf("Streaming Exception: %s\n",e.what()); 339 | } 340 | catch (TimeoutException&) 341 | { 342 | printf("TimeoutException\n"); 343 | } 344 | 345 | } 346 | 347 | /*----------------------------------------------------------------------------*/ 348 | static void configureNode(Node node) 349 | { 350 | if ((node.is())&&(!g_dnode.isSet())) 351 | { 352 | g_dnode = node.as(); 353 | configureDepthNode(); 354 | g_context.registerNode(node); 355 | } 356 | 357 | if ((node.is())&&(!g_cnode.isSet())) 358 | { 359 | g_cnode = node.as(); 360 | configureColorNode(); 361 | g_context.registerNode(node); 362 | } 363 | 364 | if ((node.is())&&(!g_anode.isSet())) 365 | { 366 | g_anode = node.as(); 367 | configureAudioNode(); 368 | // Audio seems to take up bandwith on usb3.0 devices ... 
/*----------------------------------------------------------------------------*/
static void onNodeConnected(Device device, Device::NodeAddedData data)
{
    configureNode(data.node);
}

/*----------------------------------------------------------------------------*/
static void onNodeDisconnected(Device device, Device::NodeRemovedData data)
{
    if (data.node.is<AudioNode>() && (data.node.as<AudioNode>() == g_anode))
        g_anode.unset();
    if (data.node.is<ColorNode>() && (data.node.as<ColorNode>() == g_cnode))
        g_cnode.unset();
    if (data.node.is<DepthNode>() && (data.node.as<DepthNode>() == g_dnode))
        g_dnode.unset();
    printf("Node disconnected\n");
}

/*----------------------------------------------------------------------------*/
static void onDeviceConnected(Context context, Context::DeviceAddedData data)
{
    if (!g_bDeviceFound)
    {
        data.device.nodeAddedEvent().connect(&onNodeConnected);
        data.device.nodeRemovedEvent().connect(&onNodeDisconnected);
        g_bDeviceFound = true;
    }
}

/*----------------------------------------------------------------------------*/
static void onDeviceDisconnected(Context context, Context::DeviceRemovedData data)
{
    g_bDeviceFound = false;
    printf("Device disconnected\n");
}

void killds()
{
    if (child_pid != 0) {
        cout << "DEPTHSENSE SHUTDOWN IN PROGRESS ..." << endl;
        kill(child_pid, SIGTERM);
        munmap(depthMap, dshmsz);
        munmap(depthFullMap, dshmsz);
        munmap(colourMap, cshmsz*3);
        munmap(colourFullMap, cshmsz*3);
        munmap(vertexMap, vshmsz*3);
        munmap(vertexFullMap, vshmsz*3);
        munmap(vertexFMap, ushmsz*3);
        munmap(vertexFFullMap, ushmsz*3);
        munmap(uvMap, ushmsz*2);
        munmap(uvFullMap, ushmsz*2);
        munmap(accelMap, 3*sizeof(float));
        munmap(accelFullMap, 3*sizeof(float));
        cout << "DEPTHSENSE SHUTDOWN SUCCESSFUL" << endl;
    }
}


static void * initmap(int sz)
{
    void * map;
    if ((map = mmap(NULL, sz, PROT_READ|PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0)) == MAP_FAILED) {
        perror("mmap: cannot alloc shmem;");
        exit(1);
    }

    return map;
}

static void * initblock(int sz)
{
    void * block;
    if ((block = malloc(sz)) == NULL) {
        perror("malloc: cannot alloc mem;");
        exit(1);
    }

    return block;
}
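initmap() requests anonymous MAP_SHARED pages, so the mappings stay shared with the capture child after the fork() in initds() below: the child writes frames into one buffer of each pair while the parent reads the other, and the event handlers swap the pointers per frame. The same mechanism in Python, as a standalone sketch (not part of the build):

import mmap
import os
import struct

# anonymous mapping, MAP_SHARED by default, hence visible across fork()
buf = mmap.mmap(-1, 4)

if os.fork() == 0:
    # child: the producer writes into the shared mapping
    buf[0:4] = struct.pack('i', 42)
    os._exit(0)

os.wait()
print(struct.unpack('i', buf[0:4])[0])  # parent reads 42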
void initds()
{
    cout << "DEPTHSENSE STARTUP IN PROGRESS ..." << endl;
    // shared mem double buffers
    depthMap = (int16_t *) initmap(dshmsz);
    depthFullMap = (int16_t *) initmap(dshmsz);

    accelMap = (float *) initmap(3*sizeof(float));
    accelFullMap = (float *) initmap(3*sizeof(float));

    colourMap = (uint8_t *) initmap(cshmsz*3);
    colourFullMap = (uint8_t *) initmap(cshmsz*3);

    vertexMap = (int16_t *) initmap(vshmsz*3);
    vertexFullMap = (int16_t *) initmap(vshmsz*3);

    uvMap = (float *) initmap(ushmsz*2);
    uvFullMap = (float *) initmap(ushmsz*2);

    vertexFMap = (float *) initmap(ushmsz*3);
    vertexFFullMap = (float *) initmap(ushmsz*3);

    child_pid = fork();

    // child goes into the capture loop
    if (child_pid == 0) {
        g_context = Context::createStandalone();
        // TODO: Support multiple cameras ... standalone mode forces a single
        // session; we could instead create a server once and join that server
        // each time. Allow a list of devices
        //g_context = Context::create("localhost");
        g_context.deviceAddedEvent().connect(&onDeviceConnected);
        g_context.deviceRemovedEvent().connect(&onDeviceDisconnected);

        // Get the list of currently connected devices
        vector<Device> da = g_context.getDevices();

        // We are only interested in the first device
        if (da.size() >= 1)
        {
            g_bDeviceFound = true;

            da[0].nodeAddedEvent().connect(&onNodeConnected);
            da[0].nodeRemovedEvent().connect(&onNodeDisconnected);

            vector<Node> na = da[0].getNodes();

            for (int n = 0; n < (int)na.size(); n++)
                configureNode(na[n]);
        }

        g_context.startNodes();
        g_context.run();

        // TODO: Proper cleanup would call context.quit() asynchronously in the
        // child proc somehow. Currently the proc is just killed.
        g_context.stopNodes();

        if (g_cnode.isSet()) g_context.unregisterNode(g_cnode);
        if (g_dnode.isSet()) g_context.unregisterNode(g_dnode);
        if (g_anode.isSet()) g_context.unregisterNode(g_anode);

        exit(EXIT_SUCCESS);
    }

    cout << "DEPTHSENSE STARTUP SUCCESSFUL" << endl;
}

--------------------------------------------------------------------------------
/src/util/initdepthsense.h:
--------------------------------------------------------------------------------
/*
 * DepthSense SDK for Python and SimpleCV
 * -----------------------------------------------------------------------------
 * file: initdepthsense.h
 * author: Abdi Dahir
 * modified: May 9 2014
 * vim: set fenc=utf-8:ts=4:sw=4:expandtab:
 *
 * Image buffers defined here along with the DepthSense start/stop ops
 * -----------------------------------------------------------------------------
 */

#include <stdint.h>

// map dimensions
static int32_t dW = 320;
static int32_t dH = 240;
static int32_t cW = 640;
static int32_t cH = 480;

static int dshmsz = dW*dH*sizeof(int16_t);
static int cshmsz = cW*cH*sizeof(uint8_t);
static int vshmsz = dW*dH*sizeof(int16_t);
static int ushmsz = dW*dH*sizeof(float);
static int hshmsz = dW*dH*sizeof(uint8_t);

// shared mem depth maps
extern int16_t *depthMap;
extern int16_t *depthFullMap;

// shared mem vertex maps
extern int16_t *vertexMap;
extern int16_t *vertexFullMap;

extern float *vertexFMap;
extern float *vertexFFullMap;

// shared mem colour maps
extern uint8_t *colourMap;
extern uint8_t *colourFullMap;

// shared mem accel maps
extern float *accelMap;
extern float *accelFullMap;

// shared mem uv maps
extern float *uvMap;
extern float *uvFullMap;

// frame counters
extern uint32_t g_aFrames;
extern uint32_t g_cFrames;
extern uint32_t g_dFrames;

// intrinsics
extern float g_dIntrinsics[9];

extern "C" {
void killds();
void initds();
}
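Since killds() and initds() are exported with C linkage, the compiled extension can also be driven directly from Python via ctypes. A minimal sketch under the assumption that the cmake build produced a shared object named libinitdepthsense.so (the name is hypothetical; the repository normally accesses the camera through the wrappers in src/util):

import ctypes

# hypothetical library name; use whatever the cmake build actually produced
ds = ctypes.CDLL('./libinitdepthsense.so')

ds.initds()      # fork the capture child and set up the shared buffers
try:
    pass         # ... read frames through the higher-level wrappers ...
finally:
    ds.killds()  # terminate the child and unmap the buffers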
--------------------------------------------------------------------------------
/src/util/vtkpointcloud.py:
--------------------------------------------------------------------------------
"""Provides class for handling point clouds in VTK.

VtkPointCloud resembles a point cloud for display in VTK.
Use it to manage the 3D points.

Copyright 2015 Markus Oberweger, ICG,
Graz University of Technology

This file is part of DeepPrior.

DeepPrior is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

DeepPrior is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with DeepPrior. If not, see <http://www.gnu.org/licenses/>.
"""

import vtk
import numpy

__author__ = "Markus Oberweger <oberweger@icg.tugraz.at>"
__copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria"
__credits__ = ["Markus Oberweger"]
__license__ = "GPL"
__version__ = "1.0"
__maintainer__ = "Markus Oberweger"
__email__ = "oberweger@icg.tugraz.at"
__status__ = "Development"


class VtkPointCloud:
    """
    Manage 3D point cloud in VTK
    @see: http://sukhbinder.wordpress.com/2013/09/17/python-vtk-script-to-display-3d-xyz-data/
    """
    def __init__(self, zMin=-10.0, zMax=10.0, maxNumPoints=1e6):
        """
        Initialize class
        :param zMin: minimum depth
        :param zMax: maximum depth
        :param maxNumPoints: maximum number of points
        :return: None
        """
        self.maxNumPoints = int(maxNumPoints)
        self.vtkPolyData = vtk.vtkPolyData()
        self.clearPoints()
        mapper = vtk.vtkPolyDataMapper()
        mapper.SetInputData(self.vtkPolyData)
        mapper.SetColorModeToDefault()
        mapper.SetScalarRange(zMin, zMax)
        mapper.SetScalarVisibility(1)
        self.vtkActor = vtk.vtkActor()
        self.vtkActor.SetMapper(mapper)

    def addPoint(self, point):
        """
        Add point to the point cloud; once the maximum number of points is
        reached, a random existing point is overwritten
        :param point: 3D coordinates
        :return: None
        """
        if self.vtkPoints.GetNumberOfPoints() < self.maxNumPoints:
            pointId = self.vtkPoints.InsertNextPoint(point[:])
            self.vtkDepth.InsertNextValue(point[2])
            self.vtkCells.InsertNextCell(1)
            self.vtkCells.InsertCellPoint(pointId)
        else:
            r = numpy.random.randint(0, self.maxNumPoints)
            self.vtkPoints.SetPoint(r, point[:])
            self.vtkDepth.SetValue(r, point[2])  # keep the depth scalar in sync
        self.vtkCells.Modified()
        self.vtkPoints.Modified()
        self.vtkDepth.Modified()

    def clearPoints(self):
        """
        Clear all points from the point cloud
        :return: None
        """
        self.vtkPoints = vtk.vtkPoints()
        self.vtkCells = vtk.vtkCellArray()
        self.vtkDepth = vtk.vtkDoubleArray()
        self.vtkDepth.SetName('DepthArray')
        self.vtkPolyData.SetPoints(self.vtkPoints)
        self.vtkPolyData.SetVerts(self.vtkCells)
        self.vtkPolyData.GetPointData().SetScalars(self.vtkDepth)
        self.vtkPolyData.GetPointData().SetActiveScalars('DepthArray')
--------------------------------------------------------------------------------
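As a usage sketch for VtkPointCloud above, rendering a random cloud takes only the standard VTK render/interactor setup (illustrative, not part of the repository):

import numpy
import vtk

pc = VtkPointCloud(zMin=-1., zMax=1.)
for p in numpy.random.uniform(-1., 1., (1000, 3)):
    pc.addPoint(p)

renderer = vtk.vtkRenderer()
renderer.AddActor(pc.vtkActor)      # points are colored by their depth scalar
renderer.ResetCamera()
window = vtk.vtkRenderWindow()
window.AddRenderer(renderer)
interactor = vtk.vtkRenderWindowInteractor()
interactor.SetRenderWindow(window)
window.Render()
interactor.Start()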