├── .gitattributes ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── github_census ├── fit_noise.py ├── generate_kitti2012.py ├── generate_middlebury.py ├── generate_noise.py ├── github_census.json ├── github_dt.json ├── github_sgbm.json ├── optimize.py ├── optimize_sgm.py ├── optimize_thresh.py ├── pystereo-fscore.ipynb ├── pystereo-grad.ipynb ├── pystereo-tf.ipynb ├── pystereo.ipynb ├── rms_error.json ├── run_all.py └── util.py ├── learning ├── 1dcnn-basic.py ├── 1dcnn-nin-basic.py ├── basic.py ├── converging-nn-basic.py ├── cosine-basic.py ├── nin-smaller-basic.py ├── results.md ├── run_model.py ├── tflow.py ├── train_2d-orig.py └── train_2d.py ├── msvc ├── CensusMatching.vcxproj ├── CensusMatching.vcxproj.filters ├── CensusTesting.sln ├── librs_demo.vcxproj ├── librs_demo.vcxproj.filters ├── packages.config ├── rsm_error.vcxproj ├── rsm_error.vcxproj.filters ├── vis_pfm.vcxproj └── vis_pfm.vcxproj.filters └── src ├── Main.cpp ├── bmMatch.cpp ├── bmMatch.h ├── cMatch.cpp ├── cMatch.h ├── cam_util.h ├── cost_to_conf.cpp ├── geometric.h ├── image.h ├── image_filter.h ├── image_io.h ├── imio.cpp ├── imio.h ├── imshow.cpp ├── json.h ├── r200Match.cpp ├── r200Match.h ├── rms_error.cpp ├── rs_demo.cpp ├── sgbmMatch.cpp ├── sgbmMatch.h ├── stb_image.h ├── stb_image_write.h ├── stereo.h ├── subpixel_extract.cpp ├── vecmatquat.h ├── vecmatquat_minimal.h └── vis_pfm.cpp /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is need for earlier builds of msysgit that does not have it on by 10 | # default for csharp files. 11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 
42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | 4 | # User-specific files 5 | *.suo 6 | *.user 7 | *.sln.docstates 8 | 9 | # Build results 10 | [Dd]ebug/ 11 | [Dd]ebugPublic/ 12 | [Rr]elease/ 13 | x64/ 14 | build/ 15 | bld/ 16 | [Bb]in/ 17 | [Oo]bj/ 18 | 19 | # Roslyn cache directories 20 | *.ide/ 21 | 22 | # MSTest test Results 23 | [Tt]est[Rr]esult*/ 24 | [Bb]uild[Ll]og.* 25 | 26 | #NUNIT 27 | *.VisualState.xml 28 | TestResult.xml 29 | 30 | # Build Results of an ATL Project 31 | [Dd]ebugPS/ 32 | [Rr]eleasePS/ 33 | dlldata.c 34 | *.jpg 35 | *.png 36 | *.pgm 37 | *.ppm 38 | *_i.c 39 | *_p.c 40 | *_i.h 41 | *.ilk 42 | *.meta 43 | *.obj 44 | *.pch 45 | *.pdb 46 | *.pgc 47 | *.pgd 48 | *.rsp 49 | *.sbr 50 | *.tlb 51 | *.tli 52 | *.tlh 53 | *.tmp 54 | *.tmp_proj 55 | *.log 56 | *.vspscc 57 | *.vssscc 58 | .builds 59 | *.pidb 60 | *.svclog 61 | *.scc 62 | 63 | # Chutzpah Test files 64 | _Chutzpah* 65 | 66 | # Visual C++ cache files 67 | ipch/ 68 | *.aps 69 | *.ncb 70 | *.opensdf 71 | *.sdf 72 | *.cachefile 73 | 74 | # Visual Studio profiler 75 | *.psess 76 | *.vsp 77 | *.vspx 78 | 79 | # TFS 2012 Local Workspace 80 | $tf/ 81 | 82 | # Guidance Automation Toolkit 83 | *.gpState 84 | 85 | # ReSharper is a .NET coding add-in 86 | _ReSharper*/ 87 | *.[Rr]e[Ss]harper 88 | *.DotSettings.user 89 | 90 | # JustCode is a .NET coding addin-in 91 | .JustCode 92 | 93 | # TeamCity is a build add-in 94 | _TeamCity* 95 | 96 | # DotCover is a Code Coverage Tool 97 | *.dotCover 98 | 99 | # NCrunch 100 | _NCrunch_* 101 | .*crunch*.local.xml 102 | 103 | # MightyMoose 104 | *.mm.* 105 | AutoTest.Net/ 106 | 107 | # Web workbench (sass) 108 | .sass-cache/ 109 | 110 | # Installshield output folder 111 | [Ee]xpress/ 112 | 113 | # DocProject is a documentation generator add-in 114 | DocProject/buildhelp/ 115 | DocProject/Help/*.HxT 116 | DocProject/Help/*.HxC 117 | DocProject/Help/*.hhc 118 | DocProject/Help/*.hhk 119 | DocProject/Help/*.hhp 120 | DocProject/Help/Html2 121 | DocProject/Help/html 122 | 123 | # Click-Once directory 124 | publish/ 125 | 126 | # Publish Web Output 127 | *.[Pp]ublish.xml 128 | *.azurePubxml 129 | ## TODO: Comment the next line if you want to checkin your 130 | ## web deploy settings but do note that will include unencrypted 131 | ## passwords 132 | #*.pubxml 133 | 134 | # NuGet Packages Directory 135 | msvc/packages/* 136 | ## TODO: If the tool you use requires repositories.config 137 | ## uncomment the next 
line 138 | #!packages/repositories.config 139 | 140 | # Enable "build/" folder in the NuGet Packages folder since 141 | # NuGet packages use it for MSBuild targets. 142 | # This line needs to be after the ignore of the build folder 143 | # (and the packages folder if the line above has been uncommented) 144 | !packages/build/ 145 | 146 | # Windows Azure Build Output 147 | csx/ 148 | *.build.csdef 149 | 150 | # Windows Store app package directory 151 | AppPackages/ 152 | 153 | # Others 154 | sql/ 155 | *.Cache 156 | ClientBin/ 157 | [Ss]tyle[Cc]op.* 158 | ~$* 159 | *~ 160 | *.dbmdl 161 | *.dbproj.schemaview 162 | *.pfx 163 | *.publishsettings 164 | node_modules/ 165 | 166 | # RIA/Silverlight projects 167 | Generated_Code/ 168 | 169 | # Backup & report files from converting an old project file 170 | # to a newer Visual Studio version. Backup files are not needed, 171 | # because we have git ;-) 172 | _UpgradeReport_Files/ 173 | Backup*/ 174 | UpgradeLog*.XML 175 | UpgradeLog*.htm 176 | 177 | # SQL Server files 178 | *.mdf 179 | *.ldf 180 | 181 | # Business Intelligence projects 182 | *.rdl.data 183 | *.bim.layout 184 | *.bim_*.settings 185 | 186 | # Microsoft Fakes 187 | FakesAssemblies/ 188 | 189 | # LightSwitch generated files 190 | GeneratedArtifacts/ 191 | _Pvt_Extensions/ 192 | ModelManifest.xml 193 | 194 | #NuGet 195 | packages 196 | !packages/repositories.config -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | #all: centest librs_demo vis_pfm rsm_error 2 | 3 | uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') 4 | machine := $(shell sh -c "$(CC) -dumpmachine || echo unknown") 5 | 6 | 7 | ifeq ($(uname_S),Darwin) 8 | CXXFLAGS += -I/usr/local/include 9 | GLFW3_FLAGS := -lglfw -framework OpenGL 10 | else 11 | GLFW3_FLAGS := `pkg-config --cflags --libs glfw3 gl` 12 | endif 13 | 14 | CXX ?= g++ 15 | # Extension of source files used in the project 16 | SRC_EXT = cpp 17 | # Path to the source directory, relative to the makefile 18 | LIBS = 19 | # General compiler flags 20 | COMPILE_FLAGS = -std=c++14 21 | #COMPILE_FLAGS = -std=c++11 -g -w 22 | # Additional release-specific flags 23 | RCOMPILE_FLAGS = -D NDEBUG -march=native -Ofast #-fopenmp 24 | # Additional debug-specific flags 25 | DCOMPILE_FLAGS = -D DEBUG -g -Wall -Wunused-variable 26 | # Add additional include paths 27 | INCLUDES = -I src/ 28 | # General linker settings 29 | LINK_FLAGS = $(GLFW3_FLAGS) 30 | # Additional release-specific linker settings 31 | RLINK_FLAGS = 32 | # Additional debug-specific linker settings 33 | DLINK_FLAGS = 34 | # Destination directory, like a jail or mounted system 35 | DESTDIR = / 36 | # Install path (bin/ is appended automatically) 37 | INSTALL_PREFIX = usr/local 38 | #### END PROJECT SETTINGS #### 39 | 40 | # Generally should not need to edit below this line 41 | 42 | # Shell used in this makefile 43 | # bash is used for 'echo -en' 44 | SHELL = /bin/bash 45 | # Clear built-in rules 46 | .SUFFIXES: 47 | 48 | # Append pkg-config specific libraries if need be 49 | ifneq ($(LIBS),) 50 | COMPILE_FLAGS += $(shell pkg-config --cflags $(LIBS)) 51 | LINK_FLAGS += $(shell pkg-config --libs $(LIBS)) 52 | endif 53 | 54 | # Combine compiler and linker flags 55 | DEBUG ?= 0 56 | ifeq ($(DEBUG), 1) 57 | export CXXFLAGS := $(CXXFLAGS) $(COMPILE_FLAGS) $(DCOMPILE_FLAGS) 58 | else 59 | export CXXFLAGS := $(CXXFLAGS) $(COMPILE_FLAGS) $(RCOMPILE_FLAGS) 60 | endif 61 | 62 | SRC = 
$(wildcard src/*.cpp) 63 | OBJ = $(patsubst src/%.cpp, src/%.o, $(SRC)) 64 | 65 | ALG_SRC = $(wildcard src/*Match.cpp) 66 | ALG_OBJ = $(patsubst src/%.cpp, obj/%.o, $(ALG_SRC)) 67 | #src/%.o: src/%.cpp 68 | # $(CXX) $(CXXFLAGS) $(INCLUDES) -c -o $@ $< 69 | all: vis_pfm rms_error librs_demo centest cost_to_conf subpixel_extract 70 | obj: 71 | mkdir -p obj/ 72 | obj/%.o: src/%.cpp | obj 73 | $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ 74 | rms_error: obj/rms_error.o obj/imio.o 75 | $(CXX) $(CXXFLAGS) $(INCLUDES) $^ -o $@ 76 | vis_pfm: obj/vis_pfm.o obj/imio.o 77 | $(CXX) $(CXXFLAGS) $(INCLUDES) $^ -o $@ 78 | librs_demo: obj/rs_demo.o obj/imio.o obj/imshow.o $(ALG_OBJ) 79 | $(CXX) $(CXXFLAGS) $(INCLUDES) $^ -Iinclude $(GLFW3_FLAGS) -lrealsense -o $@ 80 | centest: obj/Main.o obj/imio.o obj/imshow.o $(ALG_OBJ) 81 | $(CXX) $(CXXFLAGS) $(INCLUDES) $^ -Iinclude $(GLFW3_FLAGS) -o $@ 82 | cost_to_conf: obj/cost_to_conf.o obj/imio.o obj/imshow.o $(ALG_OBJ) 83 | $(CXX) $(CXXFLAGS) $(INCLUDES) $^ -Iinclude $(GLFW3_FLAGS) -o $@ 84 | subpixel_extract: obj/subpixel_extract.o obj/imio.o obj/imshow.o $(ALG_OBJ) 85 | $(CXX) $(CXXFLAGS) $(INCLUDES) $^ -Iinclude $(GLFW3_FLAGS) -o $@ 86 | clean: 87 | rm -f vis_pfm rms_error librs_demo centest cost_to_conf subpixel_extract 88 | rm -rf obj/ 89 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # census test 2 | Testing of Census-like stereo algorithms. Mostly a space for experimenting with small concepts and ideas. 3 | As such, the code has basically no comments and poor interfaces for most of its functionality. 4 | 5 | ## Algorithms Implemented 6 | * SSD Block Matching (with an optional Sobel prefilter), for Left + Right frame perspective, with quadratic subpixel estimation 7 | * Census Matching (24 bit, 7x7 descriptor) + Block Matching, for Left + Right frame perspective, with linear subpixel estimation 8 | * Census (left frame only) with thresholds from the [Intel RealSense R200](https://github.com/IntelRealSense/librealsense/blob/master/include/librealsense/rsutil.h). Implemented as documented in [Intel's released documentation](https://github.com/PercATI/RealSense_ROS/blob/master/r200_install/Include/DSAPI/DSAPITypes.h). Optionally includes a domain-transform on the cost volume. [As published](https://arxiv.org/abs/1705.05548), these thresholds can be effective in removing spurious matches. 9 | * Semiglobal Matching (5 paths), with SAD + Census cost metrics, subpixel matching, R200 thresholds, discontinuity scaling for SGM, naive hole filling and bilateral filter window weights. Only on the left-frame perspective. 10 | 11 | ## Dependencies 12 | * C++14-compliant compiler 13 | * GLFW3 for visualization 14 | * librealsense for the librs_demo executable 15 | 16 | ## Building 17 | ### Windows 18 | * Pull down a version of this repository 19 | * A Visual Studio 2015 Solution is included in msvc/ 20 | * CensusMatching is the primary project of interest 21 | * GLFW3 is installed via a NuGet package, so Visual Studio should pull it down automatically 22 | 23 | ### Linux 24 | * Pull down a version of this repository 25 | * Type `make` in the top-level centest directory. This builds everything and generates binaries in the Makefile directory.
26 | * The Makefile can be modified to build a debug version of the executables 27 | 28 | ## Running 29 | * Run via `./centest <config.json>` 30 | * An example would be `./centest github_census/github_census.json` 31 | * The example code expects a converted version of [Middlebury's v3 dataset](http://vision.middlebury.edu/stereo/submit3/) 32 | * The conversion routine is github_census/generate_middlebury.py, a Python script that requires Pillow and ImageMagick. 33 | * The program also supports passing valid JSON on the command line instead of a file. 34 | * Output results are written to the current working directory as a 32-bit float PFM disparity image and a 32-bit float PFM confidence map. 35 | * To visualize the PFM files, a vis_pfm tool is provided: `./vis_pfm depth.pfm [conf.pfm]`, with an optional conf.pfm argument. It generates an out.png file in the execution directory with a histogram-colored depth map. 36 | 37 | ## License 38 | Mozilla Public License 2.0. More information is available in the [Wikipedia article on MPL](https://en.wikipedia.org/wiki/Mozilla_Public_License) or [MPL's official FAQ](https://www.mozilla.org/en-US/MPL/2.0/FAQ/). 39 | -------------------------------------------------------------------------------- /github_census/fit_noise.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | import random 5 | import copy 6 | from collections import defaultdict 7 | import matplotlib.pyplot as plt 8 | from scipy.optimize import minimize 9 | from scipy.optimize import minimize_scalar 10 | import os 11 | import sys 12 | from util import load_psm 13 | from scipy.ndimage.filters import convolve 14 | from scipy.ndimage.filters import gaussian_filter 15 | from scipy.ndimage.filters import sobel 16 | from scipy.ndimage.interpolation import rotate 17 | from scipy.ndimage.interpolation import shift 18 | from scipy.special import erf 19 | 20 | well_capacity = 1000.0 21 | read_noise = 120.0 22 | sensor_depth = 1024.0 23 | min_vals = 20 24 | 25 | edge_pixels = 3 26 | 27 | def fit_ge(xspan,span): # fit an erf edge profile to the span; returns the estimated blur sigma 28 | if span[-1] < span[0]: 29 | span = span[::-1] 30 | #print xspan,[x for x in span] 31 | span -= span.min() 32 | span /= span.max() 33 | opt= minimize(lambda p: sum([abs(float(yv)-(erf(float(xv+p[1])/p[0])+1.0)/2.0) for xv,yv in zip(xspan,span)]),[10,0],bounds=[[1e-6,None],[-1,1]]) 34 | #print opt 35 | return opt.x[0] 36 | 37 | def plot_span(xspan,span,sigma): # plot a normalized edge span against its fitted erf profile 38 | if span[-1] < span[0]: 39 | span = span[::-1] 40 | span = np.array(span) 41 | span -= span.min() 42 | span /= span.max() 43 | plt.plot(xspan,span,c='b') 44 | plt.plot(xspan,[(erf(float(xv)/sigma)+1.0)/2.0 for xv in xspan],c='r') 45 | plt.show() 46 | 47 | def fit_mtf(good_image): # estimate edge-blur sigmas at the strongest edges in the image 48 | if len(good_image.shape) == 3: 49 | good_image = good_image.mean(axis=2) 50 | blur_est = [] 51 | img = good_image 52 | ye = sobel(img,axis=0) 53 | xe = sobel(img,axis=1) 54 | #e1 = convolve(img,np.array([[0,-1,0],[0,0,0],[0,1,0]])) 55 | #e2 = convolve(img,np.array([[0,0,0],[-1,0,1],[0,0,0]])) 56 | gs = np.sqrt(xe**2 + ye**2) 57 | largest_edges = np.argsort(-gs.ravel())[:edge_pixels] 58 | yi,xi = np.unravel_index(largest_edges,gs.shape) 59 | for y,x in zip(yi,xi): 60 | m = gs[y,x] 61 | yx = ye[y,x] 62 | xx = xe[y,x] 63 | a = np.arctan2(yx,xx) 64 | gr = rotate(img,a*180.0/3.14159,mode='nearest') 65 | xer = sobel(gr,axis=1) 66 | #e1 = convolve(img,np.array([[0,-1,0],[0,0,0],[0,1,0]])) 67 | #e2 = convolve(img,np.array([[0,0,0],[-1,0,1],[0,0,0]])) 68 | gsr =
np.sqrt(xer**2) 69 | ler = np.argsort(-gsr.ravel())[:1] 70 | yir,xir = np.unravel_index(ler,gsr.shape) 71 | for y2,x2 in zip(yir,xir): 72 | cur = gr[y2,x2] 73 | xp = 0.0 74 | xm = 0.0 75 | for plus in xrange(1,gr.shape[1]-x2): 76 | diff = cur - gr[y2,x2+plus] 77 | if abs(diff) > abs(xp): 78 | xp = diff 79 | else: 80 | plus -=1 81 | break 82 | for minus in xrange(1,x2): 83 | diff = cur - gr[y2,x2-minus] 84 | if abs(diff) > abs(xm): 85 | xm = diff 86 | else: 87 | minus -=1 88 | break 89 | xspan = range(-minus,plus+1) 90 | span = gr[y2,x2-minus:x2+plus+1] 91 | 92 | res = fit_ge(xspan,span) 93 | blur_est.append(res) 94 | return blur_est 95 | #print m,a,y,x,a*180.0/3.14159,xx,yx 96 | def add_noise(x): 97 | x *= well_capacity 98 | x = np.random.poisson(x).astype(np.float64) 99 | x += np.random.standard_normal(x.shape)*read_noise 100 | #x = x + np.random.poisson(np.ones(x.shape)*read_noise).astype(np.float64) 101 | x /= well_capacity 102 | return x 103 | 104 | 105 | def tr(x,n=0): 106 | fft_of_signal = np.fft.fft(x) 107 | if n > 0: 108 | fft_of_signal[0:n] = 0 109 | return np.real(np.fft.ifft(fft_of_signal)) 110 | 111 | if len(sys.argv) == 1: 112 | num_pixels = 10000 113 | num_int = 1024 114 | num_samples = 1024 115 | a = np.random.rand(num_pixels) 116 | r = np.empty(shape=(num_samples,num_pixels)) 117 | for i in xrange(num_samples): 118 | n = add_noise(copy.copy(a)) 119 | r[i,:] = n 120 | #x,y = cont_to_hist(n,num_int) 121 | x = r.mean(axis=0) 122 | y = r.var(axis=0) 123 | opt= minimize(lambda p: sum([abs(yv-p[0]*(xv + p[1])) for xv,yv in zip(x,y)]), [0,0]) 124 | res = opt.x 125 | print 1.0/res[0],np.sqrt(res[1]*(1.0/res[0])) 126 | plt.scatter(x,y) 127 | plt.show() 128 | else: 129 | target_dir = sys.argv[1] 130 | imgs = [] 131 | for f in os.listdir(target_dir): 132 | fl = os.path.join(target_dir,f) 133 | img = load_psm(fl) 134 | imgs.append(img[0].astype(np.float64)) 135 | imgs = np.array(imgs) 136 | 137 | img = imgs.mean(axis=0) 138 | img = convolve(img/sensor_depth,np.array([[1,2,1],[2,4,2],[1,2,1]])/16.0) 139 | sigmas = fit_mtf(img) 140 | print sigmas, sum(sigmas)/len(sigmas) 141 | print 'Gaussian Sigma: {0:.2f}'.format(sigmas[0]) 142 | 143 | r = imgs.reshape((imgs.shape[0],-1))/sensor_depth 144 | xo = r.mean(axis=0) 145 | for row in r: 146 | row = tr(row,5) 147 | yo = r.var(axis=0) 148 | d = defaultdict(list) 149 | for x,y in zip(xo,yo): 150 | d[round(x*sensor_depth)].append(y) 151 | d2 = [(k,sum(v)/float(len(v))) for k,v in d.iteritems() if len(v) > min_vals] 152 | x = np.array([t[0]/sensor_depth for t in d2]) 153 | y = np.array([t[1] for t in d2]) 154 | opt= minimize(lambda p: sum([abs(yv-p[0]*(xv + p[1])) for xv,yv in zip(x,y)]), [0,0]) 155 | res = opt.x 156 | well_cap = 1.0/res[0] 157 | sn = np.sqrt(max(res[1],0)*well_cap) 158 | print 'Well Capacity: {0:.0f} \n Shot Noise: {1:.2f}'.format(well_cap,sn) 159 | plt.scatter(x,y,s=2,lw=0) 160 | plt.xlim(x.min(),x.max()) 161 | plt.ylim(y.min(),y.max()) 162 | plt.show() 163 | -------------------------------------------------------------------------------- /github_census/generate_kitti2012.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import json 4 | import os, sys 5 | from PIL import Image 6 | import numpy as np 7 | from subprocess import call 8 | import re, shutil, os 9 | from util import * 10 | 11 | config = get_default_config() 12 | config['description'] = "all of KITTI 2012" 13 | 14 | basePath = './data_stereo_flow/training/' 15 | targetPath = 'kitti2012' 16 | 17 | 
check_and_make_dir(targetPath) 18 | for filename in os.listdir(os.path.join(basePath,'disp_noc')): 19 | lcfolder = filename[:-7] 20 | 21 | nfolder = os.path.join(targetPath,lcfolder) 22 | check_and_make_dir(nfolder) 23 | check_and_make_dir(os.path.join(nfolder,'left')) 24 | check_and_make_dir(os.path.join(nfolder, 'right')) 25 | check_and_make_dir(os.path.join(nfolder, 'gt')) 26 | 27 | lft_rgb = os.path.join(nfolder,'left','rgb.png') 28 | lft_mono = os.path.join(nfolder,'left','mono.png') 29 | rgt_rgb = os.path.join(nfolder,'right','rgb.png') 30 | rgt_mono = os.path.join(nfolder,'right','mono.png') 31 | gt_mask = os.path.join(nfolder,'gt','mask.pfm') 32 | gt = os.path.join(nfolder,'gt','gt.pfm') 33 | 34 | 35 | if platform.system() == 'Windows': 36 | flags=0x08000000 37 | script='magick' 38 | else: 39 | flags = 0 40 | script = 'convert' 41 | 42 | call([script,'-define','png:bit-depth=16',os.path.join(basePath,'image_0',filename),'-define','png:format=png48',lft_rgb],creationflags=flags) 43 | call([script,'-define','png:bit-depth=16',os.path.join(basePath,'image_1',filename),'-define','png:format=png48',rgt_rgb],creationflags=flags) 44 | call([script,'-define','png:bit-depth=16',os.path.join(basePath,'image_0',filename),lft_mono],creationflags=flags) 45 | call([script,'-define','png:bit-depth=16',os.path.join(basePath,'image_1',filename),rgt_mono],creationflags=flags) 46 | 47 | data = {'left' : {'mono' : lft_mono, 'rgb': lft_rgb},'right' : {'mono' : rgt_mono, 'rgb': rgt_rgb}} 48 | with open(os.path.join(basePath,'calib',lcfolder + '.txt'),'r') as myfile: 49 | calib=myfile.read().split('\n') 50 | calib = {x[0]:x[1] for x in [x.split(':') for x in calib[:-1]]} # trust leo 51 | calibP0 = [float(x) for x in calib['P0'][1:].split(' ')] 52 | calibP1 = [float(x) for x in calib['P1'][1:].split(' ')] 53 | 54 | gtimg = Image.open(os.path.join(basePath,'disp_noc',filename)) 55 | gtarr = np.array(gtimg).astype(np.float32)/256.0 56 | save_pfm(gt_mask,(gtarr !=0).astype(np.float32)) 57 | save_pfm(gt,gtarr) 58 | config['names'].append(lcfolder) 59 | config['data'][lcfolder] = data 60 | config['maxdisp'][lcfolder] = int(256) 61 | config['dpx'][lcfolder] = float(calibP0[2])-float(calibP1[2]) 62 | config['baseline'][lcfolder] = -float(calibP1[3])/float(calibP1[0]) 63 | config['fx'][lcfolder] = float(calibP0[0]) 64 | config['fy'][lcfolder] = float(calibP0[5]) 65 | config['px'][lcfolder] = float(calibP0[2]) 66 | config['py'][lcfolder] = float(calibP0[6]) 67 | config['width'][lcfolder] = int(gtarr.shape[1]) 68 | config['height'][lcfolder] = int(gtarr.shape[0]) 69 | config['gt'][lcfolder] = gt 70 | config['gt_mask'][lcfolder] = gt_mask 71 | config['minint'][lcfolder] = 0x00FF 72 | config['maxint'][lcfolder] = 0xFFFF 73 | 74 | with open('kitti2012.json','w') as fp: 75 | json.dump(config,fp, sort_keys=True,indent=4, separators=(',', ': ')) 76 | -------------------------------------------------------------------------------- /github_census/generate_middlebury.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import json 4 | import os, sys 5 | from PIL import Image 6 | import numpy as np 7 | from subprocess import call 8 | import platform 9 | import os 10 | from util import * 11 | 12 | config = get_default_dataset_config() 13 | config['description'] = "a standard collection of quarter sized middlebury images" 14 | 15 | basePath = './MiddEval3/trainingQ/' 16 | targetPath = 'middlebury' 17 | 18 | check_and_make_dir(targetPath) 19 | for folder in 
os.listdir(basePath): 20 | lcfolder = folder.lower() 21 | nfolder = os.path.join(targetPath,lcfolder) 22 | check_and_make_dir(nfolder) 23 | check_and_make_dir(os.path.join(nfolder,'left')) 24 | check_and_make_dir(os.path.join(nfolder,'right')) 25 | check_and_make_dir(os.path.join(nfolder,'gt')) 26 | 27 | lft_rgb = os.path.join(nfolder,'left','rgb.png') 28 | lft_mono = os.path.join(nfolder,'left','mono.png') 29 | rgt_rgb = os.path.join(nfolder,'right','rgb.png') 30 | rgt_mono = os.path.join(nfolder,'right','mono.png') 31 | gt_mask = os.path.join(nfolder,'gt','mask.pfm') 32 | gt = os.path.join(nfolder,'gt','gt.pfm') 33 | 34 | if platform.system() == 'Windows': 35 | flags=0x08000000 36 | script='magick' 37 | else: 38 | flags = 0 39 | script = 'convert' 40 | call([script,'-define','png:bit-depth=16',os.path.join(basePath,folder,'im0.png'),lft_rgb],creationflags=flags) 41 | call([script,'-define','png:bit-depth=16',os.path.join(basePath,folder,'im1.png'),rgt_rgb],creationflags=flags) 42 | call([script,'-define','png:bit-depth=16',os.path.join(basePath,folder,'im0.png'),'-colorspace','Gray',lft_mono],creationflags=flags) 43 | call([script,'-define','png:bit-depth=16',os.path.join(basePath,folder,'im1.png'),'-colorspace','Gray',rgt_mono],creationflags=flags) 44 | 45 | data = {'left' : {'mono' : lft_mono, 'rgb': lft_rgb},'right' : {'mono' : rgt_mono, 'rgb': rgt_rgb}} 46 | 47 | with open(os.path.join(basePath,folder,'calib.txt'),'r') as myfile: 48 | calib=myfile.read().split('\n') 49 | calib = {x[0]:x[1] for x in [x.split('=') for x in calib[:-1]]} # trust leo 50 | cam0 = calib['cam0'].strip('[]').replace(';','').replace(' ',',').split(',') #see above 51 | 52 | o_gt = load_pfm(os.path.join(basePath,folder,'disp0GT.pfm')) 53 | save_pfm(gt_mask,(o_gt[0] != np.inf).astype(np.float32)) 54 | shutil.copy(os.path.join(basePath,folder,'disp0GT.pfm'),gt) 55 | config['names'].append(lcfolder) 56 | config['data'][lcfolder] = data 57 | config['maxdisp'][lcfolder] = int(calib['ndisp']) 58 | config['dpx'][lcfolder] = float(calib['doffs']) 59 | config['baseline'][lcfolder] = float(calib['baseline'])/1000.0 60 | config['fx'][lcfolder] = float(cam0[0]) 61 | config['fy'][lcfolder] = float(cam0[4]) 62 | config['px'][lcfolder] = float(cam0[2]) 63 | config['py'][lcfolder] = float(cam0[5]) 64 | config['width'][lcfolder] = int(calib['width']) 65 | config['height'][lcfolder] =int(calib['height']) 66 | config['gt'][lcfolder] =gt 67 | config['gt_mask'][lcfolder] =gt_mask 68 | config['minint'][lcfolder] = 0x00FF 69 | config['maxint'][lcfolder] = 0xFFFF 70 | 71 | with open(targetPath + '.json','w') as fp: 72 | json.dump(config,fp, sort_keys=True,indent=4, separators=(',', ': ')) 73 | -------------------------------------------------------------------------------- /github_census/generate_noise.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | from skimage import filter,exposure,io 5 | import skimage 6 | import os 7 | import re, shutil 8 | 9 | orig_dir = 'MiddEval3' 10 | target_dir = 'MiddEval3_noise' 11 | 12 | input_gamma = 2.2 13 | output_gamma = 1.0 #1.0 means do nothing 14 | gaussian_sigma = 0.8 15 | well_capacity = 1600.0 16 | read_noise = 2.0 17 | color_correction = np.array([[1.6013 ,-0.4631, -0.1382 ],[-0.2511, 1.6393, -0.3882 ],[0.0362, -0.5823, 1.5461 ]]) 18 | #color_correction = np.array([[1,0,0],[0,1,0],[0,0,1]]) 19 | cc_inv = np.linalg.pinv(color_correction) 20 | 21 | def check_and_make_dir(directory): 22 | if not 
os.path.exists(directory): 23 | os.makedirs(directory) 24 | 25 | 26 | def add_noise(img): 27 | img = skimage.img_as_float(img) 28 | img = img.dot(cc_inv) 29 | img = exposure.adjust_gamma(img,input_gamma) 30 | img = filter.gaussian_filter(img,gaussian_sigma,multichannel=True) 31 | 32 | img *= well_capacity 33 | img = np.random.poisson(img).astype(np.float64) 34 | 35 | #img += np.random.poisson(np.ones(img.shape)*read_noise).astype(np.float32) 36 | img += (np.random.standard_normal(img.shape)*read_noise).astype(np.float64) 37 | 38 | img /= well_capacity 39 | 40 | img = np.clip(img,0.0,1.0) 41 | img = exposure.adjust_gamma(img,output_gamma) 42 | return img 43 | 44 | 45 | for f1 in os.listdir(orig_dir): 46 | check_and_make_dir(target_dir) 47 | for f2 in os.listdir(os.path.join(orig_dir,f1)): 48 | check_and_make_dir(os.path.join(target_dir,f1,f2)) 49 | for file in os.listdir(os.path.join(orig_dir,f1,f2)): 50 | if file == 'im0.png' or file == 'im1.png': 51 | img = io.imread(os.path.join(orig_dir,f1,f2,file)) 52 | img = add_noise(img) 53 | io.imsave(os.path.join(target_dir,f1,f2,file),img) 54 | else: 55 | shutil.copy(os.path.join(orig_dir,f1,f2,file),os.path.join(target_dir,f1,f2,file)) 56 | 57 | -------------------------------------------------------------------------------- /github_census/github_census.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "algorithm", 3 | "description": "open-source r200 knockoff", 4 | "config": { 5 | "dispmul": 4, 6 | "algorithm": "r200", 7 | "box_radius": 3, 8 | "left_right_int": 1, 9 | "left_right_sub": 0.75, 10 | "neighbor": 7, 11 | "second_peak": 10, 12 | "texture_diff": 4, 13 | "texture_count": 6, 14 | "score_min": 0, 15 | "score_max": 512, 16 | "median_plus": 5, 17 | "median_minus": 5, 18 | "median_thresh": 192, 19 | "hole_fill": false, 20 | "domain_transform": false, 21 | "dt_scale": 1, 22 | "dt_iter": 1, 23 | "dt_space": 10.0, 24 | "dt_range": 90.0 25 | }, 26 | "supports_cmd": true, 27 | "supports_batch": false, 28 | "setup": [], 29 | "command": "../centest", 30 | "dir": ".", 31 | "dependent_files": [], 32 | 33 | "left_mono": "./middlebury/motorcycle/left/mono.png", 34 | "left_rgb": "./middlebury/motorcycle/left/rgb.png", 35 | "right_mono": "./middlebury/motorcycle/right/mono.png", 36 | "right_rgb": "./middlebury/motorcycle/right/rgb.png", 37 | "output_disp": "./out.pfm", 38 | "output_conf": "./conf.pfm", 39 | "maxint": 65535, 40 | "minint": 255, 41 | "maxdisp": 70, 42 | 43 | "costs": "" 44 | } 45 | -------------------------------------------------------------------------------- /github_census/github_dt.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "algorithm", 3 | "description": "open-source r200 knockoff with domain transform cost filtering based on the left image", 4 | "config": { 5 | "dispmul": 4, 6 | "algorithm": "r200", 7 | "box_radius": 0, 8 | "left_right_int": 1, 9 | "left_right_sub": 0.75, 10 | "neighbor": 4, 11 | "second_peak": 3, 12 | "texture_diff": 0, 13 | "texture_count": 0, 14 | "score_min": 1, 15 | "score_max": 815, 16 | "median_plus": 5, 17 | "median_minus": 5, 18 | "median_thresh": 50, 19 | "hole_fill": false, 20 | "domain_transform": true, 21 | "dt_scale": 49, 22 | "dt_iter": 3, 23 | "dt_space": 8.6, 24 | "dt_range": 90.5, 25 | "dt_range_disp": 0.00 26 | }, 27 | "supports_cmd": true, 28 | "supports_batch": false, 29 | "setup": [], 30 | "command": "github/centest", 31 | "dependent_files": [], 32 | 33 | "left_mono": 
"./middlebury/motorcycle/left/mono.png", 34 | "left_rgb": "./middlebury/motorcycle/left/rgb.png", 35 | "right_mono": "./middlebury/motorcycle/right/mono.png", 36 | "right_rgb": "./middlebury/motorcycle/right/rgb.png", 37 | "output_disp": "./out.pfm", 38 | "output_conf": "./conf.pfm", 39 | "maxint": 65535, 40 | "minint": 255, 41 | "maxdisp": 70, 42 | 43 | "costs": "" 44 | } 45 | -------------------------------------------------------------------------------- /github_census/github_sgbm.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "algorithm", 3 | "description": "open-source r200 knockoff with causal semiglobal matching", 4 | "config": { 5 | "dispmul": 4, 6 | "algorithm": "sgbm", 7 | "box_radius": 1, 8 | "left_right_int": 1, 9 | "left_right_sub": 0.75, 10 | "neighbor": 20, 11 | "second_peak": 50, 12 | "texture_diff": 0, 13 | "texture_count": 0, 14 | "score_min": 0, 15 | "score_max": 20000, 16 | "median_plus": 10, 17 | "median_minus": 10, 18 | "median_thresh": 500, 19 | "hole_fill": false, 20 | "cost_abs": 1, 21 | "cost_ham": 9, 22 | "p1": 2950, 23 | "p2": 11200, 24 | "sgm": true, 25 | "scale_p2": true, 26 | "use_blf": false, 27 | "blf_range": 25.0, 28 | "blf_space": 2.25 29 | }, 30 | "supports_cmd": true, 31 | "supports_batch": false, 32 | "setup": [], 33 | "command": "../centest", 34 | "dir": ".", 35 | "dependent_files": [], 36 | 37 | "left_mono": "./middlebury/motorcycle/left/mono.png", 38 | "left_rgb": "./middlebury/motorcycle/left/rgb.png", 39 | "right_mono": "./middlebury/motorcycle/right/mono.png", 40 | "right_rgb": "./middlebury/motorcycle/right/rgb.png", 41 | "output_disp": "./out.pfm", 42 | "output_conf": "./conf.pfm", 43 | "maxint": 65535, 44 | "minint": 255, 45 | "maxdisp": 70, 46 | 47 | "costs": "" 48 | } 49 | -------------------------------------------------------------------------------- /github_census/optimize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Optimize blocksize of apps/mmm_block.cpp 4 | # 5 | # This is an extremely simplified version meant only for tutorials 6 | # 7 | 8 | import opentuner 9 | from opentuner import ConfigurationManipulator 10 | from opentuner import IntegerParameter 11 | from opentuner import FloatParameter 12 | from opentuner import LogFloatParameter 13 | from opentuner import MeasurementInterface 14 | from opentuner import Result 15 | import run_all 16 | import json 17 | import numpy as np 18 | class DTFlagsTuner(MeasurementInterface): 19 | 20 | def manipulator(self): 21 | manipulator = ConfigurationManipulator() 22 | manipulator.add_parameter( 23 | FloatParameter('dt_space', 5, 15)) 24 | manipulator.add_parameter( 25 | LogFloatParameter('dt_range', 16, 128 )) 26 | return manipulator 27 | 28 | def run(self, desired_result, input, limit): 29 | cfg = desired_result.configuration.data 30 | 31 | with open('github_dt.json') as fp: 32 | alg_cfg = json.load(fp) 33 | alg_cfg['config'].update(cfg) 34 | with open('opt.json','w') as fp: 35 | alg_cfg = json.dump(alg_cfg,fp) 36 | results = run_all.run_all_algs() 37 | short_results = {} 38 | for alg,ds in results.iteritems(): 39 | rz = [] 40 | for dsn,de in ds.iteritems(): 41 | for den,res in de.iteritems(): 42 | nd = {k:v for k,v in res.iteritems()} 43 | rz.append(nd) 44 | metric_names = rz[0].keys() 45 | rzz = {m: np.mean([y[m]['result'] for y in rz]) for m in metric_names} 46 | short_results[alg] = rzz 47 | return Result(time=short_results['opt']['err_3']*100.0) 48 | 49 | 
def save_final_config(self, configuration): 50 | print "Optimal block size written to mmm_final_config.json:", configuration.data 51 | self.manipulator().save_to_file(configuration.data, 52 | 'mmm_final_config.json') 53 | 54 | if __name__ == '__main__': 55 | argparser = opentuner.default_argparser() 56 | DTFlagsTuner.main(argparser.parse_args()) 57 | -------------------------------------------------------------------------------- /github_census/optimize_sgm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Optimize blocksize of apps/mmm_block.cpp 4 | # 5 | # This is an extremely simplified version meant only for tutorials 6 | # 7 | 8 | import opentuner 9 | from opentuner import ConfigurationManipulator 10 | from opentuner import IntegerParameter 11 | from opentuner import FloatParameter 12 | from opentuner import LogFloatParameter 13 | from opentuner import LogIntegerParameter 14 | from opentuner import MeasurementInterface 15 | from opentuner import Result 16 | import run_all 17 | import json 18 | import numpy as np 19 | class DTFlagsTuner(MeasurementInterface): 20 | 21 | def manipulator(self): 22 | manipulator = ConfigurationManipulator() 23 | manipulator.add_parameter( 24 | LogIntegerParameter('p1', 0, 5000)) 25 | manipulator.add_parameter( 26 | LogIntegerParameter('p2', 0, 15000)) 27 | manipulator.add_parameter( 28 | IntegerParameter('cost_ham', 3, 9)) 29 | manipulator.add_parameter( 30 | IntegerParameter('cost_abs', 0, 3)) 31 | manipulator.add_parameter( 32 | IntegerParameter('box_radius', 0, 3)) 33 | return manipulator 34 | 35 | def run(self, desired_result, input, limit): 36 | cfg = desired_result.configuration.data 37 | 38 | with open('github_sgbm.json') as fp: 39 | alg_cfg = json.load(fp) 40 | alg_cfg['config'].update(cfg) 41 | with open('opt.json','w') as fp: 42 | alg_cfg = json.dump(alg_cfg,fp) 43 | results = run_all.run_all_algs() 44 | short_results = {} 45 | for alg,ds in results.iteritems(): 46 | rz = [] 47 | for dsn,de in ds.iteritems(): 48 | for den,res in de.iteritems(): 49 | nd = {k:v for k,v in res.iteritems()} 50 | rz.append(nd) 51 | metric_names = rz[0].keys() 52 | rzz = {m: np.mean([y[m]['result'] for y in rz]) for m in metric_names} 53 | short_results[alg] = rzz 54 | return Result(time=short_results['opt']['err_3']*100.0) 55 | 56 | def save_final_config(self, configuration): 57 | print "Optimal block size written to mmm_final_config.json:", configuration.data 58 | self.manipulator().save_to_file(configuration.data, 59 | 'mmm_final_config.json') 60 | 61 | if __name__ == '__main__': 62 | argparser = opentuner.default_argparser() 63 | DTFlagsTuner.main(argparser.parse_args()) 64 | -------------------------------------------------------------------------------- /github_census/optimize_thresh.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Optimize blocksize of apps/mmm_block.cpp 4 | # 5 | # This is an extremely simplified version meant only for tutorials 6 | # 7 | 8 | import opentuner 9 | from opentuner import ConfigurationManipulator 10 | from opentuner import IntegerParameter 11 | from opentuner import FloatParameter 12 | from opentuner import LogFloatParameter 13 | from opentuner import MeasurementInterface 14 | from opentuner import Result 15 | import run_all 16 | import json 17 | import numpy as np 18 | class DTFlagsTuner(MeasurementInterface): 19 | 20 | def manipulator(self): 21 | manipulator = 
ConfigurationManipulator() 22 | manipulator.add_parameter(IntegerParameter("left_right_int", 0,1)) 23 | manipulator.add_parameter(FloatParameter("left_right_sub", 0.1,1.0)) 24 | manipulator.add_parameter(IntegerParameter("neighbor", 0,100)) 25 | manipulator.add_parameter(IntegerParameter("second_peak", 0,100)) 26 | manipulator.add_parameter(IntegerParameter("texture_diff", 0,40)) 27 | manipulator.add_parameter(IntegerParameter("texture_count", 0,20)) 28 | manipulator.add_parameter(IntegerParameter("score_min", 0,200)) 29 | manipulator.add_parameter(IntegerParameter("score_max", 100,1024)) 30 | manipulator.add_parameter(IntegerParameter("median_plus", 1,15)) 31 | manipulator.add_parameter(IntegerParameter("median_minus", 1,15)) 32 | manipulator.add_parameter(IntegerParameter("median_thresh", 0,250)) 33 | return manipulator 34 | 35 | def run(self, desired_result, input, limit): 36 | cfg = desired_result.configuration.data 37 | 38 | with open('github_census.json') as fp: 39 | alg_cfg = json.load(fp) 40 | alg_cfg['config'].update(cfg) 41 | with open('github_census.json','w') as fp: 42 | alg_cfg = json.dump(alg_cfg,fp) 43 | results = run_all.run_all_algs() 44 | short_results = {} 45 | for alg,ds in results.iteritems(): 46 | rz = [] 47 | for dsn,de in ds.iteritems(): 48 | for den,res in de.iteritems(): 49 | nd = {k:v for k,v in res.iteritems()} 50 | rz.append(nd) 51 | metric_names = rz[0].keys() 52 | rzz = {m: np.mean([y[m]['result'] for y in rz]) for m in metric_names} 53 | short_results[alg] = rzz 54 | return Result(time=(1.0-short_results['github_census']['f_1'])*100.0) 55 | 56 | def save_final_config(self, configuration): 57 | print "Optimal block size written to mmm_final_config.json:", configuration.data 58 | self.manipulator().save_to_file(configuration.data, 59 | 'mmm_final_config.json') 60 | 61 | if __name__ == '__main__': 62 | argparser = opentuner.default_argparser() 63 | DTFlagsTuner.main(argparser.parse_args()) 64 | -------------------------------------------------------------------------------- /github_census/pystereo-fscore.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import os\n", 13 | "import re\n", 14 | "import matplotlib.colors as colors\n", 15 | "\n", 16 | "from scipy.ndimage import filters" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "i = 0\n", 28 | "l = np.load('l{0}.npy'.format(i)).astype(np.float)\n", 29 | "gt = np.load('g{0}.npy'.format(i)).astype(np.float)\n", 30 | "costs = np.load('b{0}.npy'.format(i)).astype(np.float)\n", 31 | "box_costs = (filters.uniform_filter(costs,[7,7,1],mode='nearest')*(7*7)).astype(np.int)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 126, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [ 41 | { 42 | "name": "stderr", 43 | "output_type": "stream", 44 | "text": [ 45 | "C:\\Anaconda2\\lib\\site-packages\\ipykernel\\__main__.py:59: VisibleDeprecationWarning: boolean index did not match indexed array along dimension 0; dimension is 356128 but corresponding boolean dimension is 354918\n" 46 | ] 47 | } 48 | ], 49 | "source": [ 50 | "def subpixel_f(left,cent,right):\n", 51 | " num = right - left\n", 52 | " den = (cent-left) if (right < left) else 
(cent-right)\n", 53 | " return 0.0 if den == 0 else 0.5*(num/float(den))\n", 54 | "def second_peak_f(corr_v,best,max_cost):\n", 55 | " sp = max_cost\n", 56 | " for i in xrange(0,corr_v.shape[0]-1):\n", 57 | " if i!=best and corr_v[i] < corr_v[i-1] and corr_v[i] < corr_v[i+1]:\n", 58 | " if corr_v[i] < sp:\n", 59 | " sp = corr_v[i]\n", 60 | " return sp\n", 61 | " \n", 62 | " \n", 63 | "def stereo_features(costs,gt):\n", 64 | " height,width,mdisp = costs.shape\n", 65 | " \n", 66 | " best = np.argmin(costs,2)\n", 67 | " bestr = np.zeros_like(best)\n", 68 | " bestlS = np.zeros_like(best).astype(np.float)\n", 69 | " bestrS = np.zeros_like(best).astype(np.float)\n", 70 | " l_r_match = np.zeros_like(best).astype(np.float)\n", 71 | "\n", 72 | " for ri,row in enumerate(costs):\n", 73 | " for ci,col in enumerate(row):\n", 74 | " lim = min(width,ci+mdisp)-ci\n", 75 | " rv, r_min = min([(costs[ri,ci+i,i],i) for i in xrange(0,lim)])\n", 76 | " bestr[ri,ci] = r_min\n", 77 | " shift = 0.0\n", 78 | " if r_min >0 and r_min < lim-1:\n", 79 | " rminl = r_min -1\n", 80 | " rminr = r_min + 1\n", 81 | " shift = subpixel_f(costs[ri,ci+rminl,rminl],costs[ri,ci+r_min,r_min],costs[ri,ci+rminr,rminr])\n", 82 | " bestrS[ri,ci] = r_min + shift\n", 83 | " for ri,row in enumerate(costs):\n", 84 | " for ci,col in enumerate(row):\n", 85 | " l_min = best[ri,ci]\n", 86 | " shift = 0.0\n", 87 | " if l_min >0 and l_min < mdisp-1:\n", 88 | " lminl = l_min -1\n", 89 | " lminr = l_min + 1\n", 90 | " shift = subpixel_f(costs[ri,ci,lminl],costs[ri,ci,l_min],costs[ri,ci,lminr])\n", 91 | " bestlS[ri,ci] = l_min + shift\n", 92 | " for ri,row in enumerate(costs):\n", 93 | " for ci,col in enumerate(row):\n", 94 | " l_min = best[ri,ci]\n", 95 | " l_minS = bestlS[ri,ci]\n", 96 | " r_minS = bestrS[ri,ci-l_min]\n", 97 | " l_r_match[ri,ci] = abs(l_minS-r_minS)\n", 98 | " \n", 99 | " costs = costs.reshape([-1,mdisp])\n", 100 | " gt = gt.reshape([-1])\n", 101 | " l_r_match = l_r_match.reshape([-1])\n", 102 | " n = gt.shape[0]\n", 103 | " \n", 104 | " idx = np.where(gt != -2)[0]\n", 105 | " costs = costs[gt != -2]\n", 106 | " max_cost = costs.max()\n", 107 | " gt = gt[gt != -2]\n", 108 | " l_r_match = l_r_match[gt != -2]\n", 109 | " best = np.argmax(costs,1)\n", 110 | " \n", 111 | " valid = (gt != -1).astype(np.int)\n", 112 | " est_correct = (best == np.round(gt)).astype(np.int)\n", 113 | " \n", 114 | " minv = np.min(costs,1)\n", 115 | " maxv = np.max(costs,1)\n", 116 | " median = np.median(costs,1)\n", 117 | " left_d = np.array([costs[idx,v-1]-costs[idx,v] if v > 0 else max_cost for idx,v in enumerate(best)])\n", 118 | " right_d = np.array([costs[idx,v+1]-costs[idx,v] if v < mdisp-1 else max_cost for idx,v in enumerate(best)])\n", 119 | " second_peak = np.array([second_peak_f(costs[idx,:],v,max_cost) for idx,v in enumerate(best)])\n", 120 | " \n", 121 | " return np.vstack([valid,est_correct,minv,maxv,median,left_d,right_d,l_r_match,second_peak])\n", 122 | " \n", 123 | "ft = stereo_features(box_costs,gt).T" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 131, 129 | "metadata": { 130 | "collapsed": false 131 | }, 132 | "outputs": [ 133 | { 134 | "name": "stdout", 135 | "output_type": "stream", 136 | "text": [ 137 | "[ 0. 0. 0. 0. 0. -553. -511. 0. 
0.]\n", 138 | "[ 1.00000000e+00 1.00000000e+00 7.22000000e+02 1.13200000e+03\n", 139 | " 9.01000000e+02 1.13200000e+03 1.13200000e+03 7.20000000e+01\n", 140 | " 1.13200000e+03]\n", 141 | "[ 9.16025674e-01 5.91685967e-04 2.42227438e+02 7.32355214e+02\n", 142 | " 5.41955984e+02 3.44312771e+00 -1.45779786e+01 4.05303926e+00\n", 143 | " 2.49582163e+02]\n" 144 | ] 145 | } 146 | ], 147 | "source": [ 148 | "ft.shape\n", 149 | "ft[0,:]\n", 150 | "x_min = np.min(ft,0)\n", 151 | "x_max = np.max(ft,0)\n", 152 | "x_mean = np.mean(ft,0)\n", 153 | "print x_min\n", 154 | "print x_max\n", 155 | "print x_mean\n", 156 | "\n", 157 | "#x_initial = np.array([0,512,256,50,50,0." 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": { 164 | "collapsed": true 165 | }, 166 | "outputs": [], 167 | "source": [ 168 | "data = []\n", 169 | "i = 0\n", 170 | " \n", 171 | "while True:\n", 172 | " try:\n", 173 | " l = np.load('l{0}.npy'.format(i)).astype(np.float)\n", 174 | " gt = np.load('g{0}.npy'.format(i)).astype(np.float)\n", 175 | " costs = np.load('b{0}.npy'.format(i)).astype(np.float)\n", 176 | " #acosts = np.load('a{0}.npy'.format(i)).astype(np.float)\n", 177 | " box_costs = (filters.uniform_filter(costs,[7,7,1],mode='nearest')*(7*7)).astype(np.int)\n", 178 | "\n", 179 | " best = np.argmin(box_costs,2)\n", 180 | " except:\n", 181 | " break" 182 | ] 183 | } 184 | ], 185 | "metadata": { 186 | "kernelspec": { 187 | "display_name": "Python 2", 188 | "language": "python", 189 | "name": "python2" 190 | }, 191 | "language_info": { 192 | "codemirror_mode": { 193 | "name": "ipython", 194 | "version": 2 195 | }, 196 | "file_extension": ".py", 197 | "mimetype": "text/x-python", 198 | "name": "python", 199 | "nbconvert_exporter": "python", 200 | "pygments_lexer": "ipython2", 201 | "version": "2.7.13" 202 | } 203 | }, 204 | "nbformat": 4, 205 | "nbformat_minor": 2 206 | } 207 | -------------------------------------------------------------------------------- /github_census/rms_error.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "metric", 3 | "description": "basic rms error metrics", 4 | "config": { }, 5 | "supports_batch": false, 6 | "setup": [], 7 | "command": "../rms_error", 8 | "prints_output": true, 9 | 10 | "left_mono": "./middlebury/motorcycle/left/mono.png", 11 | "left_rgb": "./middlebury/motorcycle/left/rgb.png", 12 | "right_mono": "./middlebury/motorcycle/right/mono.png", 13 | "right_rgb": "./middlebury/motorcycle/right/rgb.png", 14 | "gt": "./middlebury/motorcycle/gt/gt.pfm", 15 | "gt_mask": "./middlebury/motorcycle/gt/mask.pfm", 16 | "output_disp": "./out.pfm", 17 | "output_conf": "./conf.pfm", 18 | "output": "rms_out.json", 19 | "maxint": 65535, 20 | "minint": 255, 21 | "maxdisp": 70 22 | } 23 | -------------------------------------------------------------------------------- /github_census/run_all.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os,sys,platform 4 | import json,hashlib 5 | from collections import defaultdict 6 | from util import * 7 | from subprocess import call,check_output 8 | import copy 9 | import numpy as np 10 | def extract_fields(dataset,name,out_dir): 11 | data = {k: v[name] for k,v in dataset.iteritems() if type(v) == type({})} 12 | data["left_mono"] = data["data"]["left"]["mono"] 13 | data["left_rgb"] = data["data"]["left"]["rgb"] 14 | data["right_mono"] = data["data"]["right"]["mono"] 15 | data["right_rgb"] = 
data["data"]["right"]["rgb"] 16 | data["output_disp"] = os.path.join(out_dir,name + '_disp.pfm') 17 | data["output_conf"] = os.path.join(out_dir,name + '_conf.pfm') 18 | #data["costs"] = os.path.join(out_dir,name + '_cost.pmm') 19 | return data 20 | 21 | def run_alg(dataset,algorithm,out_dir): 22 | for name in dataset['names']: 23 | #print name 24 | data = extract_fields(dataset,name,out_dir) 25 | alg_copy = copy.deepcopy(data) 26 | alg_copy['gt'] = '' 27 | alg_copy['gt_mask'] = '' 28 | algorithm.update(alg_copy) 29 | cfg_path = os.path.join(out_dir,name + '_config.json') 30 | with open(cfg_path,'w') as fp: 31 | json.dump(algorithm,fp) 32 | 33 | with cd(algorithm['dir']): 34 | call([algorithm['command'],cfg_path]) 35 | 36 | def run_all_algs(): 37 | out_dir = '.' 38 | saved_file = os.path.join(out_dir,'finished_results.json') 39 | metric_results = os.path.join(out_dir,'metric_results.json') 40 | dataset_dir = '.' 41 | metrics_dir = '.' 42 | algs_dir = '.' 43 | 44 | try: 45 | with open(saved_file) as fp: 46 | saved = json.load(fp) 47 | except: 48 | saved = defaultdict(str) 49 | 50 | datasets = loadFiles(dataset_dir,'dataset') 51 | algorithms = loadFiles(algs_dir,'algorithm') 52 | metrics = loadFiles(metrics_dir,'metric') 53 | 54 | check_and_make_dir(out_dir) 55 | 56 | results = {} 57 | for algorithm in algorithms: 58 | results[algorithm['name']] = {} 59 | for dataset in datasets: 60 | unique_out = os.path.join(out_dir,algorithm['name'],dataset['name']) 61 | unique_exists = os.path.exists(unique_out) 62 | check_and_make_dir(unique_out) 63 | 64 | hashkey = '_'.join([ hashlib.sha1(json.dumps(dataset)).hexdigest(), \ 65 | hashlib.sha1(json.dumps(algorithm)).hexdigest(), \ 66 | path_checksum([algorithm['command']] + algorithm['dependent_files']) ]) 67 | if (not unique_exists) or (saved[unique_out] != hashkey): 68 | run_alg(dataset,algorithm,unique_out) 69 | saved[unique_out] = hashkey 70 | print unique_out,hashkey 71 | result = {} 72 | for name in dataset['names']: 73 | data = extract_fields(dataset,name,unique_out) 74 | result[name] = {} 75 | for metric in metrics: 76 | metric.update(data) 77 | metric['output'] = '' 78 | 79 | #with open(metric_cfg_path,'w') as fp: 80 | # json.dump(metric,fp) 81 | #print metric_cfg_path, name, unique_out 82 | res = check_output([metric['command'],json.dumps(metric)]) 83 | resn = json.loads(res) 84 | result[name].update(json.loads(res)) 85 | #print res#json.dumps(metric, sort_keys=True,indent=4, separators=(',', ': ')) 86 | #with open(metric['output'],'r') as fp: 87 | # res = json.load(fp) 88 | #if name in results[unique_name]: 89 | # results[unique_name][name].update(res) 90 | #else: 91 | # results[unique_name][name] = res 92 | unique_name = '_'.join([dataset['name'],algorithm['name']]) 93 | results[algorithm['name']][dataset['name']] = result 94 | with open(saved_file,'w') as fp: 95 | json.dump(saved,fp) 96 | with open(metric_results,'w') as fp: 97 | json.dump(results,fp) 98 | return results 99 | 100 | if __name__ == '__main__': 101 | results = run_all_algs() 102 | short_results = {} 103 | geomean = lambda n: reduce(lambda x,y: x*y, n) ** (1.0 / len(n)) 104 | for alg,ds in results.iteritems(): 105 | rz = [] 106 | for dsn,de in ds.iteritems(): 107 | for den,res in de.iteritems(): 108 | nd = {k:v for k,v in res.iteritems()} 109 | rz.append(nd) 110 | metric_names = rz[0].keys() 111 | rzz = {m: np.mean([y[m]['result'] for y in rz]) for m in metric_names} 112 | short_results[alg] = rzz 113 | print json.dumps(short_results, sort_keys=True,indent=4, separators=(',', ': 
')) 114 | 115 | -------------------------------------------------------------------------------- /github_census/util.py: -------------------------------------------------------------------------------- 1 | import re, shutil, sys, os 2 | import numpy as np 3 | import json 4 | import hashlib 5 | from os.path import normpath, walk, isdir, isfile, dirname, basename, \ 6 | exists as path_exists, join as path_join 7 | from contextlib import contextmanager 8 | import os 9 | 10 | @contextmanager 11 | def cd(newdir): 12 | prevdir = os.getcwd() 13 | os.chdir(os.path.expanduser(newdir)) 14 | try: 15 | yield 16 | finally: 17 | os.chdir(prevdir) 18 | def path_checksum(paths): 19 | """ 20 | Recursively calculates a checksum representing the contents of all files 21 | found with a sequence of file and/or directory paths. 22 | 23 | """ 24 | if not hasattr(paths, '__iter__'): 25 | raise TypeError('sequence or iterable expected not %r!' % type(paths)) 26 | 27 | def _update_checksum(checksum, dirname, filenames): 28 | for filename in sorted(filenames): 29 | path = path_join(dirname, filename) 30 | if isfile(path): 31 | #print path 32 | fh = open(path, 'rb') 33 | while 1: 34 | buf = fh.read(4096) 35 | if not buf : break 36 | checksum.update(buf) 37 | fh.close() 38 | 39 | chksum = hashlib.sha1() 40 | 41 | for path in sorted([normpath(f) for f in paths]): 42 | if path_exists(path): 43 | if isdir(path): 44 | walk(path, _update_checksum, chksum) 45 | elif isfile(path): 46 | _update_checksum(chksum, dirname(path), basename(path)) 47 | 48 | return chksum.hexdigest() 49 | 50 | 51 | def loadFiles(folder,t): 52 | res = [] 53 | for f in os.listdir(folder): 54 | jd = os.path.join(folder,f) 55 | try: 56 | with open(jd) as fp: 57 | d = json.load(fp) 58 | if d['type'] == t: 59 | base, ext = os.path.splitext(f) 60 | d['name'] = base 61 | res.append(d) 62 | except: 63 | pass 64 | return res 65 | 66 | def load_pfm(fname): 67 | color = None 68 | width = None 69 | height = None 70 | scale = None 71 | endian = None 72 | 73 | file = open(fname,'rU') 74 | header = file.readline().rstrip() 75 | if header == 'PF': 76 | color = True 77 | elif header == 'Pf': 78 | color = False 79 | else: 80 | raise Exception('Not a PFM file.') 81 | 82 | dim_match = re.match(r'^(\d+)\s(\d+)\s$', file.readline()) 83 | if dim_match: 84 | width, height = map(int, dim_match.groups()) 85 | else: 86 | raise Exception('Malformed PFM header.') 87 | 88 | scale = float(file.readline().rstrip()) 89 | if scale < 0: # little-endian 90 | endian = '<' 91 | scale = -scale 92 | else: 93 | endian = '>' # big-endian 94 | 95 | data = np.fromfile(file, endian + 'f') 96 | shape = (height, width, 3) if color else (height, width) 97 | return np.flipud(np.reshape(data, shape)), scale 98 | 99 | 100 | def load_psm(fname): 101 | color = None 102 | width = None 103 | height = None 104 | scale = None 105 | endian = None 106 | 107 | file = open(fname,'rU') 108 | header = file.readline().rstrip() 109 | if header == 'PF': 110 | color = True 111 | elif header == 'P9': 112 | color = False 113 | else: 114 | raise Exception('Not a PFM file.') 115 | 116 | dim_match = re.match(r'^(\d+)\s(\d+)\s$', file.readline()) 117 | if dim_match: 118 | width, height = map(int, dim_match.groups()) 119 | else: 120 | raise Exception('Malformed PFM header.') 121 | 122 | scale = float(file.readline().rstrip()) 123 | 124 | data = np.fromfile(file,np.uint16) 125 | shape = (height, width, 3) if color else (height, width) 126 | return np.flipud(np.reshape(data, shape)), scale 127 | 128 | 129 | def 
save_pfm(fname, image, scale=1): 130 | file = open(fname, 'wb') 131 | color = None 132 | 133 | if image.dtype.name != 'float32': 134 | raise Exception('Image dtype must be float32.') 135 | 136 | if len(image.shape) == 3 and image.shape[2] == 3: # color image 137 | color = True 138 | elif len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1: # greyscale 139 | color = False 140 | else: 141 | raise Exception('Image must have H x W x 3, H x W x 1 or H x W dimensions.') 142 | 143 | file.write('PF\n' if color else 'Pf\n') 144 | file.write('%d %d\n' % (image.shape[1], image.shape[0])) 145 | 146 | endian = image.dtype.byteorder 147 | 148 | if endian == '<' or endian == '=' and sys.byteorder == 'little': 149 | scale = -scale 150 | 151 | file.write('%f\n' % scale) 152 | 153 | np.flipud(image).tofile(file) 154 | 155 | def check_and_make_dir(directory): 156 | if not os.path.exists(directory): 157 | os.makedirs(directory) 158 | 159 | def get_default_dataset_config(): 160 | config = {} 161 | 162 | config['description'] = '' 163 | config['names'] = [] 164 | config['data'] = {} 165 | config['maxdisp'] = {} 166 | config['fx'] = {} 167 | config['fy'] = {} 168 | config['px'] = {} 169 | config['py'] = {} 170 | config['dpx'] = {} 171 | config['gt'] = {} 172 | config['gt_mask'] = {} 173 | config['baseline'] = {} 174 | config['width'] = {} 175 | config['height'] = {} 176 | config['maxint'] = {} 177 | config['minint'] = {} 178 | config['type'] = 'dataset' 179 | return config -------------------------------------------------------------------------------- /learning/1dcnn-basic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.metrics import accuracy_score 4 | 5 | # load data 6 | gt = np.loadtxt('gt.csv',delimiter=',') 7 | sgbm = np.loadtxt('sgbm.csv',delimiter=',') 8 | raw = np.loadtxt('raw.csv',delimiter=',') 9 | 10 | # data shape 11 | disp_dim = raw.shape[1] 12 | N = gt.shape[0] 13 | 14 | # split things 15 | def split_data(data,pt): 16 | return data[:pt], data[pt:] 17 | rand_perm = np.random.permutation(N) 18 | RAND_FRAC = int(round(0.66 * N)) 19 | gt = gt[rand_perm] 20 | raw_orig = np.copy(raw) 21 | raw = raw[rand_perm,:] 22 | sgbm = sgbm[rand_perm,:] 23 | train_gt, test_gt = split_data(gt,RAND_FRAC) 24 | train_raw, test_raw = split_data(raw,RAND_FRAC) 25 | train_sgbm, test_sgbm = split_data(sgbm,RAND_FRAC) 26 | 27 | naive_raw = np.argmin(test_raw,1) 28 | naive_sgbm = np.argmin(test_sgbm,1) 29 | 30 | print 'raw accuracy ', accuracy_score(test_gt,naive_raw) 31 | print 'sgbm accuracy ', accuracy_score(test_gt,naive_sgbm) 32 | 33 | from sklearn.linear_model import SGDClassifier, LogisticRegression 34 | from sklearn.ensemble import RandomForestClassifier 35 | 36 | #clf = SGDClassifier(n_jobs=-1) 37 | #clf.fit(train_raw,train_gt) 38 | #pred = clf.predict(test_raw) 39 | #print 'linearsvm accuracy ', accuracy_score(test_gt,pred) 40 | 41 | #clf = LogisticRegression(n_jobs=-1) 42 | #clf.fit(train_raw,train_gt) 43 | #pred = clf.predict(test_raw) 44 | #print 'logistic accuracy ', accuracy_score(test_gt,pred) 45 | 46 | #clf = RandomForestClassifier(min_samples_leaf=20,n_jobs=-1) 47 | #clf.fit(train_raw,train_gt) 48 | #pred = clf.predict(test_raw) 49 | #print 'rfc accuracy ', accuracy_score(test_gt,pred) 50 | #pred = clf.predict(raw_orig) 51 | #with open('rfc.txt','w') as otf: 52 | # for p in pred: 53 | # otf.write(str(int(p)) + '\n') 54 | from keras.utils.np_utils import to_categorical 55 | 56 | one_hot_train = 
to_categorical(train_gt,disp_dim) 57 | one_hot_test = to_categorical(test_gt,disp_dim) 58 | 59 | from keras.models import Sequential 60 | from keras.optimizers import SGD,Adam 61 | from keras.regularizers import * 62 | from keras.layers import Dense,Activation,Convolution1D,Flatten,Dropout 63 | 64 | model = Sequential() 65 | model.add(Convolution1D(16,9,border_mode='same',input_dim=1,input_length=70)) 66 | model.add(Activation('relu')) 67 | model.add(Convolution1D(16,9,border_mode='same')) 68 | model.add(Activation('relu')) 69 | model.add(Convolution1D(1,1,border_mode='same')) 70 | model.add(Flatten()) 71 | model.add(Dense(32)) 72 | model.add(Activation('relu')) 73 | model.add(Dense(disp_dim)) 74 | model.add(Activation('softmax')) 75 | model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.0001),metrics=['accuracy']) 76 | X = -train_raw + train_raw.mean() 77 | X = X.reshape((-1,70,1)) 78 | model.fit(X,one_hot_train,nb_epoch=24,batch_size=128,verbose=2) 79 | X = -test_raw + test_raw.mean() 80 | X = X.reshape((-1,70,1)) 81 | pred = model.predict_classes(X) 82 | print '2lyer nn accuracy ', accuracy_score(test_gt,pred) 83 | X = -raw_orig + train_raw.mean() 84 | pred = model.predict_classes(X) 85 | with open('1dcnn.txt','w') as otf: 86 | for p in pred: 87 | otf.write(str(int(p)) + '\n') 88 | 89 | -------------------------------------------------------------------------------- /learning/1dcnn-nin-basic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.metrics import accuracy_score 4 | 5 | # load data 6 | gt = np.loadtxt('gt.csv',delimiter=',') 7 | sgbm = np.loadtxt('sgbm.csv',delimiter=',') 8 | raw = np.loadtxt('raw.csv',delimiter=',') 9 | 10 | # data shape 11 | disp_dim = raw.shape[1] 12 | N = gt.shape[0] 13 | 14 | # split things 15 | def split_data(data,pt): 16 | return data[:pt], data[pt:] 17 | rand_perm = np.random.permutation(N) 18 | RAND_FRAC = int(round(0.66 * N)) 19 | gt = gt[rand_perm] 20 | raw_orig = np.copy(raw) 21 | raw = raw[rand_perm,:] 22 | sgbm = sgbm[rand_perm,:] 23 | train_gt, test_gt = split_data(gt,RAND_FRAC) 24 | train_raw, test_raw = split_data(raw,RAND_FRAC) 25 | train_sgbm, test_sgbm = split_data(sgbm,RAND_FRAC) 26 | 27 | naive_raw = np.argmin(test_raw,1) 28 | naive_sgbm = np.argmin(test_sgbm,1) 29 | 30 | print 'raw accuracy ', accuracy_score(test_gt,naive_raw) 31 | print 'sgbm accuracy ', accuracy_score(test_gt,naive_sgbm) 32 | 33 | from sklearn.linear_model import SGDClassifier, LogisticRegression 34 | from sklearn.ensemble import RandomForestClassifier 35 | 36 | #clf = SGDClassifier(n_jobs=-1) 37 | #clf.fit(train_raw,train_gt) 38 | #pred = clf.predict(test_raw) 39 | #print 'linearsvm accuracy ', accuracy_score(test_gt,pred) 40 | 41 | #clf = LogisticRegression(n_jobs=-1) 42 | #clf.fit(train_raw,train_gt) 43 | #pred = clf.predict(test_raw) 44 | #print 'logistic accuracy ', accuracy_score(test_gt,pred) 45 | 46 | #clf = RandomForestClassifier(min_samples_leaf=20,n_jobs=-1) 47 | #clf.fit(train_raw,train_gt) 48 | #pred = clf.predict(test_raw) 49 | #print 'rfc accuracy ', accuracy_score(test_gt,pred) 50 | #pred = clf.predict(raw_orig) 51 | #with open('rfc.txt','w') as otf: 52 | # for p in pred: 53 | # otf.write(str(int(p)) + '\n') 54 | from keras.utils.np_utils import to_categorical 55 | 56 | one_hot_train = to_categorical(train_gt,disp_dim) 57 | one_hot_test = to_categorical(test_gt,disp_dim) 58 | 59 | from keras.models import Sequential 60 | from keras.optimizers import SGD,Adam 61 | 
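# Note on the model defined below (1dcnn-nin-basic.py): strided Convolution1D layers
# (subsample_length 2, then 5) shrink the 70-sample matching-cost curve to length 7,
# a wide conv plus AveragePooling1D(7) collapse it to a single 128-feature vector,
# and a softmax Dense(disp_dim) layer scores each disparity bin. The input fed to
# fit/predict is the negated, mean-centred raw cost vector reshaped to (70, 1).
# Unlike 1dcnn-basic.py above, this script also reshapes raw_orig to (-1, 70, 1)
# before the final predict_classes call, which the Conv1D input layer requires.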
from keras.regularizers import * 62 | from keras.layers import Dense,Activation,Convolution1D,Flatten,Dropout,AveragePooling1D 63 | 64 | model = Sequential() 65 | model.add(Convolution1D(16,9,border_mode='same',input_dim=1,input_length=70)) 66 | model.add(Activation('relu')) 67 | model.add(Convolution1D(32,9,border_mode='same',subsample_length=2)) 68 | model.add(Activation('relu')) 69 | model.add(Convolution1D(64,9,border_mode='same',subsample_length=5)) 70 | model.add(Activation('relu')) 71 | model.add(Convolution1D(128,7,border_mode='same')) 72 | model.add(Activation('relu')) 73 | model.add(AveragePooling1D(7)) 74 | model.add(Flatten()) 75 | model.add(Dense(disp_dim)) 76 | model.add(Activation('softmax')) 77 | model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.0001),metrics=['accuracy']) 78 | X = -train_raw + train_raw.mean() 79 | X = X.reshape((-1,70,1)) 80 | model.fit(X,one_hot_train,nb_epoch=24,batch_size=128,verbose=2) 81 | X = -test_raw + test_raw.mean() 82 | X = X.reshape((-1,70,1)) 83 | pred = model.predict_classes(X) 84 | print '2lyer nn accuracy ', accuracy_score(test_gt,pred) 85 | X = -raw_orig + train_raw.mean() 86 | X = X.reshape((-1,70,1)) 87 | pred = model.predict_classes(X) 88 | with open('1dcnn-nin.txt','w') as otf: 89 | for p in pred: 90 | otf.write(str(int(p)) + '\n') 91 | 92 | -------------------------------------------------------------------------------- /learning/basic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.metrics import accuracy_score 4 | 5 | # load data 6 | folders = ['moto/','piano/','pipes/'] 7 | gt = [] 8 | sgbm = [] 9 | raw = [] 10 | for f in folders: 11 | gt.append(np.loadtxt(f + 'gt.csv',delimiter=',')) 12 | sgbm.append(np.loadtxt(f + 'sgbm.csv',delimiter=',')) 13 | raw.append(np.loadtxt(f + 'raw.csv',delimiter=',')) 14 | gt = np.hstack(gt) 15 | sgbm = np.vstack(sgbm) 16 | raw = np.vstack(raw) 17 | 18 | # data shape 19 | disp_dim = raw.shape[1] 20 | N = gt.shape[0] 21 | 22 | # split things 23 | def split_data(data,pt): 24 | return data[:pt], data[pt:] 25 | rand_perm = np.random.permutation(N) 26 | RAND_FRAC = int(round(0.8 * N)) 27 | gt = gt[rand_perm] 28 | raw_orig = np.copy(raw) 29 | raw = raw[rand_perm,:] 30 | sgbm = sgbm[rand_perm,:] 31 | train_gt, test_gt = split_data(gt,RAND_FRAC) 32 | train_raw, test_raw = split_data(raw,RAND_FRAC) 33 | train_sgbm, test_sgbm = split_data(sgbm,RAND_FRAC) 34 | 35 | naive_raw = np.argmin(test_raw,1) 36 | naive_sgbm = np.argmin(test_sgbm,1) 37 | 38 | print 'raw accuracy ', accuracy_score(test_gt,naive_raw) 39 | print 'sgbm accuracy ', accuracy_score(test_gt,naive_sgbm) 40 | 41 | from sklearn.linear_model import SGDClassifier, LogisticRegression 42 | from sklearn.ensemble import RandomForestClassifier 43 | 44 | from keras.utils.np_utils import to_categorical 45 | 46 | one_hot_train = to_categorical(train_gt,disp_dim) 47 | one_hot_test = to_categorical(test_gt,disp_dim) 48 | 49 | from keras.models import Sequential 50 | from keras.optimizers import SGD,Adam 51 | from keras.regularizers import * 52 | from keras.layers import Dense,Activation,Convolution1D,Flatten,Dropout,AveragePooling1D 53 | 54 | model = Sequential() 55 | model.add(Convolution1D(8,9,border_mode='same',input_dim=1,input_length=70)) 56 | model.add(Activation('relu')) 57 | model.add(Convolution1D(16,9,border_mode='same',subsample_length=2)) 58 | model.add(Activation('relu')) 59 | model.add(Convolution1D(32,9,border_mode='same',subsample_length=5)) 60 | 
model.add(Activation('relu')) 61 | model.add(Convolution1D(64,5,border_mode='same')) 62 | model.add(Activation('relu')) 63 | model.add(AveragePooling1D(7)) 64 | model.add(Flatten()) 65 | model.add(Dense(disp_dim)) 66 | model.add(Activation('softmax')) 67 | model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.0001),metrics=['accuracy']) 68 | 69 | X = -train_raw + 3000 70 | X = X.reshape((-1,70,1)) 71 | model.fit(X,one_hot_train,nb_epoch=10,batch_size=128,verbose=2) 72 | X = -test_raw + 3000 73 | X = X.reshape((-1,70,1)) 74 | pred = model.predict_classes(X,128) 75 | print '2lyer nn accuracy ', accuracy_score(test_gt,pred) 76 | 77 | json_string = model.to_json() 78 | open('newest_model.json', 'w').write(json_string) 79 | model.save_weights('newest_model.h5') 80 | 81 | X = -raw_orig + 3000 82 | X = X.reshape((-1,70,1)) 83 | pred = model.predict_classes(X,128) 84 | with open('1dcnn-nin.txt','w') as otf: 85 | for p in pred: 86 | otf.write(str(int(p)) + '\n') 87 | 88 | -------------------------------------------------------------------------------- /learning/converging-nn-basic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.metrics import accuracy_score 4 | 5 | # load data 6 | gt = np.loadtxt('gt.csv',delimiter=',') 7 | sgbm = np.loadtxt('sgbm.csv',delimiter=',') 8 | raw = np.loadtxt('raw.csv',delimiter=',') 9 | 10 | # data shape 11 | disp_dim = raw.shape[1] 12 | N = gt.shape[0] 13 | 14 | # split things 15 | def split_data(data,pt): 16 | return data[:pt], data[pt:] 17 | rand_perm = np.random.permutation(N) 18 | RAND_FRAC = int(round(0.66 * N)) 19 | gt = gt[rand_perm] 20 | raw_orig = np.copy(raw) 21 | raw = raw[rand_perm,:] 22 | sgbm = sgbm[rand_perm,:] 23 | train_gt, test_gt = split_data(gt,RAND_FRAC) 24 | train_raw, test_raw = split_data(raw,RAND_FRAC) 25 | train_sgbm, test_sgbm = split_data(sgbm,RAND_FRAC) 26 | 27 | naive_raw = np.argmin(test_raw,1) 28 | naive_sgbm = np.argmin(test_sgbm,1) 29 | 30 | print 'raw accuracy ', accuracy_score(test_gt,naive_raw) 31 | print 'sgbm accuracy ', accuracy_score(test_gt,naive_sgbm) 32 | 33 | from sklearn.linear_model import SGDClassifier, LogisticRegression 34 | from sklearn.ensemble import RandomForestClassifier 35 | 36 | #clf = SGDClassifier(n_jobs=-1) 37 | #clf.fit(train_raw,train_gt) 38 | #pred = clf.predict(test_raw) 39 | #print 'linearsvm accuracy ', accuracy_score(test_gt,pred) 40 | 41 | #clf = LogisticRegression(n_jobs=-1) 42 | #clf.fit(train_raw,train_gt) 43 | #pred = clf.predict(test_raw) 44 | #print 'logistic accuracy ', accuracy_score(test_gt,pred) 45 | 46 | #clf = RandomForestClassifier(min_samples_leaf=20,n_jobs=-1) 47 | #clf.fit(train_raw,train_gt) 48 | #pred = clf.predict(test_raw) 49 | #print 'rfc accuracy ', accuracy_score(test_gt,pred) 50 | #pred = clf.predict(raw_orig) 51 | #with open('rfc.txt','w') as otf: 52 | # for p in pred: 53 | # otf.write(str(int(p)) + '\n') 54 | from keras.utils.np_utils import to_categorical 55 | 56 | one_hot_train = to_categorical(train_gt,disp_dim) 57 | one_hot_test = to_categorical(test_gt,disp_dim) 58 | 59 | from keras.models import Sequential 60 | from keras.optimizers import SGD,Adam 61 | from keras.layers import Dense,Activation 62 | 63 | model = Sequential() 64 | model.add(Dense(output_dim=disp_dim, input_dim=disp_dim,init='identity')) 65 | #model.add(Activation('relu')) 66 | model.add(Dense(output_dim=disp_dim,init='identity')) 67 | model.add(Activation('relu')) 68 | #model.add(Activation('tanh')) 69 | 
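# The identity-initialised Dense(disp_dim) layers above start out as a pass-through
# of the negated, mean-centred cost curve, so training only has to learn a correction
# on top of it. Note that the prediction code further down takes np.argmin of the
# softmax output rather than argmax; that choice is kept as in the original script.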
#model.add(Dense(output_dim=128)) 70 | #model.add(Activation('tanh')) 71 | #model.add(Dense(output_dim=disp_dim)) 72 | model.add(Activation('softmax')) 73 | model.compile(loss='hinge', optimizer=SGD(lr=0.0001,momentum=0.9,decay=1e-6,nesterov=True),metrics=['accuracy']) 74 | model.fit(-train_raw+train_raw.mean(),one_hot_train,nb_epoch=24,batch_size=128,verbose=2) 75 | pred = model.predict(-test_raw+train_raw.mean()) 76 | pred = np.argmin(pred,1) 77 | print '2lyer nn accuracy ', accuracy_score(test_gt,pred) 78 | pred = model.predict(-raw_orig+train_raw.mean()) 79 | pred = np.argmin(pred,1) 80 | with open('nn.txt','w') as otf: 81 | for p in pred: 82 | otf.write(str(int(p)) + '\n') 83 | 84 | -------------------------------------------------------------------------------- /learning/cosine-basic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.metrics import accuracy_score 4 | 5 | # load data 6 | gt = np.loadtxt('gt.csv',delimiter=',') 7 | sgbm = np.loadtxt('sgbm.csv',delimiter=',') 8 | raw = np.loadtxt('raw.csv',delimiter=',') 9 | 10 | # data shape 11 | disp_dim = raw.shape[1] 12 | N = gt.shape[0] 13 | 14 | # split things 15 | def split_data(data,pt): 16 | return data[:pt], data[pt:] 17 | rand_perm = np.random.permutation(N) 18 | RAND_FRAC = int(round(0.66 * N)) 19 | gt = gt[rand_perm] 20 | raw_orig = np.copy(raw) 21 | raw = raw[rand_perm,:] 22 | sgbm = sgbm[rand_perm,:] 23 | train_gt, test_gt = split_data(gt,RAND_FRAC) 24 | train_raw, test_raw = split_data(raw,RAND_FRAC) 25 | train_sgbm, test_sgbm = split_data(sgbm,RAND_FRAC) 26 | 27 | naive_raw = np.argmin(test_raw,1) 28 | naive_sgbm = np.argmin(test_sgbm,1) 29 | 30 | print 'raw accuracy ', accuracy_score(test_gt,naive_raw) 31 | print 'sgbm accuracy ', accuracy_score(test_gt,naive_sgbm) 32 | 33 | from sklearn.linear_model import SGDClassifier, LogisticRegression 34 | from sklearn.ensemble import RandomForestClassifier 35 | 36 | #clf = SGDClassifier(n_jobs=-1) 37 | #clf.fit(train_raw,train_gt) 38 | #pred = clf.predict(test_raw) 39 | #print 'linearsvm accuracy ', accuracy_score(test_gt,pred) 40 | 41 | #clf = LogisticRegression(n_jobs=-1) 42 | #clf.fit(train_raw,train_gt) 43 | #pred = clf.predict(test_raw) 44 | #print 'logistic accuracy ', accuracy_score(test_gt,pred) 45 | 46 | #clf = RandomForestClassifier(min_samples_leaf=20,n_jobs=-1) 47 | #clf.fit(train_raw,train_gt) 48 | #pred = clf.predict(test_raw) 49 | #print 'rfc accuracy ', accuracy_score(test_gt,pred) 50 | #pred = clf.predict(raw_orig) 51 | #with open('rfc.txt','w') as otf: 52 | # for p in pred: 53 | # otf.write(str(int(p)) + '\n') 54 | from keras.utils.np_utils import to_categorical 55 | 56 | one_hot_train = to_categorical(train_gt,disp_dim) 57 | one_hot_test = to_categorical(test_gt,disp_dim) 58 | 59 | from keras.models import Sequential 60 | from keras.optimizers import SGD,Adam 61 | from keras.layers import Dense,Activation 62 | 63 | model = Sequential() 64 | model.add(Dense(output_dim=32, input_dim=disp_dim)) 65 | model.add(Activation('tanh')) 66 | #model.add(keras.layers.advanced_activations.LeakyReLU(alpha=0.3)) 67 | model.add(Dense(output_dim=disp_dim)) 68 | model.add(Activation('softmax')) 69 | #model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.06,momentum=0.9,decay=1e-6,nesterov=True),metrics=['accuracy']) 70 | model.compile(loss='cosine_proximity', optimizer=SGD(lr=0.06,momentum=0.9,decay=1e-6,nesterov=True),metrics=['accuracy']) 71 | 
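# cosine-basic.py trains a small dense bottleneck (disp_dim -> 32 -> disp_dim) with a
# cosine-proximity loss against the one-hot disparity labels. The fit call below feeds
# negated costs shifted by the mean of the full unshuffled cost matrix (raw_orig);
# the evaluation block after it is left commented out in the original script.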
model.fit(-train_raw+raw_orig.mean(),one_hot_train,nb_epoch=24,batch_size=128,verbose=2) 72 | #pred = model.predict(test_raw) 73 | #pred = np.argmin(pred,1) 74 | #print '2lyer nn accuracy ', accuracy_score(test_gt,pred) 75 | -------------------------------------------------------------------------------- /learning/nin-smaller-basic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.metrics import accuracy_score 4 | 5 | # load data 6 | gt = np.loadtxt('gt.csv',delimiter=',') 7 | sgbm = np.loadtxt('sgbm.csv',delimiter=',') 8 | raw = np.loadtxt('raw.csv',delimiter=',') 9 | 10 | # data shape 11 | disp_dim = raw.shape[1] 12 | N = gt.shape[0] 13 | 14 | # split things 15 | def split_data(data,pt): 16 | return data[:pt], data[pt:] 17 | rand_perm = np.random.permutation(N) 18 | RAND_FRAC = int(round(0.66 * N)) 19 | gt = gt[rand_perm] 20 | raw_orig = np.copy(raw) 21 | raw = raw[rand_perm,:] 22 | sgbm = sgbm[rand_perm,:] 23 | train_gt, test_gt = split_data(gt,RAND_FRAC) 24 | train_raw, test_raw = split_data(raw,RAND_FRAC) 25 | train_sgbm, test_sgbm = split_data(sgbm,RAND_FRAC) 26 | 27 | naive_raw = np.argmin(test_raw,1) 28 | naive_sgbm = np.argmin(test_sgbm,1) 29 | 30 | print 'raw accuracy ', accuracy_score(test_gt,naive_raw) 31 | print 'sgbm accuracy ', accuracy_score(test_gt,naive_sgbm) 32 | 33 | from sklearn.linear_model import SGDClassifier, LogisticRegression 34 | from sklearn.ensemble import RandomForestClassifier 35 | 36 | #clf = SGDClassifier(n_jobs=-1) 37 | #clf.fit(train_raw,train_gt) 38 | #pred = clf.predict(test_raw) 39 | #print 'linearsvm accuracy ', accuracy_score(test_gt,pred) 40 | 41 | #clf = LogisticRegression(n_jobs=-1) 42 | #clf.fit(train_raw,train_gt) 43 | #pred = clf.predict(test_raw) 44 | #print 'logistic accuracy ', accuracy_score(test_gt,pred) 45 | 46 | #clf = RandomForestClassifier(min_samples_leaf=20,n_jobs=-1) 47 | #clf.fit(train_raw,train_gt) 48 | #pred = clf.predict(test_raw) 49 | #print 'rfc accuracy ', accuracy_score(test_gt,pred) 50 | #pred = clf.predict(raw_orig) 51 | #with open('rfc.txt','w') as otf: 52 | # for p in pred: 53 | # otf.write(str(int(p)) + '\n') 54 | from keras.utils.np_utils import to_categorical 55 | 56 | one_hot_train = to_categorical(train_gt,disp_dim) 57 | one_hot_test = to_categorical(test_gt,disp_dim) 58 | 59 | from keras.models import Sequential 60 | from keras.optimizers import SGD,Adam 61 | from keras.regularizers import * 62 | from keras.layers import Dense,Activation,Convolution1D,Flatten,Dropout,AveragePooling1D 63 | 64 | model = Sequential() 65 | model.add(Convolution1D(8,9,border_mode='same',input_dim=1,input_length=70)) 66 | model.add(Activation('relu')) 67 | model.add(Convolution1D(16,9,border_mode='same',subsample_length=2)) 68 | model.add(Activation('relu')) 69 | model.add(Convolution1D(32,9,border_mode='same',subsample_length=5)) 70 | model.add(Activation('relu')) 71 | model.add(Convolution1D(64,5,border_mode='same')) 72 | model.add(Activation('relu')) 73 | model.add(AveragePooling1D(7)) 74 | model.add(Flatten()) 75 | model.add(Dense(disp_dim)) 76 | model.add(Activation('softmax')) 77 | model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.0001),metrics=['accuracy']) 78 | X = -train_raw + train_raw.mean() 79 | X = X.reshape((-1,70,1)) 80 | model.fit(X,one_hot_train,nb_epoch=24,batch_size=128,verbose=2) 81 | X = -test_raw + test_raw.mean() 82 | X = X.reshape((-1,70,1)) 83 | pred = model.predict_classes(X) 84 | print '2lyer nn accuracy ', 
accuracy_score(test_gt,pred) 85 | X = -raw_orig + train_raw.mean() 86 | X = X.reshape((-1,70,1)) 87 | pred = model.predict_classes(X) 88 | with open('1dcnn-nin.txt','w') as otf: 89 | for p in pred: 90 | otf.write(str(int(p)) + '\n') 91 | 92 | -------------------------------------------------------------------------------- /learning/results.md: -------------------------------------------------------------------------------- 1 | raw accuracy 0.703764520427 2 | sgbm accuracy 0.743595999024 3 | rfc accuracy 0.839698237844 4 | 5 | 1d CNN: 0.7639 6 | 1d CNN NIN: 0.904948674161 7 | -------------------------------------------------------------------------------- /learning/run_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from keras.utils.np_utils import to_categorical 4 | from keras.models import Sequential 5 | from keras.optimizers import SGD,Adam 6 | from keras.regularizers import * 7 | from keras.layers import Dense,Activation,Convolution1D,Flatten,Dropout,AveragePooling1D 8 | from keras.models import model_from_json 9 | import os, sys 10 | from sklearn.metrics import accuracy_score 11 | 12 | # load data 13 | f = 'teddy/' 14 | model_name = 'moto_piano_pipes' 15 | if len(sys.argv) > 2: 16 | f = sys.argv[1] 17 | model_name = sys.argv[2] 18 | gt = [] 19 | sgbm = [] 20 | raw = [] 21 | gt.append(np.loadtxt(f + 'gt.csv',delimiter=',')) 22 | sgbm.append(np.loadtxt(f + 'sgbm.csv',delimiter=',')) 23 | raw.append(np.loadtxt(f + 'raw.csv',delimiter=',')) 24 | gt = np.hstack(gt) 25 | sgbm = np.vstack(sgbm) 26 | raw = np.vstack(raw) 27 | 28 | # data shape 29 | disp_dim = raw.shape[1] 30 | N = gt.shape[0] 31 | 32 | model = model_from_json(open(model_name + '.json').read()) 33 | model.load_weights(model_name + '.h5') 34 | model.compile(loss='categorical_crossentropy', optimizer='sgd') 35 | 36 | naive_raw = np.argmin(raw,1) 37 | naive_sgbm = np.argmin(sgbm,1) 38 | 39 | print 'raw accuracy ', accuracy_score(gt,naive_raw) 40 | print 'sgbm accuracy ', accuracy_score(gt,naive_sgbm) 41 | 42 | X = -raw + raw.mean() 43 | X = X.reshape((-1,70,1)) 44 | pred = model.predict_classes(X,128) 45 | print '2lyer nn accuracy ', accuracy_score(gt,pred) 46 | with open(f + '1dcnn-nin.txt','w') as otf: 47 | for p in pred: 48 | otf.write(str(int(p)) + '\n') 49 | -------------------------------------------------------------------------------- /learning/tflow.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | s = tf.Session() 5 | 6 | x = np.zeros((1,2,4)).astype(np.float32) 7 | x[0,:,1] = 1 8 | y = 0*np.ones((2,)).astype(np.int32) 9 | #y[0] = -1 10 | a = tf.constant(x) 11 | aa = tf.squeeze(a) 12 | b = tf.constant(y) 13 | print s.run(tf.nn.sparse_softmax_cross_entropy_with_logits(aa,b)) 14 | 15 | #a = tf.constant(np.array([[.1, .3, .5, .9]])) 16 | #b = tf.constant(np.array([1.0])) 17 | #d = tf.nn.softmax(a) 18 | #cost = tf.reduce_mean(-tf.reduce_sum(c * tf.log(d), reduction_indices=[1])) 19 | # 20 | #b = tf.constant(np.array([[0.0,-1.0,0.0,0.0]])) 21 | #x = tf.constant(2) 22 | #y= tf.constant(5) 23 | #c2 = tf.select(tf.less(x,y),c,d) 24 | #s.run(d) 25 | 26 | -------------------------------------------------------------------------------- /learning/train_2d-orig.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from sklearn.metrics import accuracy_score 4 | 5 | import sys 6 | 
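# NOTE: train_2d-orig.py is the original 2D experiment kept for reference. As written
# it will not run: the graph below references conv1 (in the max_pool call) as well as
# conv3_weights and conv3_biases, none of which are defined in this file.
# train_2d.py is the corrected version of this script.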
import os 7 | import random 8 | 9 | class Neural_Network_2D: 10 | def __init__(self, maxiter=10, eta=1e-3, print_every=1): 11 | self.maxiter = maxiter 12 | self.eta = eta 13 | self.print_every = print_every 14 | #train_X should be ~400x70 15 | def fit(self, train_X,train_y, test_X,test_y): 16 | self.session = tf.InteractiveSession() 17 | self.x = tf.placeholder(tf.float32, [train_X.shape[1],train_X.shape[2]]) 18 | 19 | # output 20 | self.y = tf.placeholder(tf.int64, [train_X.shape[1]]) 21 | 22 | # weights 23 | conv1_weights = tf.Variable( 24 | tf.truncated_normal([5, 5, 1, 32], # 5x5 filter, depth 32. 25 | stddev=0.1)) 26 | conv1_biases = tf.Variable(tf.constant(0.1, shape=[32])) 27 | conv2_weights = tf.Variable( 28 | tf.truncated_normal([5, 5, 32, 1], 29 | stddev=0.1)) 30 | conv2_biases = tf.Variable(tf.constant(0.0, shape=[1])) 31 | 32 | 33 | # model 34 | self.img = tf.reshape(self.x,[1,train_X.shape[1],train_X.shape[2],1]) 35 | 36 | conv = tf.nn.conv2d(self.img, 37 | conv1_weights, 38 | strides=[1, 1, 1, 1], 39 | padding='SAME') 40 | relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases)) 41 | pool = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 42 | padding='SAME', name='pool1') 43 | conv = tf.nn.conv2d(pool, 44 | conv2_weights, 45 | strides=[1, 1, 1, 1], 46 | padding='SAME') 47 | relu = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases)) 48 | conv = tf.nn.conv2d(relu, 49 | conv3_weights, 50 | strides=[1, 1, 1, 1], 51 | padding='SAME') 52 | relu = tf.nn.relu(tf.nn.bias_add(conv, conv3_biases)) 53 | self.pred = tf.nn.bias_add(conv, conv2_biases) 54 | self.pred = tf.reshape(self.pred,[train_X.shape[1],train_X.shape[2]]) 55 | 56 | # loss 57 | cost = tf.nn.sparse_softmax_cross_entropy_with_logits(self.pred,self.y) 58 | 59 | # optimizer 60 | 61 | self.optimizer = tf.train.AdamOptimizer(1e-4).minimize(cost) 62 | 63 | correct_prediction = tf.equal(tf.argmax(self.pred,1), self.y) 64 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 65 | 66 | init = tf.initialize_all_variables() 67 | loss_value = self.session.run(init) 68 | test_loss = 0 69 | 70 | #print self.pred.eval( 71 | # feed_dict={self.x: train_X[25,:,:], self.y: train_y[25,:]})[0,:] 72 | 73 | for epoch in xrange(self.maxiter): 74 | train_accuracy = accuracy.eval( 75 | feed_dict={self.x: train_X[0,:,:], self.y: train_y[0,:]}) 76 | #print("step %d, training accuracy %f, loss %f"%(epoch, train_accuracy,test_loss)) 77 | print epoch, train_accuracy 78 | #idxs = np.random.permutation(train1.shape[0]) 79 | for i in xrange(0, train_X.shape[0]): 80 | _,test_loss = self.session.run([self.optimizer,cost], 81 | feed_dict={self.x: train_X[i,:,:], self.y : train_y[i,:]}) 82 | test_accuracy = 0.0 83 | test_count = 0.0 84 | for i in xrange(0, test_X.shape[0]): 85 | test_accuracy += accuracy.eval( 86 | feed_dict={self.x: test_X[i,:,:], self.y : test_y[i,:]}) 87 | test_count += 1 88 | print 'test err', test_accuracy/test_count 89 | 90 | if len(sys.argv) > 1: 91 | mod = Neural_Network_2D(maxiter=int(sys.argv[1])) 92 | else: 93 | mod = Neural_Network_2D() 94 | 95 | if not os.path.isfile('gt.npy'): 96 | folders = ['moto2d/'] 97 | model_name = 'moto2d_model' 98 | gt = [] 99 | sgbm = [] 100 | raw = [] 101 | for f in folders: 102 | gt.append(np.loadtxt(f + 'gt.csv',delimiter=',')) 103 | sgbm.append(np.loadtxt(f + 'sgbm.csv',delimiter=',')) 104 | raw.append(np.loadtxt(f + 'raw.csv',delimiter=',')) 105 | gt = np.hstack(gt).astype(np.int32) 106 | sgbm = np.vstack(sgbm).astype(np.float32) 107 | raw = np.vstack(raw).astype(np.float32) 
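# The flat per-pixel cost rows loaded above are reshaped below to
# (image rows, image columns, disparities) and cached as .npy files, so later runs
# take the else branch and skip the slow np.loadtxt parsing.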
108 | sgbm = sgbm.reshape(gt.shape[0],gt.shape[1],-1) 109 | raw = raw.reshape(gt.shape[0],gt.shape[1],-1) 110 | np.save('gt.npy',gt) 111 | np.save('raw.npy',raw) 112 | np.save('sgbm.npy',sgbm) 113 | else: 114 | gt = np.load('gt.npy').astype(np.float32) 115 | raw = np.load('raw.npy').astype(np.float32) 116 | sgbm = np.load('sgbm.npy').astype(np.float32) 117 | 118 | N = gt.shape[0] 119 | 120 | # split things 121 | def split_data(data,pt): 122 | return data[:pt], data[pt:] 123 | rand_perm = np.random.permutation(N) 124 | RAND_FRAC = int(round(0.8 * N)) 125 | gt = gt[rand_perm] 126 | raw_orig = np.copy(raw) 127 | raw = raw[rand_perm,:] 128 | sgbm = sgbm[rand_perm,:] 129 | train_gt, test_gt = split_data(gt,RAND_FRAC) 130 | train_raw, test_raw = split_data(-raw+4000,RAND_FRAC) 131 | train_sgbm, test_sgbm = split_data(-sgbm+7500000,RAND_FRAC) 132 | 133 | mod.fit(train_raw,train_gt,test_raw,test_gt) -------------------------------------------------------------------------------- /learning/train_2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from sklearn.metrics import accuracy_score 4 | 5 | import sys 6 | import os 7 | import random 8 | 9 | class Neural_Network_2D: 10 | def __init__(self, maxiter=10, eta=1e-3, print_every=1): 11 | self.maxiter = maxiter 12 | self.eta = eta 13 | self.print_every = print_every 14 | #train_X should be ~400x70 15 | def fit(self, train_X,train_y, test_X,test_y): 16 | self.session = tf.InteractiveSession() 17 | self.x = tf.placeholder(tf.float32, [train_X.shape[1],train_X.shape[2]]) 18 | 19 | # output 20 | self.y = tf.placeholder(tf.int64, [train_X.shape[1]]) 21 | 22 | # weights 23 | conv1_weights = tf.Variable( 24 | tf.truncated_normal([5, 5, 1, 32], # 5x5 filter, depth 32. 
25 | stddev=0.1)) 26 | conv1_biases = tf.Variable(tf.constant(0.1, shape=[32])) 27 | conv2_weights = tf.Variable( 28 | tf.truncated_normal([5, 5, 32, 64], 29 | stddev=0.1)) 30 | conv2_biases = tf.Variable(tf.constant(0.0, shape=[64])) 31 | conv3_weights = tf.Variable( 32 | tf.truncated_normal([5, 5, 64, 1], 33 | stddev=0.1)) 34 | conv3_biases = tf.Variable(tf.constant(0.0, shape=[1])) 35 | 36 | 37 | # model 38 | self.img = tf.reshape(self.x,[1,train_X.shape[1],train_X.shape[2],1]) 39 | 40 | conv = tf.nn.conv2d(self.img, 41 | conv1_weights, 42 | strides=[1, 1, 1, 1], 43 | padding='SAME') 44 | relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases)) 45 | conv = tf.nn.conv2d(relu, 46 | conv2_weights, 47 | strides=[1, 1, 1, 1], 48 | padding='SAME') 49 | relu = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases)) 50 | conv = tf.nn.conv2d(relu, 51 | conv3_weights, 52 | strides=[1, 1, 1, 1], 53 | padding='SAME') 54 | self.pred = tf.nn.bias_add(conv, conv3_biases) 55 | self.pred = tf.reshape(self.pred,[train_X.shape[1],train_X.shape[2]]) 56 | 57 | # loss 58 | cost = tf.nn.sparse_softmax_cross_entropy_with_logits(self.pred,self.y) 59 | 60 | # optimizer 61 | 62 | self.optimizer = tf.train.AdamOptimizer(1e-4).minimize(cost) 63 | 64 | correct_prediction = tf.equal(tf.argmax(self.pred,1), self.y) 65 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 66 | 67 | init = tf.initialize_all_variables() 68 | loss_value = self.session.run(init) 69 | test_loss = 0 70 | 71 | #print self.pred.eval( 72 | # feed_dict={self.x: train_X[25,:,:], self.y: train_y[25,:]})[0,:] 73 | 74 | for epoch in xrange(self.maxiter): 75 | train_accuracy = accuracy.eval( 76 | feed_dict={self.x: train_X[0,:,:], self.y: train_y[0,:]}) 77 | #print("step %d, training accuracy %f, loss %f"%(epoch, train_accuracy,test_loss)) 78 | print epoch, train_accuracy 79 | #idxs = np.random.permutation(train1.shape[0]) 80 | for i in xrange(0, train_X.shape[0]): 81 | _,test_loss = self.session.run([self.optimizer,cost], 82 | feed_dict={self.x: train_X[i,:,:], self.y : train_y[i,:]}) 83 | test_accuracy = 0.0 84 | test_count = 0.0 85 | for i in xrange(0, test_X.shape[0]): 86 | test_accuracy += accuracy.eval( 87 | feed_dict={self.x: test_X[i,:,:], self.y : test_y[i,:]}) 88 | test_count += 1 89 | print 'test err', test_accuracy/test_count 90 | 91 | if len(sys.argv) > 1: 92 | mod = Neural_Network_2D(maxiter=int(sys.argv[1])) 93 | else: 94 | mod = Neural_Network_2D() 95 | 96 | if not os.path.isfile('gt.npy'): 97 | folders = ['moto2d/'] 98 | model_name = 'moto2d_model' 99 | gt = [] 100 | sgbm = [] 101 | raw = [] 102 | for f in folders: 103 | gt.append(np.loadtxt(f + 'gt.csv',delimiter=',')) 104 | sgbm.append(np.loadtxt(f + 'sgbm.csv',delimiter=',')) 105 | raw.append(np.loadtxt(f + 'raw.csv',delimiter=',')) 106 | gt = np.hstack(gt).astype(np.int32) 107 | sgbm = np.vstack(sgbm).astype(np.float32) 108 | raw = np.vstack(raw).astype(np.float32) 109 | sgbm = sgbm.reshape(gt.shape[0],gt.shape[1],-1) 110 | raw = raw.reshape(gt.shape[0],gt.shape[1],-1) 111 | np.save('gt.npy',gt) 112 | np.save('raw.npy',raw) 113 | np.save('sgbm.npy',sgbm) 114 | else: 115 | gt = np.load('gt.npy').astype(np.float32) 116 | raw = np.load('raw.npy').astype(np.float32) 117 | sgbm = np.load('sgbm.npy').astype(np.float32) 118 | 119 | N = gt.shape[0] 120 | 121 | # split things 122 | def split_data(data,pt): 123 | return data[:pt], data[pt:] 124 | rand_perm = np.random.permutation(N) 125 | RAND_FRAC = int(round(0.8 * N)) 126 | gt = gt[rand_perm] 127 | raw_orig = np.copy(raw) 128 | raw = 
raw[rand_perm,:] 129 | sgbm = sgbm[rand_perm,:] 130 | train_gt, test_gt = split_data(gt,RAND_FRAC) 131 | train_raw, test_raw = split_data(-raw+4000,RAND_FRAC) 132 | train_sgbm, test_sgbm = split_data(-sgbm+7500000,RAND_FRAC) 133 | 134 | mod.fit(train_raw,train_gt,test_raw,test_gt) -------------------------------------------------------------------------------- /msvc/CensusMatching.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {8136154D-6D04-0953-5698-79794205E5B7} 23 | true 24 | Win32Proj 25 | CensusMatching 26 | 8.1 27 | 28 | 29 | 30 | Application 31 | true 32 | MultiByte 33 | v140 34 | 35 | 36 | Application 37 | false 38 | MultiByte 39 | v140 40 | 41 | 42 | v140 43 | 44 | 45 | v140 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 71f5112c 58 | 59 | 60 | true 61 | bin\$(PlatformShortName)\$(Configuration)\ 62 | obj\$(PlatformShortName)\$(Configuration)\$(ProjectName) 63 | CensusMatching 64 | .exe 65 | 66 | 67 | false 68 | bin\$(PlatformShortName)\$(Configuration)\ 69 | obj\$(PlatformShortName)\$(Configuration)\$(ProjectName) 70 | CensusMatching 71 | .exe 72 | 73 | 74 | bin\$(PlatformShortName)\$(Configuration)\ 75 | obj\$(PlatformShortName)\$(Configuration)\$(ProjectName) 76 | CensusMatching 77 | 78 | 79 | bin\$(PlatformShortName)\$(Configuration)\ 80 | obj\$(PlatformShortName)\$(Configuration)\$(ProjectName) 81 | CensusMatching 82 | 83 | 84 | 85 | NotUsing 86 | Level3 87 | _DEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 88 | ProgramDatabase 89 | Disabled 90 | MultiThreadedDebugDLL 91 | /FS %(AdditionalOptions) 92 | 93 | 94 | _DEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 95 | 96 | 97 | Console 98 | true 99 | mainCRTStartup 100 | 101 | 102 | 103 | 104 | NotUsing 105 | Level3 106 | _CRT_SECURE_NO_WARNINGS;NDEBUG;%(PreprocessorDefinitions) 107 | ProgramDatabase 108 | MaxSpeed 109 | true 110 | true 111 | false 112 | true 113 | AdvancedVectorExtensions 114 | Fast 115 | false 116 | true 117 | /FS %(AdditionalOptions) 118 | 119 | 120 | _CRT_SECURE_NO_WARNINGS;NDEBUG;%(PreprocessorDefinitions) 121 | 122 | 123 | Console 124 | true 125 | true 126 | true 127 | mainCRTStartup 128 | 129 | 130 | 131 | 132 | Level3 133 | Disabled 134 | _DEBUG;%(PreprocessorDefinitions) 135 | MultiThreadedDebugDLL 136 | EnableFastChecks 137 | /FS %(AdditionalOptions) 138 | 139 | 140 | 141 | 142 | 143 | AdvancedVectorExtensions 144 | 145 | 146 | 147 | 148 | Fast 149 | 150 | 151 | 152 | 153 | false 154 | true 155 | true 156 | /FS %(AdditionalOptions) 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
195 | 196 | 197 | -------------------------------------------------------------------------------- /msvc/CensusMatching.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | libs 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | libs 17 | 18 | 19 | libs 20 | 21 | 22 | libs 23 | 24 | 25 | libs 26 | 27 | 28 | libs 29 | 30 | 31 | libs 32 | 33 | 34 | libs 35 | 36 | 37 | libs 38 | 39 | 40 | libs 41 | 42 | 43 | 44 | 45 | 46 | 47 | libs 48 | 49 | 50 | 51 | 52 | {773551df-5833-48e2-8ec3-9bcd64fae195} 53 | 54 | 55 | 56 | 57 | libs 58 | 59 | 60 | -------------------------------------------------------------------------------- /msvc/CensusTesting.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 14 4 | VisualStudioVersion = 14.0.25420.1 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CensusMatching", "CensusMatching.vcxproj", "{8136154D-6D04-0953-5698-79794205E5B7}" 7 | EndProject 8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "librs_demo", "librs_demo.vcxproj", "{BB1C411C-71F2-4839-B447-18E4C6E67E75}" 9 | EndProject 10 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "rms_error", "rsm_error.vcxproj", "{E911E0D0-2AA2-438C-95B2-DFD8E56BF9C4}" 11 | EndProject 12 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vis_pfm", "vis_pfm.vcxproj", "{12618367-A31D-4BC9-B199-F7BCF2C0D77D}" 13 | EndProject 14 | Global 15 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 16 | Debug|Mixed Platforms = Debug|Mixed Platforms 17 | Debug|Win32 = Debug|Win32 18 | Debug|x64 = Debug|x64 19 | Release|Mixed Platforms = Release|Mixed Platforms 20 | Release|Win32 = Release|Win32 21 | Release|x64 = Release|x64 22 | EndGlobalSection 23 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 24 | {8136154D-6D04-0953-5698-79794205E5B7}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32 25 | {8136154D-6D04-0953-5698-79794205E5B7}.Debug|Mixed Platforms.Build.0 = Debug|Win32 26 | {8136154D-6D04-0953-5698-79794205E5B7}.Debug|Win32.ActiveCfg = Debug|Win32 27 | {8136154D-6D04-0953-5698-79794205E5B7}.Debug|Win32.Build.0 = Debug|Win32 28 | {8136154D-6D04-0953-5698-79794205E5B7}.Debug|x64.ActiveCfg = Debug|x64 29 | {8136154D-6D04-0953-5698-79794205E5B7}.Debug|x64.Build.0 = Debug|x64 30 | {8136154D-6D04-0953-5698-79794205E5B7}.Release|Mixed Platforms.ActiveCfg = Release|x64 31 | {8136154D-6D04-0953-5698-79794205E5B7}.Release|Mixed Platforms.Build.0 = Release|x64 32 | {8136154D-6D04-0953-5698-79794205E5B7}.Release|Win32.ActiveCfg = Release|Win32 33 | {8136154D-6D04-0953-5698-79794205E5B7}.Release|Win32.Build.0 = Release|Win32 34 | {8136154D-6D04-0953-5698-79794205E5B7}.Release|x64.ActiveCfg = Release|x64 35 | {8136154D-6D04-0953-5698-79794205E5B7}.Release|x64.Build.0 = Release|x64 36 | {BB1C411C-71F2-4839-B447-18E4C6E67E75}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32 37 | {BB1C411C-71F2-4839-B447-18E4C6E67E75}.Debug|Mixed Platforms.Build.0 = Debug|Win32 38 | {BB1C411C-71F2-4839-B447-18E4C6E67E75}.Debug|Win32.ActiveCfg = Debug|Win32 39 | {BB1C411C-71F2-4839-B447-18E4C6E67E75}.Debug|Win32.Build.0 = Debug|Win32 40 | {BB1C411C-71F2-4839-B447-18E4C6E67E75}.Debug|x64.ActiveCfg = Debug|x64 41 | {BB1C411C-71F2-4839-B447-18E4C6E67E75}.Debug|x64.Build.0 = Debug|x64 42 | {BB1C411C-71F2-4839-B447-18E4C6E67E75}.Release|Mixed Platforms.ActiveCfg = Release|Win32 43 | 
{BB1C411C-71F2-4839-B447-18E4C6E67E75}.Release|Mixed Platforms.Build.0 = Release|Win32 44 | {BB1C411C-71F2-4839-B447-18E4C6E67E75}.Release|Win32.ActiveCfg = Release|Win32 45 | {BB1C411C-71F2-4839-B447-18E4C6E67E75}.Release|Win32.Build.0 = Release|Win32 46 | {BB1C411C-71F2-4839-B447-18E4C6E67E75}.Release|x64.ActiveCfg = Release|x64 47 | {BB1C411C-71F2-4839-B447-18E4C6E67E75}.Release|x64.Build.0 = Release|x64 48 | {E911E0D0-2AA2-438C-95B2-DFD8E56BF9C4}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32 49 | {E911E0D0-2AA2-438C-95B2-DFD8E56BF9C4}.Debug|Mixed Platforms.Build.0 = Debug|Win32 50 | {E911E0D0-2AA2-438C-95B2-DFD8E56BF9C4}.Debug|Win32.ActiveCfg = Debug|Win32 51 | {E911E0D0-2AA2-438C-95B2-DFD8E56BF9C4}.Debug|Win32.Build.0 = Debug|Win32 52 | {E911E0D0-2AA2-438C-95B2-DFD8E56BF9C4}.Debug|x64.ActiveCfg = Debug|x64 53 | {E911E0D0-2AA2-438C-95B2-DFD8E56BF9C4}.Debug|x64.Build.0 = Debug|x64 54 | {E911E0D0-2AA2-438C-95B2-DFD8E56BF9C4}.Release|Mixed Platforms.ActiveCfg = Release|Win32 55 | {E911E0D0-2AA2-438C-95B2-DFD8E56BF9C4}.Release|Mixed Platforms.Build.0 = Release|Win32 56 | {E911E0D0-2AA2-438C-95B2-DFD8E56BF9C4}.Release|Win32.ActiveCfg = Release|Win32 57 | {E911E0D0-2AA2-438C-95B2-DFD8E56BF9C4}.Release|Win32.Build.0 = Release|Win32 58 | {E911E0D0-2AA2-438C-95B2-DFD8E56BF9C4}.Release|x64.ActiveCfg = Release|x64 59 | {E911E0D0-2AA2-438C-95B2-DFD8E56BF9C4}.Release|x64.Build.0 = Release|x64 60 | {12618367-A31D-4BC9-B199-F7BCF2C0D77D}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32 61 | {12618367-A31D-4BC9-B199-F7BCF2C0D77D}.Debug|Mixed Platforms.Build.0 = Debug|Win32 62 | {12618367-A31D-4BC9-B199-F7BCF2C0D77D}.Debug|Win32.ActiveCfg = Debug|Win32 63 | {12618367-A31D-4BC9-B199-F7BCF2C0D77D}.Debug|Win32.Build.0 = Debug|Win32 64 | {12618367-A31D-4BC9-B199-F7BCF2C0D77D}.Debug|x64.ActiveCfg = Debug|x64 65 | {12618367-A31D-4BC9-B199-F7BCF2C0D77D}.Debug|x64.Build.0 = Debug|x64 66 | {12618367-A31D-4BC9-B199-F7BCF2C0D77D}.Release|Mixed Platforms.ActiveCfg = Release|Win32 67 | {12618367-A31D-4BC9-B199-F7BCF2C0D77D}.Release|Mixed Platforms.Build.0 = Release|Win32 68 | {12618367-A31D-4BC9-B199-F7BCF2C0D77D}.Release|Win32.ActiveCfg = Release|Win32 69 | {12618367-A31D-4BC9-B199-F7BCF2C0D77D}.Release|Win32.Build.0 = Release|Win32 70 | {12618367-A31D-4BC9-B199-F7BCF2C0D77D}.Release|x64.ActiveCfg = Release|x64 71 | {12618367-A31D-4BC9-B199-F7BCF2C0D77D}.Release|x64.Build.0 = Release|x64 72 | EndGlobalSection 73 | GlobalSection(SolutionProperties) = preSolution 74 | HideSolutionNode = FALSE 75 | EndGlobalSection 76 | EndGlobal 77 | -------------------------------------------------------------------------------- /msvc/librs_demo.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {BB1C411C-71F2-4839-B447-18E4C6E67E75} 23 | Win32Proj 24 | librs_demo 25 | 8.1 26 | 27 | 28 | 29 | Application 30 | true 31 | v140 32 | Unicode 33 | 34 | 35 | Application 36 | true 37 | v140 38 | Unicode 39 | 40 | 41 | Application 42 | false 43 | v140 44 | true 45 | Unicode 46 | 47 | 48 | Application 49 | false 50 | v140 51 | true 52 | Unicode 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | d2ddc4ff 75 | 76 | 77 | true 78 | bin\$(PlatformShortName)\$(Configuration)\ 79 | obj\$(PlatformShortName)\$(Configuration)\$(ProjectName) 80 | 81 | 82 | true 83 | bin\$(PlatformShortName)\$(Configuration)\ 84 
| obj\$(PlatformShortName)\$(Configuration)\$(ProjectName) 85 | 86 | 87 | false 88 | bin\$(PlatformShortName)\$(Configuration)\ 89 | obj\$(PlatformShortName)\$(Configuration)\$(ProjectName) 90 | 91 | 92 | false 93 | bin\$(PlatformShortName)\$(Configuration)\ 94 | obj\$(PlatformShortName)\$(Configuration)\$(ProjectName) 95 | 96 | 97 | 98 | 99 | 100 | Level3 101 | Disabled 102 | WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) 103 | /FS %(AdditionalOptions) 104 | 105 | 106 | Console 107 | true 108 | 109 | 110 | 111 | 112 | 113 | 114 | Level3 115 | Disabled 116 | WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) 117 | /FS %(AdditionalOptions) 118 | 119 | 120 | Console 121 | true 122 | 123 | 124 | 125 | 126 | Level3 127 | 128 | 129 | MaxSpeed 130 | true 131 | true 132 | WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) 133 | AdvancedVectorExtensions2 134 | Fast 135 | true 136 | true 137 | false 138 | true 139 | true 140 | true 141 | AVX2 142 | /FS %(AdditionalOptions) 143 | 144 | 145 | Console 146 | true 147 | true 148 | true 149 | 150 | 151 | 152 | 153 | Level3 154 | 155 | 156 | MaxSpeed 157 | true 158 | true 159 | WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) 160 | AdvancedVectorExtensions2 161 | Fast 162 | true 163 | true 164 | false 165 | true 166 | true 167 | true 168 | AVX2 169 | Speed 170 | false 171 | FastCall 172 | /FS %(AdditionalOptions) 173 | 174 | 175 | Console 176 | true 177 | true 178 | true 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
199 | 200 | 201 | 202 | 203 | -------------------------------------------------------------------------------- /msvc/librs_demo.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /msvc/packages.config: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /msvc/rsm_error.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {E911E0D0-2AA2-438C-95B2-DFD8E56BF9C4} 23 | Win32Proj 24 | rsm_error 25 | 8.1 26 | rms_error 27 | 28 | 29 | 30 | Application 31 | true 32 | v140 33 | Unicode 34 | 35 | 36 | Application 37 | false 38 | v140 39 | true 40 | Unicode 41 | 42 | 43 | Application 44 | true 45 | v140 46 | Unicode 47 | 48 | 49 | Application 50 | false 51 | v140 52 | true 53 | Unicode 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | true 75 | bin\$(PlatformShortName)\$(Configuration)\ 76 | obj\$(PlatformShortName)\$(Configuration)\$(ProjectName) 77 | 78 | 79 | true 80 | bin\$(PlatformShortName)\$(Configuration)\ 81 | obj\$(PlatformShortName)\$(Configuration)\$(ProjectName) 82 | 83 | 84 | false 85 | bin\$(PlatformShortName)\$(Configuration)\ 86 | obj\$(PlatformShortName)\$(Configuration)\$(ProjectName) 87 | 88 | 89 | false 90 | bin\$(PlatformShortName)\$(Configuration)\ 91 | obj\$(PlatformShortName)\$(Configuration)\$(ProjectName) 92 | 93 | 94 | 95 | 96 | 97 | Level3 98 | Disabled 99 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 100 | /FS %(AdditionalOptions) 101 | 102 | 103 | Console 104 | true 105 | 106 | 107 | 108 | 109 | 110 | 111 | Level3 112 | Disabled 113 | _DEBUG;_CONSOLE;%(PreprocessorDefinitions) 114 | /FS %(AdditionalOptions) 115 | 116 | 117 | Console 118 | true 119 | 120 | 121 | 122 | 123 | Level3 124 | 125 | 126 | MaxSpeed 127 | true 128 | true 129 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 130 | /FS %(AdditionalOptions) 131 | 132 | 133 | Console 134 | true 135 | true 136 | true 137 | 138 | 139 | 140 | 141 | Level3 142 | 143 | 144 | MaxSpeed 145 | true 146 | true 147 | NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 148 | /FS %(AdditionalOptions) 149 | 150 | 151 | Console 152 | true 153 | true 154 | true 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | -------------------------------------------------------------------------------- /msvc/rsm_error.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /msvc/vis_pfm.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {12618367-A31D-4BC9-B199-F7BCF2C0D77D} 23 | Win32Proj 24 | vis_pfm 25 | 8.1 26 | 27 | 28 | 29 | Application 30 | true 31 | v140 32 | Unicode 33 | 34 | 35 | Application 36 | false 37 | v140 38 | true 39 | Unicode 40 | 41 | 42 | Application 43 | true 44 | v140 45 | Unicode 46 | 
47 | 48 | Application 49 | false 50 | v140 51 | true 52 | Unicode 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | true 74 | bin\$(PlatformShortName)\$(Configuration)\ 75 | obj\$(PlatformShortName)\$(Configuration)\$(ProjectName) 76 | 77 | 78 | true 79 | bin\$(PlatformShortName)\$(Configuration)\ 80 | obj\$(PlatformShortName)\$(Configuration)\$(ProjectName) 81 | 82 | 83 | false 84 | bin\$(PlatformShortName)\$(Configuration)\ 85 | obj\$(PlatformShortName)\$(Configuration)\$(ProjectName) 86 | 87 | 88 | false 89 | bin\$(PlatformShortName)\$(Configuration)\ 90 | obj\$(PlatformShortName)\$(Configuration)\$(ProjectName) 91 | 92 | 93 | 94 | 95 | 96 | Level3 97 | Disabled 98 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 99 | /FS %(AdditionalOptions) 100 | 101 | 102 | Console 103 | true 104 | 105 | 106 | 107 | 108 | 109 | 110 | Level3 111 | Disabled 112 | _DEBUG;_CONSOLE;%(PreprocessorDefinitions) 113 | /FS %(AdditionalOptions) 114 | 115 | 116 | Console 117 | true 118 | 119 | 120 | 121 | 122 | Level3 123 | 124 | 125 | MaxSpeed 126 | true 127 | true 128 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 129 | /FS %(AdditionalOptions) 130 | 131 | 132 | Console 133 | true 134 | true 135 | true 136 | 137 | 138 | 139 | 140 | Level3 141 | 142 | 143 | MaxSpeed 144 | true 145 | true 146 | NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 147 | /FS %(AdditionalOptions) 148 | 149 | 150 | Console 151 | true 152 | true 153 | true 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | -------------------------------------------------------------------------------- /msvc/vis_pfm.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /src/Main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "bmMatch.h" 3 | #include "cMatch.h" 4 | #include "imio.h" 5 | #include "r200Match.h" 6 | #include "sgbmMatch.h" 7 | 8 | #include "image_filter.h" 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #define JSON_H_IMPLEMENTATION 18 | #include "json.h" 19 | 20 | template void visit_fields(stereo::R200Match::alg_config & o, F f) { 21 | f("dispmul", o.dispmul); 22 | f("box_radius", o.box_radius); 23 | f("left_right_int", o.left_right_int); 24 | f("left_right_sub", o.left_right_sub); 25 | f("neighbor", o.neighbor); 26 | f("second_peak", o.second_peak); 27 | f("texture_diff", o.texture_diff); 28 | f("texture_count", o.texture_count); 29 | f("score_min", o.score_min); 30 | f("score_max", o.score_max); 31 | f("median_plus", o.median_plus); 32 | f("median_minus", o.median_minus); 33 | f("median_thresh", o.median_thresh); 34 | f("hole_fill", o.hole_fill); 35 | f("domain_transform", o.domain_transform); 36 | f("dt_scale", o.dt_scale); 37 | f("dt_iter", o.dt_iter); 38 | f("dt_space", o.dt_space); 39 | f("dt_range", o.dt_range); 40 | f("dt_range_disp", o.dt_range_disp); 41 | 42 | } 43 | template void visit_fields(stereo::sgbmMatch::alg_config & o, F f) { 44 | f("dispmul", o.dispmul); 45 | f("box_radius", o.box_radius); 46 | f("left_right_int", o.left_right_int); 47 | f("left_right_sub", o.left_right_sub); 48 | f("neighbor", o.neighbor); 49 | f("second_peak", o.second_peak); 50 | f("texture_diff", o.texture_diff); 51 | f("texture_count", o.texture_count); 52 | f("score_min", 
o.score_min); 53 | f("score_max", o.score_max); 54 | f("median_plus", o.median_plus); 55 | f("median_minus", o.median_minus); 56 | f("median_thresh", o.median_thresh); 57 | f("hole_fill", o.hole_fill); 58 | f("cost_abs", o.cost_abs); 59 | f("cost_ham", o.cost_ham); 60 | f("p1", o.p1); 61 | f("p2", o.p2); 62 | f("sgm", o.sgm); 63 | f("scale_p2", o.scale_p2); 64 | f("use_blf", o.use_blf); 65 | f("blf_range", o.blf_range); 66 | f("blf_space", o.blf_space); 67 | } 68 | int main(int argc, char* argv[]) 69 | { 70 | json::value doc; 71 | if (argc < 2) 72 | return 1; 73 | if (auto in = std::ifstream(argv[1])) 74 | { 75 | std::string str((std::istreambuf_iterator(in)), 76 | std::istreambuf_iterator()); 77 | doc = json::parse(str); 78 | } 79 | else { 80 | std::vector args(argv + 1, argv + argc); 81 | auto str = std::accumulate(begin(args), end(args), std::string()); 82 | doc = json::parse(str); 83 | } 84 | std::string leftFile = doc["left_rgb"].string(); 85 | std::string rightFile = doc["right_rgb"].string(); 86 | 87 | auto left = img::imread(leftFile.c_str()); 88 | auto right = img::imread(rightFile.c_str()); 89 | 90 | auto left_g = img::Rgb2grey(left); 91 | auto right_g = img::Rgb2grey(right); 92 | 93 | int bitshift = (int)log2(doc["minint"].number()+1); 94 | if (bitshift == 0) 95 | { 96 | //endian bug 97 | for (int i = 0; i < left.width*left.height; i++) { 98 | left_g(i) = (left_g(i) << 8) | (left_g(i) >> 8); 99 | right_g(i) = (right_g(i) << 8) | (right_g(i) >> 8); 100 | } 101 | } 102 | 103 | for (int i = 0; i < left.width*left.height; i++) { 104 | left_g(i) >>= bitshift; 105 | right_g(i) >>= bitshift; 106 | } 107 | 108 | std::unique_ptr cm(nullptr); 109 | float scale_disp = 4.f; 110 | float scale_conf = 1.f; 111 | if (doc["config"]["algorithm"].string() == "r200") { 112 | stereo::R200Match::alg_config cfg; 113 | from_json(cfg, doc["config"]); 114 | cm = std::make_unique(left.width, left.height,doc["maxdisp"].number()+1,cfg); 115 | scale_disp = (float)cfg.dispmul; 116 | } else if (doc["config"]["algorithm"].string() == "sgbm") { 117 | stereo::sgbmMatch::alg_config cfg; 118 | from_json(cfg, doc["config"]); 119 | cm = std::make_unique(left.width, left.height, doc["maxdisp"].number()+1, cfg); 120 | scale_disp = (float)cfg.dispmul; 121 | } else { 122 | cm = std::make_unique(left.width, left.height, doc["maxdisp"].number()+1, (int)scale_disp); 123 | } 124 | cm->costsName = doc["costs"].string(); 125 | auto startTime = std::chrono::steady_clock::now(); 126 | auto res = cm->match(left_g, right_g); 127 | auto disp = res.first; 128 | auto conf = res.second; 129 | auto endTime = std::chrono::steady_clock::now(); 130 | 131 | img::Img dispf(disp.width, disp.height, 0.f); 132 | img::Img conff(conf.width, conf.height, 0.f); 133 | 134 | for (int i = 0; i < disp.width*disp.height; i++) 135 | dispf(i) = static_cast(disp(i)) / scale_disp; 136 | for (int i = 0; i < conf.width*conf.height; i++) 137 | conff(i) = static_cast(conf(i)) / scale_conf; 138 | 139 | img::imwrite(doc["output_disp"].string().c_str(), dispf); 140 | img::imwrite(doc["output_conf"].string().c_str(), conff); 141 | 142 | 143 | return 0; 144 | } 145 | -------------------------------------------------------------------------------- /src/bmMatch.cpp: -------------------------------------------------------------------------------- 1 | #include "bmMatch.h" 2 | #include 3 | #include 4 | #include 5 | 6 | using namespace stereo; 7 | 8 | // Box Filter Radius and Width 9 | #define B_R (3) 10 | #define B_W (2 * B_R + 1) 11 | 12 | // Left-Right Threshold 13 | 
#define LRT (2) 14 | 15 | BMatch::BMatch(int w, int h, int d, int m) 16 | : StereoMatch(w, h, d, m) 17 | , costs(w * d) 18 | , edgeLeft(w * h, 0) 19 | , edgeRight(w * h, 0) 20 | { 21 | } 22 | 23 | static void edgeDetect(uint16_t* in, int16_t* out, int w, int h) 24 | { 25 | for (int y = 1; y < h - 1; y++) { 26 | for (int x = 1; x < w - 1; x++) { 27 | auto dx = -in[(y - 1) * w + (x - 1)] 28 | + in[(y - 1) * w + (x + 1)] 29 | - 2 * in[(y - 0) * w + (x - 1)] 30 | + 2 * in[(y - 0) * w + (x + 1)] 31 | - in[(y + 1) * w + (x - 1)] 32 | + in[(y + 1) * w + (x + 1)]; 33 | auto dy = -in[(y - 1) * w + (x - 1)] 34 | - 2 * in[(y - 1) * w + (x - 0)] 35 | - in[(y - 1) * w + (x + 1)] 36 | + in[(y + 1) * w + (x - 1)] 37 | + 2 * in[(y + 1) * w + (x - 0)] 38 | + in[(y + 1) * w + (x + 1)]; 39 | out[y * w + x] = (int)std::round(sqrtf((float)(dx * dx + dy * dy))); 40 | } 41 | } 42 | } 43 | static void edgeDetectDummy(uint16_t* in, int16_t* out, int w, int h) 44 | { 45 | for (int y = 1; y < h - 1; y++) { 46 | for (int x = 1; x < w - 1; x++) { 47 | out[y * w + x] = in[y * w + x]; 48 | } 49 | } 50 | } 51 | 52 | static float subpixel(float costLeft, float costMiddle, float costRight) 53 | { 54 | if (costMiddle >= 0xfffe || costLeft >= 0xfffe || costRight >= 0xfffe) 55 | return 0.f; 56 | auto num = costRight - costLeft; 57 | auto den = (2 * (costRight + costLeft - 2 * costMiddle)); 58 | return den != 0 ? 0.5f * (num / den) : 0; 59 | } 60 | void BMatch::match(img::Img& left, img::Img& right, img::Img& disp, img::Img& conf) 61 | { 62 | auto lptr = left.data.get(); 63 | auto rptr = right.data.get(); 64 | auto dptr = disp.data.get(); 65 | 66 | edgeDetectDummy(lptr, edgeLeft.data(), width, height); 67 | edgeDetectDummy(rptr, edgeRight.data(), width, height); 68 | 69 | img::Img lc(left.width, left.height, (int16_t*)edgeLeft.data()); 70 | img::Img rc(left.width, left.height, (int16_t*)edgeRight.data()); 71 | img::Img costI(maxdisp, width, (float*)costs.data()); 72 | auto sqr = [](float diff) { return diff * diff; }; 73 | #define RIGHT_FRAME 74 | 75 | #ifdef RIGHT_FRAME 76 | for (int y = B_R; y < height - B_R; y++) { 77 | costs.assign(width * maxdisp, std::numeric_limits::max()); 78 | for (int x = B_R; x < width - B_R; x++) { 79 | auto ul = std::min(width - B_R, x + maxdisp); 80 | for (int d = x; d < ul; d++) { 81 | float cost = 0; 82 | for (int i = -B_R; i <= B_R; i++) { 83 | for (int j = -B_R; j <= B_R; j++) { 84 | auto pl = edgeLeft[(y + i) * width + (d + j)]; 85 | auto pr = edgeRight[(y + i) * width + (x + j)]; 86 | 87 | cost += sqr((float)(pl - pr)); 88 | } 89 | } 90 | costs[x * maxdisp + (d - x)] = cost; 91 | } 92 | } 93 | for (int x = B_R; x < width - B_R; x++) { 94 | auto minRVal = std::numeric_limits::max(); 95 | auto minRIdx = 0; 96 | auto minLVal = std::numeric_limits::max(); 97 | auto minLIdx = 0; 98 | for (int d = 0; d < maxdisp; d++) { 99 | auto cost = costs[x * maxdisp + d]; 100 | if (cost < minRVal) { 101 | minRVal = cost; 102 | minRIdx = d; 103 | } 104 | } 105 | auto xl = std::max(0, x - minRIdx); 106 | auto xu = std::min(width - 1, xl + maxdisp); 107 | for (int xd = xl; xd < xu; xd++) { 108 | auto d = x - xd + minRIdx; 109 | auto cost = costs[xd * maxdisp + d]; 110 | if (cost < minLVal) { 111 | minLVal = cost; 112 | minLIdx = d; 113 | } 114 | } 115 | 116 | dptr[y * width + x] = abs(minLIdx - minRIdx) < LRT ? minRIdx * muldisp : 0; 117 | } 118 | } 119 | #else 120 | auto sign = [](float x) { return x >= 0 ? 
1 : -1; }; 121 | auto halfx = (width - 1.0f) / 2.0f; 122 | auto halfy = (height - 1.0f) / 2.0f; 123 | 124 | for (int y = B_R; y < height - B_R; y++) { 125 | costs.assign(width * maxdisp, std::numeric_limits::max()); 126 | for (int x = B_R; x < width - B_R; x++) { 127 | auto lb = std::max(B_R, x - maxdisp); 128 | auto search_limit = x - lb; 129 | for (int d = 0; d < search_limit; d++) { 130 | auto xf = (x - halfx) / halfx; 131 | auto df = (x - d - halfx) / halfx; 132 | auto yf = (y - halfy) / halfy; 133 | 134 | float cost = 0; 135 | float n = 0; 136 | 137 | for (int i = -B_R; i <= B_R; i++) { 138 | for (int j = -B_R; j <= B_R; j++) { 139 | auto pl = edgeLeft[(y + i) * width + (x + j)]; 140 | auto pr = edgeRight[(y + i) * width + (x + j - d)]; 141 | n++; 142 | cost += sqr(pl - pr); 143 | } 144 | } 145 | costs[x * maxdisp + d] = cost / n; 146 | } 147 | } 148 | for (int x = B_R; x < width - B_R; x++) { 149 | auto minRVal = std::numeric_limits::max(); 150 | auto minRIdx = 0; 151 | auto minLVal = std::numeric_limits::max(); 152 | auto minLIdx = 0; 153 | for (int d = 0; d < maxdisp; d++) { 154 | auto cost = costs[x * maxdisp + d]; 155 | if (cost < minLVal) { 156 | minLVal = cost; 157 | minLIdx = d; 158 | } 159 | } 160 | auto xl = std::max(0, x - minLIdx); 161 | auto xu = std::min(width - 1, xl + maxdisp); 162 | for (int xd = xl; xd < xu; xd++) { 163 | auto d = xd - x + minLIdx; 164 | auto cost = costs[xd * maxdisp + d]; 165 | if (cost < minRVal) { 166 | minRVal = cost; 167 | minRIdx = d; 168 | } 169 | } 170 | 171 | auto sp = (minLIdx > 0 && minLIdx < maxdisp - 1) ? subpixel(costs[x * maxdisp + std::max(minLIdx - 1, 0)], 172 | costs[x * maxdisp + minLIdx], 173 | costs[x * maxdisp + std::min(minLIdx + 1, maxdisp - 1)]) 174 | : 0; 175 | uint16_t res = std::round((minLIdx + sp) * muldisp); 176 | dptr[y * width + x] = abs(minLIdx - minRIdx) < LRT ? 
res : 0; 177 | } 178 | } 179 | #endif 180 | } 181 | -------------------------------------------------------------------------------- /src/bmMatch.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "stereo.h" 4 | namespace stereo { 5 | 6 | class BMatch : public StereoMatch { 7 | public: 8 | using StereoMatch::match; 9 | 10 | BMatch(int w, int h, int d, int m); 11 | virtual void match(img::Img& left, img::Img& right, 12 | img::Img& disp, img::Img& conf) override; 13 | 14 | private: 15 | std::vector edgeLeft, edgeRight; 16 | std::vector costs; 17 | }; 18 | } 19 | -------------------------------------------------------------------------------- /src/cMatch.cpp: -------------------------------------------------------------------------------- 1 | #include "cMatch.h" 2 | #include 3 | #include 4 | 5 | using namespace stereo; 6 | 7 | // Census Radius and Width 8 | #define C_R (3) 9 | #define C_W (2 * C_R + 1) 10 | 11 | // Box Filter Radius and Width 12 | #define B_R (3) 13 | #define B_W (2 * B_R + 1) 14 | 15 | // Left-Right Threshold 16 | #define LRT (2) 17 | #define DS (1) 18 | // y,x 19 | const int samples[] = { 20 | -3, -2, 21 | -3, 0, 22 | -3, 2, 23 | -2, -3, 24 | -2, -1, 25 | -2, 1, 26 | -2, 3, 27 | -1, -2, 28 | -1, 0, 29 | -1, 2, 30 | 0, -3, 31 | 0, -1, 32 | 0, 1, 33 | 0, 3, 34 | 1, -2, 35 | 1, 0, 36 | 1, 2, 37 | 2, -3, 38 | 2, -1, 39 | 2, 1, 40 | 2, 3, 41 | 3, -2, 42 | 3, 0, 43 | 3, 2 44 | }; 45 | 46 | CensusMatch::CensusMatch(int w, int h, int d, int m) 47 | : StereoMatch(w, h, d, m) 48 | , costs(w * d) 49 | , censusLeft(w * h, 0) 50 | , censusRight(w * h, 0) 51 | { 52 | } 53 | 54 | static void censusTransform(uint16_t* in, uint32_t* out, int w, int h) 55 | { 56 | int ns = (int)(sizeof(samples) / sizeof(int)) / 2; 57 | for (int y = C_R; y < h - C_R; y++) { 58 | for (int x = C_R; x < w - C_R; x++) { 59 | uint32_t px = 0; 60 | auto center = in[y * w + x]; 61 | for (int p = 0; p < ns; p++) { 62 | auto yp = (y + samples[2 * p]); 63 | auto xp = (x + samples[2 * p + 1]); 64 | px |= (in[yp * w + xp] > center) << p; 65 | } 66 | out[y * w + x] = px; 67 | } 68 | } 69 | } 70 | 71 | #ifdef _WIN32 72 | #define popcount __popcnt 73 | #else 74 | #define popcount __builtin_popcount 75 | #endif 76 | static float subpixel(float costLeft, float costMiddle, float costRight) 77 | { 78 | if (costMiddle >= 0xfffe || costLeft >= 0xfffe || costRight >= 0xfffe) 79 | return 0.f; 80 | 81 | auto num = costRight - costLeft; 82 | auto den = (costRight < costLeft) ? (costMiddle - costLeft) : (costMiddle - costRight); 83 | return den != 0 ? 
0.5f * (num / den) : 0; 84 | } 85 | 86 | void CensusMatch::match(img::Img& left, img::Img& right, img::Img& disp, img::Img& conf) 87 | { 88 | auto lptr = left.data.get(); 89 | auto rptr = right.data.get(); 90 | auto dptr = disp.data.get(); 91 | 92 | censusTransform(lptr, censusLeft.data(), width, height); 93 | censusTransform(rptr, censusRight.data(), width, height); 94 | img::Img lc(left.width, left.height, (uint32_t*)censusLeft.data()); 95 | img::Img rc(left.width, left.height, (uint32_t*)censusRight.data()); 96 | img::Img costI(maxdisp, width, (uint16_t*)costs.data()); 97 | 98 | //#define RIGHT_FRAME 99 | #ifdef RIGHT_FRAME 100 | for (int y = B_R; y < height - B_R; y++) { 101 | costs.assign(width * maxdisp, std::numeric_limits::max()); 102 | for (int x = B_R; x < width - B_R; x++) { 103 | auto ul = std::min(width - B_R, x + maxdisp); 104 | for (int d = x; d < ul; d++) { 105 | uint16_t cost = 0; 106 | for (int i = -B_R; i <= B_R; i++) { 107 | for (int j = -B_R; j <= B_R; j++) { 108 | auto pl = censusLeft[(y + i) * width + (d + j)]; 109 | auto pr = censusRight[(y + i) * width + (x + j)]; 110 | 111 | cost += popcount(pl ^ pr); 112 | } 113 | } 114 | costs[x * maxdisp + (d - x)] = cost; 115 | } 116 | } 117 | for (int x = B_R; x < width - B_R; x++) { 118 | auto minRVal = std::numeric_limits::max(); 119 | auto minRIdx = 0; 120 | auto minLVal = std::numeric_limits::max(); 121 | auto minLIdx = 0; 122 | for (int d = 0; d < maxdisp; d++) { 123 | auto cost = costs[x * maxdisp + d]; 124 | if (cost < minRVal) { 125 | minRVal = cost; 126 | minRIdx = d; 127 | } 128 | } 129 | auto xl = std::max(0, x - minRIdx); 130 | auto xu = std::min(width - 1, xl + maxdisp); 131 | for (int xd = xl; xd < xu; xd++) { 132 | auto d = x - xd + minRIdx; 133 | auto cost = costs[xd * maxdisp + d]; 134 | if (cost < minLVal) { 135 | minLVal = cost; 136 | minLIdx = d; 137 | } 138 | } 139 | 140 | dptr[y * width + x] = abs(minLIdx - minRIdx) < LRT ? minRIdx * muldisp : 0; 141 | } 142 | } 143 | #else 144 | for (int y = B_R; y < height - B_R; y++) { 145 | costs.assign(width * maxdisp, std::numeric_limits::max()); 146 | for (int x = B_R; x < width - B_R; x++) { 147 | auto lb = std::max(B_R, x - maxdisp); 148 | auto search_limit = x - lb; 149 | for (int d = 0; d < search_limit; d++) { 150 | uint16_t cost = 0; 151 | for (int i = -B_R; i <= B_R; i++) { 152 | for (int j = -B_R; j <= B_R; j++) { 153 | auto pl = censusLeft[(y + i) * width + (x + j)]; 154 | auto pr = censusRight[(y + i) * width + (x + j - d)]; 155 | 156 | cost += popcount(pl ^ pr); 157 | } 158 | } 159 | costs[x * maxdisp + d] = cost; 160 | } 161 | } 162 | for (int x = B_R; x < width - B_R; x++) { 163 | auto minRVal = std::numeric_limits::max(); 164 | auto minRIdx = 0; 165 | auto minLVal = std::numeric_limits::max(); 166 | auto minLIdx = 0; 167 | for (int d = 0; d < maxdisp; d++) { 168 | auto cost = costs[x * maxdisp + d]; 169 | if (cost < minLVal) { 170 | minLVal = cost; 171 | minLIdx = d; 172 | } 173 | } 174 | auto xl = std::max(0, x - minLIdx); 175 | auto xu = std::min(width - 1, xl + maxdisp); 176 | for (int xd = xl; xd < xu; xd++) { 177 | auto d = xd - x + minLIdx; 178 | auto cost = costs[xd * maxdisp + d]; 179 | if (cost < minRVal) { 180 | minRVal = cost; 181 | minRIdx = d; 182 | } 183 | } 184 | auto sp = (minLIdx > 0 && minLIdx < maxdisp - 1) ? 
subpixel(costs[x * maxdisp + std::max(minLIdx - 1, 0)], 185 | costs[x * maxdisp + minLIdx], 186 | costs[x * maxdisp + std::min(minLIdx + 1, maxdisp - 1)]) 187 | : 0; 188 | uint16_t res = (uint16_t)std::round((minLIdx + sp) * muldisp); 189 | dptr[y * width + x] = abs(minLIdx - minRIdx) < LRT ? res : 0; 190 | } 191 | } 192 | #endif 193 | } 194 | -------------------------------------------------------------------------------- /src/cMatch.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "stereo.h" 4 | namespace stereo { 5 | 6 | class CensusMatch : public StereoMatch { 7 | public: 8 | using StereoMatch::match; 9 | 10 | CensusMatch(int w, int h, int d, int m); 11 | virtual void match(img::Img& left, img::Img& right, 12 | img::Img& disp, img::Img& conf) override; 13 | 14 | private: 15 | std::vector censusLeft, censusRight; 16 | std::vector costs; 17 | }; 18 | } 19 | -------------------------------------------------------------------------------- /src/cam_util.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | namespace util { 7 | template T clamp(T a, T mn, T mx) { return std::max(std::min(a, mx), mn); } 8 | 9 | template 10 | inline T remapInt(T value, float outputMin, float outputMax) 11 | { 12 | T invVal = 1.0f / (inputMax - inputMin); 13 | T outVal = (invVal*(value - inputMin) * (outputMax - outputMin) + outputMin); 14 | if (clamp) 15 | { 16 | if (outputMax < outputMin) 17 | { 18 | if (outVal < outputMax) outVal = outputMax; 19 | else if (outVal > outputMin) outVal = outputMin; 20 | } 21 | else 22 | { 23 | if (outVal > outputMax) outVal = outputMax; 24 | else if (outVal < outputMin) outVal = outputMin; 25 | } 26 | } 27 | return outVal; 28 | } 29 | 30 | std::array hsvToRgb(double h, double s, double v) { 31 | 32 | std::array rgb; 33 | 34 | double r, g, b; 35 | 36 | int i = int(h * 6); 37 | double f = h * 6 - i; 38 | double p = v * (1 - s); 39 | double q = v * (1 - f * s); 40 | double t = v * (1 - (1 - f) * s); 41 | 42 | switch (i % 6){ 43 | case 0: r = v, g = t, b = p; break; 44 | case 1: r = q, g = v, b = p; break; 45 | case 2: r = p, g = v, b = t; break; 46 | case 3: r = p, g = q, b = v; break; 47 | case 4: r = t, g = p, b = v; break; 48 | case 5: r = v, g = p, b = q; break; 49 | } 50 | 51 | rgb[0] = uint8_t(clamp((float)r * 255.0f, 0.0f, 255.0f)); 52 | rgb[1] = uint8_t(clamp((float)g * 255.0f, 0.0f, 255.0f)); 53 | rgb[2] = uint8_t(clamp((float)b * 255.0f, 0.0f, 255.0f)); 54 | 55 | return rgb; 56 | 57 | } 58 | 59 | // from Graphene 60 | void ConvertDepthToRGBUsingHistogram(uint8_t img[], const uint16_t depthImage[], int width, int height, const float nearHue, const float farHue) 61 | { 62 | // Produce a cumulative histogram of depth values 63 | int histogram[256 * 256] = { 1 }; 64 | 65 | for (int i = 0; i < width * height; ++i) 66 | { 67 | auto d = depthImage[i]; 68 | if (d && d != USHRT_MAX) ++histogram[d]; 69 | } 70 | 71 | for (int i = 1; i < 256 * 256; i++) 72 | { 73 | histogram[i] += histogram[i - 1]; 74 | } 75 | 76 | // Remap the cumulative histogram to the range [0-256] 77 | for (int i = 1; i < 256 * 256; i++) 78 | { 79 | histogram[i] = (histogram[i] << 8) / histogram[256 * 256 - 1]; 80 | } 81 | 82 | auto rgb = img; 83 | for (int i = 0; i < width * height; i++) 84 | { 85 | // For valid depth values (depth > 0) 86 | uint16_t d = depthImage[i]; 87 | if (d && d != USHRT_MAX) 88 | { 89 | auto t = histogram[d]; // Use 
the histogram entry (in the range of [0-256]) to interpolate between nearColor and farColor 90 | std::array returnRGB = { 0, 0, 0 }; 91 | returnRGB = hsvToRgb(remapInt((float)t, nearHue, farHue), 1.f, 1.f); 92 | *rgb++ = returnRGB[0]; 93 | *rgb++ = returnRGB[1]; 94 | *rgb++ = returnRGB[2]; 95 | } 96 | // Use black pixels for invalid values (depth == 0) 97 | else 98 | { 99 | *rgb++ = 0; 100 | *rgb++ = 0; 101 | *rgb++ = 0; 102 | } 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/cost_to_conf.cpp: -------------------------------------------------------------------------------- 1 | #include "r200Match.h" 2 | #include 3 | #include 4 | #include 5 | #include "bmMatch.h" 6 | #include "cMatch.h" 7 | #include "imio.h" 8 | #include "r200Match.h" 9 | #include "sgbmMatch.h" 10 | 11 | #include "image_filter.h" 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #define JSON_H_IMPLEMENTATION 21 | #include "json.h" 22 | 23 | 24 | 25 | static float subpixel(float costLeft, float costMiddle, float costRight) 26 | { 27 | if (costMiddle >= 0xfffe || costLeft >= 0xfffe || costRight >= 0xfffe) 28 | return 0.f; 29 | 30 | auto num = costRight - costLeft; 31 | auto den = (costRight < costLeft) ? (costMiddle - costLeft) : (costMiddle - costRight); 32 | return den != 0 ? 0.5f * (num / den) : 0; 33 | } 34 | #define DS (1) 35 | 36 | void generateDispConf(stereo::R200Match::alg_config & config, img::Img & left, std::vector & costs, img::Img & disp, img::Img & conf) 37 | { 38 | const auto B_R = config.box_radius; 39 | auto height = left.height; 40 | auto width = left.width; 41 | int maxdisp = costs.size()/(width*height); 42 | const auto default_score = (24*(2*B_R+1)*(2*B_R+1) + config.dt_scale - 1) / config.dt_scale; 43 | for (int y = B_R; y < height - B_R; y++) { 44 | auto prevVal = 0; 45 | auto costX = costs.data() + y * (width*maxdisp); 46 | #pragma omp parallel for 47 | for (int x = B_R; x < width - B_R; x++) { 48 | auto minRVal = std::numeric_limits::max(); 49 | auto minRIdx = 0; 50 | auto minLVal = std::numeric_limits::max(); 51 | auto minLIdx = 0; 52 | for (int d = 0; d < maxdisp; d++) { 53 | auto cost = costX[x * maxdisp + d]; 54 | if (cost < minLVal) { 55 | minLVal = cost; 56 | minLIdx = d; 57 | } 58 | } 59 | auto xl = std::max(0, x - minLIdx); 60 | auto xu = std::min(width - 1, xl + maxdisp); 61 | for (int xd = xl; xd < xu; xd++) { 62 | auto d = xd - x + minLIdx; 63 | auto cost = costX[xd * maxdisp + d]; 64 | if (cost < minRVal) { 65 | minRVal = cost; 66 | minRIdx = d; 67 | } 68 | } 69 | // subpixel left 70 | auto nL = costX[x * maxdisp + std::max(minLIdx - 1, 0)]; 71 | auto nC = costX[x * maxdisp + minLIdx]; 72 | auto nR = costX[x * maxdisp + std::min(minLIdx + 1, maxdisp - 1)]; 73 | auto spL = (minLIdx > 0 && minLIdx < maxdisp - 1) ? subpixel(nL, nC, nR) : 0; 74 | // subpixel right 75 | auto rL = costX[std::max(0, (x - 1)) * maxdisp + std::max(minLIdx - 1, 0)]; 76 | auto rC = costX[(x)*maxdisp + minLIdx]; 77 | auto rR = costX[std::min(width - 1, (x + 1)) * maxdisp + std::min(minLIdx + 1, maxdisp - 1)]; 78 | auto spR = (minLIdx < maxdisp - 1) ? 
subpixel(rL, rC, rR) : 0; 79 | 80 | // disparity computation 81 | float res = std::max(0.f,minLIdx-DS + spL); 82 | uint16_t bitMask = 0; 83 | 84 | // left-right threshold 85 | bitMask |= (abs(minLIdx - minRIdx) <= config.left_right_int && abs(spR - spL) <= config.left_right_sub); 86 | 87 | // neighbor threshold 88 | auto diffL = (int)nL - (int)nC; 89 | auto diffR = (int)nR - (int)nC; 90 | bitMask |= (diffL >= config.neighbor || diffR >= config.neighbor) << 1; 91 | 92 | // second peak threshold 93 | auto minL2Val = std::numeric_limits::max(); 94 | for (int d = 0; d < maxdisp; d++) { 95 | auto cost = costX[x * maxdisp + d]; 96 | auto costNext = (d == maxdisp - 1) ? cost : costX[x * maxdisp + d+1]; 97 | auto costPrev = (d == 0) ? cost : costX[x * maxdisp + d - 1]; 98 | 99 | if (cost < costNext && cost < costPrev) { 100 | if (d == minLIdx) 101 | continue; 102 | if (cost < minL2Val) 103 | minL2Val = cost; 104 | } 105 | } 106 | auto diffSP = minL2Val - minLVal; 107 | bitMask |= (diffSP >= config.second_peak) << 2; 108 | 109 | // texture difference (waste of time?) 110 | auto tc = 0; 111 | int centerV = left(y,x); 112 | for (int i = -B_R; i <= B_R; i++) { 113 | for (int j = -B_R; j <= B_R; j++) { 114 | int v = left(y + i,x + j); 115 | tc += abs(centerV - v) > config.texture_diff ? 1 : 0; 116 | } 117 | } 118 | bitMask |= (tc >= config.texture_count) << 3; 119 | 120 | // score limits 121 | bitMask |= (minLVal >= config.score_min && minLVal <= config.score_max) << 4; 122 | 123 | 124 | // median threshold 125 | auto me = std::numeric_limits::max(); 126 | auto initialized = false; 127 | for (int d = 0; d < maxdisp; d++) { 128 | auto cost = costX[x * maxdisp + d]; 129 | if (!initialized && cost != default_score) { 130 | initialized = true; 131 | me = cost; 132 | } 133 | if (cost > me) 134 | me += config.median_plus; 135 | else if (cost < me) 136 | me -= config.median_minus; 137 | } 138 | bitMask |= (me - minLVal >= config.median_thresh) << 5; 139 | 140 | //mask 141 | conf(y,x) = (bitMask == 0x3F) ? 1 : 0; 142 | // hole filling 143 | //if (config.hole_fill) { 144 | // prevVal = res ? res : prevVal; 145 | // res = res ? 
res : prevVal; 146 | //} 147 | 148 | // final set 149 | disp(y,x) = res; 150 | } 151 | } 152 | } 153 | 154 | template void visit_fields(stereo::R200Match::alg_config & o, F f) { 155 | f("dispmul", o.dispmul); 156 | f("box_radius", o.box_radius); 157 | f("left_right_int", o.left_right_int); 158 | f("left_right_sub", o.left_right_sub); 159 | f("neighbor", o.neighbor); 160 | f("second_peak", o.second_peak); 161 | f("texture_diff", o.texture_diff); 162 | f("texture_count", o.texture_count); 163 | f("score_min", o.score_min); 164 | f("score_max", o.score_max); 165 | f("median_plus", o.median_plus); 166 | f("median_minus", o.median_minus); 167 | f("median_thresh", o.median_thresh); 168 | f("hole_fill", o.hole_fill); 169 | f("domain_transform", o.domain_transform); 170 | f("dt_scale", o.dt_scale); 171 | f("dt_iter", o.dt_iter); 172 | f("dt_space", o.dt_space); 173 | f("dt_range", o.dt_range); 174 | } 175 | template void visit_fields(stereo::sgbmMatch::alg_config & o, F f) { 176 | f("dispmul", o.dispmul); 177 | f("box_radius", o.box_radius); 178 | f("left_right_int", o.left_right_int); 179 | f("left_right_sub", o.left_right_sub); 180 | f("neighbor", o.neighbor); 181 | f("second_peak", o.second_peak); 182 | f("texture_diff", o.texture_diff); 183 | f("texture_count", o.texture_count); 184 | f("score_min", o.score_min); 185 | f("score_max", o.score_max); 186 | f("median_plus", o.median_plus); 187 | f("median_minus", o.median_minus); 188 | f("median_thresh", o.median_thresh); 189 | f("hole_fill", o.hole_fill); 190 | f("cost_abs", o.cost_abs); 191 | f("cost_ham", o.cost_ham); 192 | f("p1", o.p1); 193 | f("p2", o.p2); 194 | f("sgm", o.sgm); 195 | f("scale_p2", o.scale_p2); 196 | f("use_blf", o.use_blf); 197 | f("blf_range", o.blf_range); 198 | f("blf_space", o.blf_space); 199 | } 200 | int main(int argc, char* argv[]) 201 | { 202 | json::value doc; 203 | if (argc < 2) 204 | return 1; 205 | if (auto in = std::ifstream(argv[1])) 206 | { 207 | std::string str((std::istreambuf_iterator(in)), 208 | std::istreambuf_iterator()); 209 | doc = json::parse(str); 210 | } 211 | else { 212 | std::vector args(argv + 1, argv + argc); 213 | auto str = std::accumulate(begin(args), end(args), std::string()); 214 | doc = json::parse(str); 215 | } 216 | std::string leftFile = doc["left_rgb"].string(); 217 | std::string rightFile = doc["right_rgb"].string(); 218 | 219 | auto left = img::imread(leftFile.c_str()); 220 | auto right = img::imread(rightFile.c_str()); 221 | 222 | auto left_g = img::Rgb2grey(left); 223 | auto right_g = img::Rgb2grey(right); 224 | 225 | int bitshift = (int)log2(doc["minint"].number()+1); 226 | 227 | for (int i = 0; i < left.width*left.height; i++) { 228 | left_g(i) >>= bitshift; 229 | right_g(i) >>= bitshift; 230 | } 231 | 232 | stereo::R200Match::alg_config cfg; 233 | 234 | if (doc["config"]["algorithm"].string() == "r200") { 235 | from_json(cfg, doc["config"]); 236 | } 237 | auto costsName = doc["costs"].string(); 238 | struct raw_header {int w,h,c,bpp;}; 239 | raw_header hd; 240 | std::ifstream outn(costsName,std::ifstream::binary); 241 | outn.read((char*)&hd,sizeof(raw_header)); 242 | std::vector costs(hd.w*hd.h*hd.c); 243 | if(hd.bpp == 4) { 244 | outn.read((char*)costs.data(),costs.size()*sizeof(uint32_t)); 245 | } else if(hd.bpp == 2) { 246 | std::vector costst(hd.w*hd.h*hd.c); 247 | outn.read((char*)costst.data(),costst.size()*sizeof(uint16_t)); 248 | for(int i=0; i < costs.size(); i++) 249 | costs[i] = costst[i]; 250 | } 251 | img::Img dispf(hd.w, hd.h, 0.f); 252 | img::Img conff(hd.w, 
hd.h, 0.f); 253 | 254 | generateDispConf(cfg,left_g,costs,dispf,conff); 255 | 256 | img::imwrite(doc["output_disp"].string().c_str(), dispf); 257 | img::imwrite(doc["output_conf"].string().c_str(), conff); 258 | 259 | return 0; 260 | } 261 | -------------------------------------------------------------------------------- /src/image.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | static float clamp_f(float min, float max, float x) 7 | { 8 | return std::max(min, std::min(max, x)); 9 | } 10 | 11 | namespace img { 12 | 13 | template 14 | struct Image { 15 | std::shared_ptr data; 16 | int width, height; 17 | T * ptr; 18 | Image() 19 | : data(nullptr) 20 | , width(0) 21 | , height(0) 22 | , ptr(nullptr) 23 | { 24 | } 25 | Image(int width, int height) 26 | : data(new T[width * height * C], arr_d()) 27 | , width(width) 28 | , height(height) 29 | { 30 | ptr = data.get(); 31 | } 32 | Image(int width, int height, T d) 33 | : Image(width, height) 34 | { 35 | std::fill(ptr, ptr + width*height*C, d); 36 | } 37 | Image(int width, int height, T* d) 38 | : data(d, null_d()) 39 | , width(width) 40 | , height(height) 41 | , ptr(d) 42 | { 43 | } 44 | 45 | struct null_d { 46 | void operator()(T const* p) {} 47 | }; 48 | struct arr_d { 49 | void operator()(T const* p) { delete[] p; } 50 | }; 51 | int size() const {return width*height;} 52 | int sizebytes() const {return width*height*C*sizeof(T);} 53 | T& operator()(int i) { return ptr[i]; } 54 | T& operator()(int y, int x) { return ptr[y*width + x]; } 55 | T& operator()(int y, int x, int c) { return ptr[C*(y*width + x) + c]; } 56 | 57 | 58 | inline T sample(const float x, const float y, const int chan) 59 | { 60 | auto pixX = [this](float x) { return (int)clamp_f(0.0f, (float)(width - 1), std::round(x)); }; 61 | auto pixY = [this](float y) { return (int)clamp_f(0.0f, (float)(height - 1), std::round(y)); }; 62 | 63 | auto xm = pixX(x - 0.5f); 64 | auto xp = pixX(x + 0.5f); 65 | auto ym = pixY(y - 0.5f); 66 | auto yp = pixY(y + 0.5f); 67 | auto ptr = data.get(); 68 | 69 | auto tl = ptr[C * (ym * width + xm) + chan]; 70 | auto tr = ptr[C * (ym * width + xp) + chan]; 71 | auto bl = ptr[C * (yp * width + xm) + chan]; 72 | auto br = ptr[C * (yp * width + xp) + chan]; 73 | 74 | float dx = x - xm; 75 | float dy = y - ym; 76 | 77 | auto sample = tl * (1.f - dx) * (1.f - dy) + tr * dx * (1.f - dy) + bl * (1.f - dx) * dy + br * dx * dy; 78 | return (T)sample; 79 | } 80 | img::Image copy() 81 | { 82 | img::Image res(width, height); 83 | memcpy(res.data.get(), this->data.get(), width*height*sizeof(T)*C); 84 | res.ptr = res.data.get(); 85 | return res; 86 | } 87 | }; 88 | 89 | template 90 | using Img = Image; 91 | } 92 | -------------------------------------------------------------------------------- /src/image_io.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "image.h" 3 | 4 | namespace img { 5 | //imio.cpp 6 | template 7 | Image imread(const char * name); 8 | 9 | template 10 | void imwrite(const char * name, const Image &img); 11 | 12 | //imshow.cpp 13 | template 14 | void imshow(const char * name, const Image &img); 15 | char getKey(bool wait = false); 16 | } -------------------------------------------------------------------------------- /src/imio.cpp: -------------------------------------------------------------------------------- 1 | #include "image.h" 2 | #include 3 | #include 4 | #include 5 | 
#include 6 | 7 | #pragma warning(disable : 4996) 8 | #define STB_IMAGE_WRITE_IMPLEMENTATION 9 | #include "stb_image_write.h" 10 | #define STB_IMAGE_IMPLEMENTATION 11 | #include "stb_image.h" 12 | 13 | namespace img { 14 | std::string getExtension(const std::string & str) { 15 | auto found = str.find_last_of("."); 16 | auto ext = (found == std::string::npos) ? "" : str.substr(found); 17 | std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower); 18 | return ext; 19 | } 20 | 21 | template 22 | Image imread(const char * name) { 23 | auto ext = getExtension(name); 24 | 25 | Image returnImage = {}; 26 | int channels; 27 | if (ext == ".pfm") { 28 | std::ifstream file(name, std::ios::binary); 29 | std::string fileType; 30 | int width, height; 31 | float max; 32 | char tmp; 33 | 34 | file >> fileType >> width >> height >> max; 35 | auto res = img::Image(width, height); 36 | returnImage = img::Image(width, height); 37 | 38 | file.read(&tmp, 1); //eat the newline 39 | file.read((char*)res.data.get(), width*height * sizeof(T)*C); 40 | 41 | for (int y = 0; y < height; y++) { 42 | for (int x = 0; x < width; x++) { 43 | returnImage.ptr[y*width + x] = res.ptr[(height - 1 - y)*width + x]; 44 | } 45 | } 46 | } else { 47 | if (sizeof(T) == 2) { 48 | returnImage.data = std::shared_ptr((T*)stbi_load_16(name, &returnImage.width, &returnImage.height, &channels, C)); 49 | returnImage.ptr = returnImage.data.get(); 50 | } 51 | else { 52 | returnImage.data = std::shared_ptr((T*)stbi_load(name, &returnImage.width, &returnImage.height, &channels, C)); 53 | returnImage.ptr = returnImage.data.get(); 54 | } 55 | } 56 | return returnImage; 57 | } 58 | 59 | template 60 | void imwrite(const char * name, Image &img) { 61 | auto ext = getExtension(name); 62 | 63 | if (ext == ".png") { 64 | stbi_write_png(name, img.width, img.height, C, (unsigned char*)img.data.get(), 0); 65 | } else if (ext == ".tga") { 66 | stbi_write_tga(name, img.width, img.height, C, (unsigned char*)img.data.get()); 67 | } else if (ext == ".bmp") { 68 | stbi_write_bmp(name, img.width, img.height, C, (unsigned char*)img.data.get()); 69 | } else if (ext == ".hdr") { 70 | stbi_write_hdr(name, img.width, img.height, C, (float*)img.data.get()); 71 | } else if (ext == ".pfm") { 72 | auto res = img.copy(); 73 | auto width = img.width; 74 | auto height = img.height; 75 | for (int y = 0; y < height; y++) { 76 | for (int x = 0; x < width; x++) { 77 | res.ptr[y*width + x] = img.ptr[(height - 1 - y)*width + x]; 78 | } 79 | } 80 | std::ofstream output(name, std::ofstream::binary | std::ofstream::out); 81 | output << "Pf\n"; 82 | output << width << ' ' << height << '\n'; 83 | output << std::fixed << std::setprecision(2) << -1.0 << '\n'; 84 | 85 | output.write((char*)res.ptr, width*height * sizeof(float)); 86 | } 87 | } 88 | template void imwrite(const char * name, Image &img); 89 | template void imwrite(const char * name, Image &img); 90 | template void imwrite(const char * name, Image &img); 91 | template void imwrite(const char * name, Image &img); 92 | template void imwrite(const char * name, Image &img);// pfm only 93 | 94 | template Image imread(const char * name); 95 | template Image imread(const char * name); 96 | template Image imread(const char * name); 97 | template Image imread(const char * name); 98 | template Image imread(const char * name); 99 | template Image imread(const char * name); 100 | template Image imread(const char * name); // pfm only 101 | 102 | } -------------------------------------------------------------------------------- 
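A note on the PFM path implemented in imio.cpp above: rows are stored bottom-up on disk, and both imread and imwrite flip them so the in-memory image is top-down like every other format handled here. The template arguments did not survive this listing, so the following is only a minimal round-trip sketch, assuming the single-channel float instantiation (imread<float, 1>, with imwrite deduced from Image<float, 1>) that the "// pfm only" instantiations suggest; the file names are hypothetical.

// Minimal sketch, not part of the repository: read a disparity PFM, blank a
// small left border, and write it back. Assumes img::imread<float, 1> and the
// matching imwrite instantiation; "disp.pfm" is a hypothetical file name.
#include "imio.h"

int main()
{
    auto disp = img::imread<float, 1>("disp.pfm"); // no error handling, as in the tools above

    // Blank a 3-pixel left border, e.g. where a block matcher cannot search.
    for (int y = 0; y < disp.height; y++)
        for (int x = 0; x < 3 && x < disp.width; x++)
            disp(y, x) = 0.f;

    // The PFM writer re-flips the rows and emits a single-channel "Pf" header.
    img::imwrite("disp_masked.pfm", disp);
    return 0;
}

Note that the writer always emits a "Pf" header followed by width*height floats, so this path is effectively limited to one-channel float images such as the disparity and confidence maps used elsewhere in the repository.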
/src/imio.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "image.h" 3 | 4 | namespace img { 5 | //imio.cpp 6 | template 7 | Image imread(const char* name); 8 | 9 | template 10 | void imwrite(const char* name, Image& img); 11 | 12 | //imshow.cpp 13 | template 14 | void imshow(const char* name, const Image& img); 15 | char getKey(bool wait = false); 16 | } -------------------------------------------------------------------------------- /src/imshow.cpp: -------------------------------------------------------------------------------- 1 | #include "image.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #pragma comment( lib, "opengl32" ) 13 | namespace img { 14 | template 15 | inline int getGLChan(); 16 | 17 | template <> 18 | inline int getGLChan<1>() { return GL_LUMINANCE; } 19 | template <> 20 | inline int getGLChan<2>() { return GL_LUMINANCE_ALPHA; } 21 | template <> 22 | inline int getGLChan<3>() { return GL_RGB; } 23 | template <> 24 | inline int getGLChan<4>() { return GL_RGBA; } 25 | 26 | template 27 | inline int getGLType(); 28 | 29 | template <> 30 | inline int getGLType() { return GL_UNSIGNED_BYTE; } 31 | template <> 32 | inline int getGLType() { return GL_BYTE; } 33 | template <> 34 | inline int getGLType() { return GL_UNSIGNED_SHORT; } 35 | template <> 36 | inline int getGLType() { return GL_SHORT; } 37 | template <> 38 | inline int getGLType() { return GL_UNSIGNED_INT; } 39 | template <> 40 | inline int getGLType() { return GL_INT; } 41 | template <> 42 | inline int getGLType() { return GL_FLOAT; } 43 | template <> 44 | inline int getGLType() { return GL_DOUBLE; } 45 | struct glfwState { 46 | glfwState() { glfwInit(); } 47 | ~glfwState() { glfwTerminate(); } 48 | }; 49 | 50 | glfwState g_glfw_state; 51 | 52 | char g_key; 53 | bool g_key_update = false; 54 | 55 | std::mutex g_mutex; 56 | struct winState { 57 | GLFWwindow* win; 58 | GLuint tex; 59 | int width, height; 60 | }; 61 | std::unordered_map g_windows; 62 | 63 | static void key_callback(GLFWwindow* window, int key, int scancode, int action, int mods) 64 | { 65 | std::unique_lock lock(g_mutex); 66 | 67 | if (action == GLFW_PRESS) { 68 | g_key = tolower(key); 69 | g_key_update = true; 70 | } 71 | } 72 | static void framebuffer_size_callback(GLFWwindow* window, int width, int height) 73 | { 74 | glfwMakeContextCurrent(window); 75 | for (const auto & win : g_windows) { 76 | if (win.second.win == window) { 77 | auto wScale = ((float)width) / ((float)win.second.width); 78 | auto hScale = ((float)height) / ((float)win.second.height); 79 | auto minScale = (wScale < hScale) ? 
wScale : hScale; 80 | int wShift = (int)nearbyint((width - minScale*win.second.width) / 2.0f); 81 | int hShift = (int)nearbyint((height - minScale*win.second.height) / 2.0f); 82 | 83 | glViewport(wShift, hShift, (int)nearbyint(win.second.width*minScale), (int)nearbyint(win.second.height*minScale)); 84 | } 85 | } 86 | } 87 | template 88 | void imshow(const char * name, const Image & img){ 89 | std::unique_lock lock(g_mutex); 90 | std::string s_name(name); 91 | if (img.data.get() == NULL || img.height*img.width == 0) 92 | return; 93 | if (g_windows.find(s_name) == g_windows.end()) 94 | { 95 | GLuint tex; 96 | glGenTextures(1, &tex); 97 | g_windows[s_name] = { glfwCreateWindow(img.width, img.height, s_name.c_str(), NULL, NULL), tex, img.width, img.height }; 98 | glfwSetKeyCallback(g_windows[s_name].win, key_callback); 99 | glfwSetFramebufferSizeCallback(g_windows[s_name].win, framebuffer_size_callback); 100 | } 101 | auto window = g_windows[s_name]; 102 | glfwMakeContextCurrent(window.win); 103 | glPushAttrib(GL_ALL_ATTRIB_BITS); 104 | glPushMatrix(); 105 | glEnable(GL_TEXTURE_2D); 106 | glEnable(GL_ALPHA); 107 | glClearColor(0, 0, 0, 1.0f); 108 | glClear(GL_COLOR_BUFFER_BIT); 109 | glBindTexture(GL_TEXTURE_2D, window.tex); 110 | 111 | glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, img.width, img.height, 0, getGLChan(), getGLType(), img.data.get()); 112 | 113 | glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 114 | glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 115 | glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP); 116 | glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP); 117 | 118 | glBegin(GL_QUADS); 119 | glTexCoord2f(0, 1); glVertex2f(-1, -1); 120 | glTexCoord2f(1, 1); glVertex2f(1, -1); 121 | glTexCoord2f(1, 0); glVertex2f(1, 1); 122 | glTexCoord2f(0, 0); glVertex2f(-1, 1); 123 | glEnd(); 124 | glDisable(GL_TEXTURE_2D); 125 | glDisable(GL_ALPHA); 126 | glPopMatrix(); 127 | glPopAttrib(); 128 | } 129 | 130 | 131 | char getKey(bool wait) 132 | { 133 | std::unique_lock lock(g_mutex); 134 | 135 | std::vector toDelete; 136 | for (const auto & win : g_windows) { 137 | if (glfwWindowShouldClose(win.second.win)) { 138 | toDelete.push_back(win.first); 139 | } 140 | else { 141 | glfwMakeContextCurrent(win.second.win); 142 | glfwSwapBuffers(win.second.win); 143 | lock.unlock(); 144 | if (wait) 145 | do { 146 | glfwWaitEvents(); 147 | } while (!g_key_update); 148 | else { 149 | glfwPollEvents(); 150 | } 151 | lock.lock(); 152 | 153 | if (!g_key_update) { 154 | g_key = '\0'; 155 | } 156 | } 157 | } 158 | g_key_update = false; 159 | for (const auto & name : toDelete) { 160 | glfwDestroyWindow(g_windows[name].win); 161 | glDeleteTextures(1, &g_windows[name].tex); 162 | g_windows.erase(name); 163 | } 164 | return g_key; 165 | } 166 | template void imshow(const char * name, const Image &img); 167 | template void imshow(const char * name, const Image &img); 168 | template void imshow(const char * name, const Image &img); 169 | template void imshow(const char * name, const Image &img); 170 | template void imshow(const char * name, const Image &img); 171 | 172 | } 173 | -------------------------------------------------------------------------------- /src/r200Match.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "stereo.h" 4 | namespace stereo { 5 | 6 | class R200Match : public StereoMatch { 7 | public: 8 | using StereoMatch::match; 9 | // Using the parameters from 10 | // 
https://github.com/IntelRealSense/librealsense/blob/master/include/librealsense/rsutil.h 11 | 12 | struct alg_config { 13 | int dispmul = 4; 14 | int box_radius = 3; 15 | int left_right_int = 1; 16 | float left_right_sub = 0.75; 17 | int neighbor = 7; 18 | int second_peak = 10; 19 | int texture_diff = 4; 20 | int texture_count = 6; 21 | int score_min = 1; 22 | int score_max = 512; 23 | int median_plus = 5; 24 | int median_minus = 5; 25 | int median_thresh = 192; 26 | bool hole_fill = false; 27 | bool domain_transform = false; 28 | int dt_scale = 1; 29 | int dt_iter = 1; 30 | float dt_space = 10; 31 | float dt_range = 90; 32 | float dt_range_disp = 0.000f; 33 | }; 34 | R200Match(int w, int h, int d, int m); 35 | R200Match(int w, int h, int maxdisp, const alg_config & cfg); 36 | 37 | virtual void match(img::Img& left, img::Img& right, 38 | img::Img& disp, img::Img& conf) override; 39 | 40 | private: 41 | alg_config config; 42 | std::vector censusLeft, censusRight; 43 | std::vector costs; 44 | }; 45 | } 46 | -------------------------------------------------------------------------------- /src/rms_error.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "bmMatch.h" 3 | #include "cMatch.h" 4 | #include "imio.h" 5 | #include "r200Match.h" 6 | #include "sgbmMatch.h" 7 | 8 | #include "image_filter.h" 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #define JSON_H_IMPLEMENTATION 19 | #include "json.h" 20 | template 21 | std::string to_string_with_precision(const T a_value, const int n = 6) 22 | { 23 | std::ostringstream out; 24 | out << std::setprecision(n) << a_value; 25 | return out.str(); 26 | } 27 | 28 | std::string pretty_print(const json::value & v) { std::ostringstream ss; ss << tabbed(v, 4); return ss.str(); } 29 | 30 | int main(int argc, char* argv[]) 31 | { 32 | json::value doc; 33 | if (argc < 2) 34 | return 1; 35 | if (auto in = std::ifstream(argv[1])) 36 | { 37 | std::string str((std::istreambuf_iterator(in)), 38 | std::istreambuf_iterator()); 39 | doc = json::parse(str); 40 | } 41 | else { 42 | std::vector args(argv + 1, argv + argc); 43 | auto str = std::accumulate(begin(args), end(args), std::string()); 44 | doc = json::parse(str); 45 | } 46 | std::string leftFile = doc["left_rgb"].string(); 47 | std::string rightFile = doc["right_rgb"].string(); 48 | 49 | auto left = img::imread(leftFile.c_str()); 50 | auto right = img::imread(rightFile.c_str()); 51 | 52 | auto left_g = img::Rgb2grey(left); 53 | auto right_g = img::Rgb2grey(right); 54 | 55 | int bitshift = (int)log2(doc["minint"].number()+1); 56 | 57 | for (int i = 0; i < left.width*left.height; i++) { 58 | left_g(i) >>= bitshift; 59 | right_g(i) >>= bitshift; 60 | } 61 | 62 | auto disp = img::imread(doc["output_disp"].string().c_str()); 63 | auto conf = img::imread(doc["output_conf"].string().c_str()); 64 | 65 | auto gt_disp = img::imread(doc["gt"].string().c_str()); 66 | auto gt_mask = img::imread(doc["gt_mask"].string().c_str()); 67 | json::object results; 68 | 69 | // sweep robust loss 70 | for(const auto & thresh : {0.5f, 0.75f, 1.0f, 2.0f, 3.0f}) { 71 | double err = 0.0; 72 | double count = 0.0; 73 | 74 | double err_n = 0.0; 75 | double count_n = 0.0; 76 | for(int i=0; i < disp.size(); i++) { 77 | auto isValid = gt_mask(i) > 0.5 ? 
true : false; 78 | if(!isValid) 79 | continue; 80 | err += std::min(thresh,std::abs(gt_disp(i) - disp(i))); 81 | count += 1; 82 | 83 | 84 | auto thinksValid = conf(i) > 0.5 ? true : false; 85 | if(!thinksValid) 86 | continue; 87 | 88 | err_n += std::min(thresh,std::abs(gt_disp(i) - disp(i))); 89 | count_n += 1; 90 | } 91 | err_n /= count_n ? count_n : 1; 92 | err /= count ? count : 1; 93 | 94 | json::object res; 95 | res["name"] = std::string("err_") +to_string_with_precision(thresh,3) ; 96 | res["result"] = err; 97 | res["threshold"] = thresh; 98 | res["description"] = json::value{std::string("Robust loss over all pixels with t=") + std::to_string(thresh)}; 99 | results[res["name"].string()] = res; 100 | 101 | res["name"] = std::string("errn_") +to_string_with_precision(thresh,3) ; 102 | res["result"] = err_n; 103 | res["threshold"] = thresh; 104 | res["description"] = json::value{std::string("Robust loss over valid pixels with t=") + std::to_string(thresh)}; 105 | results[res["name"].string()] = res; 106 | 107 | json::object res2; 108 | res2["name"] = std::string("density") ; 109 | res2["result"] = count_n/count; 110 | res2["description"] = json::value{std::string("Fraction of valid ground truth pixels recovered by the algorithm")}; 111 | results[res2["name"].string()] = res2; 112 | // sweep f# scores 113 | double tp = 0.0f; 114 | double fp = 0.0f; 115 | double tn = 0.0f; 116 | double fn = 0.0f; 117 | auto t_s = to_string_with_precision(thresh,3) ; 118 | for(int i=0; i < disp.size(); i++) { 119 | if (gt_mask(i) < 0.5) 120 | continue; 121 | auto isValid = std::abs(gt_disp(i) - disp(i)) < thresh ? true : false; 122 | auto thinksValid = conf(i) > 0.5 ? true : false; 123 | 124 | if(isValid) { 125 | if(thinksValid) 126 | tp++; 127 | else 128 | fn++; 129 | } else { 130 | if(thinksValid) 131 | fp++; 132 | else 133 | tn++; 134 | } 135 | } 136 | res = json::object(); 137 | auto tpr = tp/(tp+fn); 138 | auto fpr = fp/(fp+tn); 139 | res["name"] = std::string("tpr_") + t_s ; 140 | res["result"] = tpr; 141 | res["threshold"] = thresh; 142 | res["description"] = json::value{std::string("True Positive Rate with t=") + t_s}; 143 | results[res["name"].string()] = res; 144 | res["name"] = std::string("fpr_") + t_s ; 145 | res["result"] = fpr; 146 | res["threshold"] = thresh; 147 | res["description"] = json::value{std::string("False Positive Rate with t=") + t_s}; 148 | results[res["name"].string()] = res; 149 | for(const auto & beta : {0.125f, 0.25f, 0.5f, 1.0f, 2.0f, 4.0f,8.0f}) 150 | { 151 | auto b2 = (beta*beta); 152 | auto b2p1 = (b2 + 1.0); 153 | auto score = (b2p1*tp)/(b2p1*tp+b2*fn+fp); 154 | json::object res; 155 | res["name"] = std::string("f_") +to_string_with_precision(beta,3) + std::string("_") + t_s ; 156 | res["result"] = score; 157 | res["beta"] = beta; 158 | res["threshold"] = thresh; 159 | res["description"] = json::value{std::string("F score with B=") + std::to_string(beta) + std::string(" and t=") + t_s}; 160 | results[res["name"].string()] = res; 161 | 162 | } 163 | } 164 | 165 | std::cout << pretty_print(results); 166 | //std::ofstream outputfile(doc["output"].string()); 167 | //outputfile << pretty_print(results); 168 | return 0; 169 | } 170 | -------------------------------------------------------------------------------- /src/rs_demo.cpp: -------------------------------------------------------------------------------- 1 | #include "bmMatch.h" 2 | #include "cMatch.h" 3 | #include "imio.h" 4 | #include "r200Match.h" 5 | #include "sgbmMatch.h" 6 | 7 | #include "image_filter.h" 8 | 
#include "cam_util.h" 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | 18 | const int MAXDISP = 64; 19 | const int MULDISP = 32; 20 | int main(int argc, char* argv[]) 21 | { 22 | rs::context ctx; 23 | if (ctx.get_device_count() == 0) { printf("No device detected. Is it plugged in?\n"); return 1; } 24 | rs::device & dev = *ctx.get_device(0); 25 | try { 26 | dev.enable_stream(rs::stream::depth, 480, 360, rs::format::disparity16, 30); 27 | dev.enable_stream(rs::stream::color, 640, 480, rs::format::rgb8, 30); 28 | dev.enable_stream(rs::stream::infrared, 492, 372, rs::format::y16, 30); 29 | dev.enable_stream(rs::stream::infrared2, 492, 372, rs::format::y16, 30); 30 | 31 | // Start our device 32 | dev.start(); 33 | 34 | dev.set_option(rs::option::r200_emitter_enabled, true); 35 | dev.set_option(rs::option::r200_lr_auto_exposure_enabled, true); 36 | //rs_apply_depth_control_preset((rs_device*)&dev, 2); // 2 = low, 4=opt, 5 = high 37 | } 38 | catch (std::exception & e) { 39 | std::cout << e.what() << std::endl; 40 | return 1; 41 | } 42 | 43 | auto intrin = dev.get_stream_intrinsics(rs::stream::depth); 44 | auto extrin = dev.get_extrinsics(rs::stream::infrared2, rs::stream::infrared); 45 | 46 | auto fB = intrin.fx * extrin.translation[0] * 1000.0f; 47 | int counter = 0; 48 | auto w = dev.get_stream_width(rs::stream::depth); 49 | auto h = dev.get_stream_height(rs::stream::depth); 50 | auto iw = dev.get_stream_width(rs::stream::infrared); 51 | auto ih = dev.get_stream_height(rs::stream::infrared); 52 | stereo::sgbmMatch cm(iw, ih, MAXDISP, MULDISP); 53 | auto himg4 = img::Image(w, h); 54 | auto himg5 = img::Image(iw, ih); 55 | img::Img disp_s(iw, ih); 56 | memset(disp_s.ptr, 0, iw*ih*sizeof(uint16_t)); 57 | do { 58 | dev.wait_for_frames(); 59 | //depth based 60 | { 61 | auto disp = dev.get_frame_data(rs::stream::depth); 62 | auto left = dev.get_frame_data(rs::stream::infrared); 63 | auto right = dev.get_frame_data(rs::stream::infrared2); 64 | 65 | 66 | auto dimg = img::Img(w, h, (uint16_t*)disp); 67 | auto limg = img::Img(iw, ih, (uint16_t*)left).copy(); 68 | auto rimg = img::Img(iw, ih, (uint16_t*)right).copy(); 69 | 70 | for (int i = 0; i < iw*ih; i++) 71 | limg(i) >>= 6; 72 | for (int i = 0; i < iw*ih; i++) 73 | rimg(i) >>= 6; 74 | auto res = cm.match(limg, rimg); 75 | for (int i = 0; i < iw*ih; i++) 76 | disp_s(i) = res.second(i) > 0.5 ? 
res.first(i) : 0; 77 | 78 | util::ConvertDepthToRGBUsingHistogram(himg4.ptr, dimg.ptr, w, h, 0.1f, 0.625f); 79 | util::ConvertDepthToRGBUsingHistogram(himg5.ptr, disp_s.ptr, iw, ih, 0.1f, 0.625f); 80 | img::imshow("ds4", himg4); 81 | img::imshow("sw", himg5); 82 | img::imshow("ir", limg); 83 | 84 | } 85 | 86 | } while (img::getKey() != 'q'); 87 | 88 | return 0; 89 | } 90 | -------------------------------------------------------------------------------- /src/sgbmMatch.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "stereo.h" 4 | namespace stereo { 5 | 6 | class sgbmMatch : public StereoMatch { 7 | public: 8 | using StereoMatch::match; 9 | struct alg_config { 10 | int dispmul = 4; 11 | int box_radius = 3; 12 | int left_right_int = 1; 13 | float left_right_sub = 0.75; 14 | int neighbor = 20; 15 | int second_peak = 50; 16 | int texture_diff = 0; 17 | int texture_count = 0; 18 | int score_min = 0; 19 | int score_max = 20000; 20 | int median_plus = 10; 21 | int median_minus = 10; 22 | int median_thresh = 500; 23 | bool hole_fill = false; 24 | int cost_abs = 1; 25 | int cost_ham = 3; 26 | int p1 = 2000; 27 | int p2 = 8000; 28 | bool sgm = true; 29 | bool scale_p2 = true; 30 | bool use_blf = false; 31 | float blf_range = 25.0f; 32 | float blf_space = 2.25f; 33 | }; 34 | sgbmMatch(int w, int h, int d, int m); 35 | sgbmMatch(int w, int h, int maxdisp, const alg_config & cfg); 36 | 37 | virtual void match(img::Img& left, img::Img& right, 38 | img::Img& disp, img::Img& conf) override; 39 | 40 | private: 41 | std::vector censusLeft, censusRight; 42 | std::vector costs; 43 | std::vector costsSummed; 44 | alg_config config; 45 | }; 46 | } 47 | -------------------------------------------------------------------------------- /src/stereo.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "image.h" 4 | #include // memset 5 | #include 6 | #include 7 | #include 8 | namespace stereo { 9 | 10 | class StereoMatch { 11 | public: 12 | StereoMatch(int w, int h, int d, int m) 13 | : width(w) 14 | , height(h) 15 | , maxdisp(d) 16 | , muldisp(m){}; 17 | virtual void match(img::Img& left, img::Img& right, 18 | img::Img& disp, img::Img& conf) 19 | = 0; 20 | std::pair, img::Img> match(img::Img& left, img::Img& right) 21 | { 22 | img::Img disp(left.width, left.height,uint16_t(0)); 23 | img::Img conf(left.width, left.height,uint8_t(0)); 24 | 25 | this->match(left, right, disp, conf); 26 | return std::make_pair(disp,conf); 27 | } 28 | 29 | public: // but please don't set! 
30 | std::string costsName; 31 | int width, height, maxdisp, muldisp; 32 | }; 33 | } 34 | -------------------------------------------------------------------------------- /src/subpixel_extract.cpp: -------------------------------------------------------------------------------- 1 | #include "r200Match.h" 2 | #include 3 | #include 4 | #include 5 | #include "bmMatch.h" 6 | #include "cMatch.h" 7 | #include "imio.h" 8 | #include "r200Match.h" 9 | #include "sgbmMatch.h" 10 | 11 | #include "image_filter.h" 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #define JSON_H_IMPLEMENTATION 22 | #include "json.h" 23 | 24 | 25 | 26 | static float subpixel(float costLeft, float costMiddle, float costRight) 27 | { 28 | if (costMiddle >= 0xfffe || costLeft >= 0xfffe || costRight >= 0xfffe) 29 | return 0.f; 30 | 31 | auto num = costRight - costLeft; 32 | auto den = (costRight < costLeft) ? (costMiddle - costLeft) : (costMiddle - costRight); 33 | return den != 0 ? 0.5f * (num / den) : 0; 34 | } 35 | #define DS (1) 36 | 37 | void generateDispConf( 38 | stereo::R200Match::alg_config & config, 39 | img::Img & left, std::vector & costs, 40 | std::string out_name, 41 | img::Img gt_disp, img::Img gt_mask) 42 | { 43 | std::ofstream outf(out_name,std::ios::binary); 44 | const auto B_R = config.box_radius; 45 | auto height = left.height; 46 | auto width = left.width; 47 | int maxdisp = costs.size()/(width*height); 48 | const auto default_score = (24*(2*B_R+1)*(2*B_R+1) + config.dt_scale - 1) / config.dt_scale; 49 | for (int y = B_R; y < height - B_R; y++) { 50 | auto prevVal = 0; 51 | auto costX = costs.data() + y * (width*maxdisp); 52 | for (int x = B_R; x < width - B_R; x++) { 53 | auto minRVal = std::numeric_limits::max(); 54 | auto minRIdx = 0; 55 | auto minLVal = std::numeric_limits::max(); 56 | auto minLIdx = 0; 57 | for (int d = 0; d < maxdisp; d++) { 58 | auto cost = costX[x * maxdisp + d]; 59 | if (cost < minLVal) { 60 | minLVal = cost; 61 | minLIdx = d; 62 | } 63 | } 64 | 65 | // subpixel left 66 | auto nL = costX[x * maxdisp + std::max(minLIdx - 1, 0)]; 67 | auto nC = costX[x * maxdisp + minLIdx]; 68 | auto nR = costX[x * maxdisp + std::min(minLIdx + 1, maxdisp - 1)]; 69 | auto spL = (minLIdx > 0 && minLIdx < maxdisp - 1) ? subpixel(nL, nC, nR) : 0; 70 | // subpixel right 71 | auto rL = costX[std::max(0, (x - 1)) * maxdisp + std::max(minLIdx - 1, 0)]; 72 | auto rC = costX[(x)*maxdisp + minLIdx]; 73 | auto rR = costX[std::min(width - 1, (x + 1)) * maxdisp + std::min(minLIdx + 1, maxdisp - 1)]; 74 | auto spR = (minLIdx < maxdisp - 1) ? 
subpixel(rL, rC, rR) : 0; 75 | 76 | // disparity computation 77 | float res = std::max(0,minLIdx-DS); 78 | auto shift = gt_disp(y,x) - res; 79 | if (gt_mask(y,x) > 0.5f && std::abs(shift) <= 0.5f) { 80 | outf << nL << ' ' << nC << ' ' << nR << ' ' 81 | << rL << ' ' << rC << ' ' << rR << ' ' 82 | << shift << '\n'; 83 | } 84 | } 85 | } 86 | } 87 | 88 | template void visit_fields(stereo::R200Match::alg_config & o, F f) { 89 | f("dispmul", o.dispmul); 90 | f("box_radius", o.box_radius); 91 | f("left_right_int", o.left_right_int); 92 | f("left_right_sub", o.left_right_sub); 93 | f("neighbor", o.neighbor); 94 | f("second_peak", o.second_peak); 95 | f("texture_diff", o.texture_diff); 96 | f("texture_count", o.texture_count); 97 | f("score_min", o.score_min); 98 | f("score_max", o.score_max); 99 | f("median_plus", o.median_plus); 100 | f("median_minus", o.median_minus); 101 | f("median_thresh", o.median_thresh); 102 | f("hole_fill", o.hole_fill); 103 | f("domain_transform", o.domain_transform); 104 | f("dt_scale", o.dt_scale); 105 | f("dt_iter", o.dt_iter); 106 | f("dt_space", o.dt_space); 107 | f("dt_range", o.dt_range); 108 | } 109 | template void visit_fields(stereo::sgbmMatch::alg_config & o, F f) { 110 | f("dispmul", o.dispmul); 111 | f("box_radius", o.box_radius); 112 | f("left_right_int", o.left_right_int); 113 | f("left_right_sub", o.left_right_sub); 114 | f("neighbor", o.neighbor); 115 | f("second_peak", o.second_peak); 116 | f("texture_diff", o.texture_diff); 117 | f("texture_count", o.texture_count); 118 | f("score_min", o.score_min); 119 | f("score_max", o.score_max); 120 | f("median_plus", o.median_plus); 121 | f("median_minus", o.median_minus); 122 | f("median_thresh", o.median_thresh); 123 | f("hole_fill", o.hole_fill); 124 | f("cost_abs", o.cost_abs); 125 | f("cost_ham", o.cost_ham); 126 | f("p1", o.p1); 127 | f("p2", o.p2); 128 | f("sgm", o.sgm); 129 | f("scale_p2", o.scale_p2); 130 | f("use_blf", o.use_blf); 131 | f("blf_range", o.blf_range); 132 | f("blf_space", o.blf_space); 133 | } 134 | int main(int argc, char* argv[]) 135 | { 136 | json::value doc; 137 | if (argc < 2) 138 | return 1; 139 | if (auto in = std::ifstream(argv[1])) 140 | { 141 | std::string str((std::istreambuf_iterator(in)), 142 | std::istreambuf_iterator()); 143 | doc = json::parse(str); 144 | } 145 | else { 146 | std::vector args(argv + 1, argv + argc); 147 | auto str = std::accumulate(begin(args), end(args), std::string()); 148 | doc = json::parse(str); 149 | } 150 | std::string leftFile = doc["left_rgb"].string(); 151 | std::string rightFile = doc["right_rgb"].string(); 152 | 153 | auto left = img::imread(leftFile.c_str()); 154 | auto right = img::imread(rightFile.c_str()); 155 | 156 | auto left_g = img::Rgb2grey(left); 157 | auto right_g = img::Rgb2grey(right); 158 | 159 | int bitshift = (int)log2(doc["minint"].number()+1); 160 | 161 | for (int i = 0; i < left.width*left.height; i++) { 162 | left_g(i) >>= bitshift; 163 | right_g(i) >>= bitshift; 164 | } 165 | auto gt_disp = img::imread(doc["gt"].string().c_str()); 166 | auto gt_mask = img::imread(doc["gt_mask"].string().c_str()); 167 | json::object results; 168 | 169 | stereo::R200Match::alg_config cfg; 170 | 171 | if (doc["config"]["algorithm"].string() == "r200") { 172 | from_json(cfg, doc["config"]); 173 | } 174 | auto costsName = doc["costs"].string(); 175 | struct raw_header {int w,h,c,bpp;}; 176 | raw_header hd; 177 | std::ifstream outn(costsName,std::ifstream::binary); 178 | outn.read((char*)&hd,sizeof(raw_header)); 179 | std::vector 
costs(hd.w*hd.h*hd.c); 180 | if(hd.bpp == 4) { 181 | outn.read((char*)costs.data(),costs.size()*sizeof(uint32_t)); 182 | } else if(hd.bpp == 2) { 183 | std::vector costst(hd.w*hd.h*hd.c); 184 | outn.read((char*)costst.data(),costst.size()*sizeof(uint16_t)); 185 | for(int i=0; i < costs.size(); i++) 186 | costs[i] = costst[i]; 187 | } 188 | 189 | generateDispConf(cfg,left_g,costs, 190 | doc["output_disp"].string() + ".txt",gt_disp,gt_mask); 191 | 192 | return 0; 193 | } 194 | -------------------------------------------------------------------------------- /src/vecmatquat_minimal.h: -------------------------------------------------------------------------------- 1 | // 2 | // minimal set of well understood vec,quat,mat 3d math routines as needed. 3 | // Following/extending hlsl conventions. 4 | // 5 | // defines the same set of structs as the alternative file vecmatquat.h. 6 | // This vecmatquat_minimal.h version is meant to enable copy-paste just the code snippets that are needed. 7 | // 8 | 9 | #ifdef VECMATQUAT_FULL_H 10 | #error 11 | #endif 12 | #pragma once 13 | 14 | #ifndef VECMATQUAT_H 15 | #define VECMATQUAT_H 16 | #define VECMATQUAT_MINIMAL_H 17 | 18 | #include 19 | #include 20 | #include 21 | #include // for std::pair 22 | 23 | struct int2 { 24 | int x, y; 25 | int& operator[](int i) { return (&x)[i]; } 26 | const int& operator[](int i) const { return (&x)[i]; } 27 | }; 28 | inline bool operator==(const int2& a, const int2& b) { return a.x == b.x && a.y == b.y; } 29 | 30 | struct int3 { 31 | int x, y, z; 32 | int& operator[](int i) { return (&x)[i]; } 33 | const int& operator[](int i) const { return (&x)[i]; } 34 | }; 35 | inline bool operator==(const int3& a, const int3& b) { return a.x == b.x && a.y == b.y && a.z == b.z; } 36 | 37 | struct int4 { 38 | int x, y, z, w; 39 | int& operator[](int i) { return (&x)[i]; } 40 | const int& operator[](int i) const { return (&x)[i]; } 41 | }; 42 | 43 | struct float2 { 44 | float x, y; 45 | }; 46 | class float3 { 47 | public: 48 | float x, y, z; 49 | float3(float x, float y, float z) 50 | : x(x) 51 | , y(y) 52 | , z(z) 53 | { 54 | } 55 | float3() 56 | : x(0) 57 | , y(0) 58 | , z(0) 59 | { 60 | } 61 | float& operator[](int i) { return (&x)[i]; } 62 | const float& operator[](int i) const { return (&x)[i]; } 63 | }; 64 | 65 | inline bool operator==(const float3& a, const float3& b) { return a.x == b.x && a.y == b.y && a.z == b.z; } 66 | inline bool operator!=(const float3& a, const float3& b) { return !(a == b); } 67 | inline float3 operator+(const float3& a, const float3& b) { return { a.x + b.x, a.y + b.y, a.z + b.z }; } 68 | inline float3 operator-(const float3& v) { return { -v.x, -v.y, -v.z }; } 69 | inline float3 operator-(const float3& a, const float3& b) { return { a.x - b.x, a.y - b.y, a.z - b.z }; } 70 | inline float3 operator*(const float3& v, float s) { return { v.x * s, v.y * s, v.z * s }; } 71 | inline float3 operator*(float s, const float3& v) { return v * s; } 72 | inline float3 operator/(const float3& v, float s) { return v * (1.0f / s); } 73 | inline float3 operator+=(float3& a, const float3& b) { return a = a + b; } 74 | inline float3 operator-=(float3& a, const float3& b) { return a = a - b; } 75 | inline float3 operator*=(float3& v, const float& s) { return v = v * s; } 76 | inline float3 operator/=(float3& v, const float& s) { return v = v / s; } 77 | inline float dot(const float3& a, const float3& b) { return a.x * b.x + a.y * b.y + a.z * b.z; } 78 | inline float3 cross(const float3& a, const float3& b) { return { a.y * 
b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x }; } 79 | inline float magnitude(const float3& v) { return sqrtf(dot(v, v)); } 80 | inline float3 normalize(const float3& v) { return v / magnitude(v); } 81 | inline float3 cmin(const float3& a, const float3& b) { return { std::min(a.x, b.x), std::min(a.y, b.y), std::min(a.z, b.z) }; } 82 | inline float3 cmax(const float3& a, const float3& b) { return { std::max(a.x, b.x), std::max(a.y, b.y), std::max(a.z, b.z) }; } 83 | inline float3 vabs(const float3& v) { return { std::abs(v.x), std::abs(v.y), std::abs(v.z) }; } 84 | 85 | class float3x3 { 86 | public: 87 | float3 x, y, z; 88 | float3x3() 89 | { 90 | x = float3(1.0f, 0.0f, 0.0f); 91 | y = float3(0.0f, 1.0f, 0.0f); 92 | z = float3(0.0f, 0.0f, 1.0f); 93 | }; 94 | float3x3(float3 x, float3 y, float3 z) 95 | : x(x) 96 | , y(y) 97 | , z(z) 98 | { 99 | } 100 | float3& operator[](int i) { return (&x)[i]; } 101 | const float3& operator[](int i) const { return (&x)[i]; } 102 | }; 103 | 104 | inline float3x3 operator*(const float3x3& m, float s) { return { m.x * s, m.y * s, m.z * s }; } 105 | inline float3x3 transpose(const float3x3& m) 106 | { 107 | return float3x3(float3(m.x.x, m.y.x, m.z.x), 108 | float3(m.x.y, m.y.y, m.z.y), 109 | float3(m.x.z, m.y.z, m.z.z)); 110 | } 111 | inline float3 mul(const float3x3& m, const float3& v) { return m.x * v.x + m.y * v.y + m.z * v.z; } // m is assumed to be column major 112 | inline float determinant(const float3x3& a) { return a.x.x * (a.y.y * a.z.z - a.z.y * a.y.z) + a.x.y * (a.y.z * a.z.x - a.z.z * a.y.x) + a.x.z * (a.y.x * a.z.y - a.z.x * a.y.y); } 113 | inline float3x3 adjoint(const float3x3& a) 114 | { 115 | return { { a.y.y * a.z.z - a.z.y * a.y.z, a.z.y * a.x.z - a.x.y * a.z.z, a.x.y * a.y.z - a.y.y * a.x.z }, 116 | { a.y.z * a.z.x - a.z.z * a.y.x, a.z.z * a.x.x - a.x.z * a.z.x, a.x.z * a.y.x - a.y.z * a.x.x }, 117 | { a.y.x * a.z.y - a.z.x * a.y.y, a.z.x * a.x.y - a.x.x * a.z.y, a.x.x * a.y.y - a.y.x * a.x.y } }; 118 | } 119 | inline float3x3 inverse(float3x3& a) { return adjoint(a) * (1.0f / determinant(a)); } 120 | inline float3x3 outerprod(const float3& a, const float3& b) { return { a * b.x, a * b.y, a * b.z }; } 121 | 122 | class float4 { 123 | public: 124 | float x, y, z, w; 125 | const float3& xyz() const { return *((float3*)&x); } 126 | float3& xyz() { return *((float3*)&x); } 127 | //float4(){ x = y = z = 0.0f; w = 1.0f; }; 128 | float4(float _x, float _y, float _z, float _w) 129 | { 130 | x = _x; 131 | y = _y; 132 | z = _z; 133 | w = _w; 134 | }; 135 | float& operator[](int i) { return (&x)[i]; } 136 | const float& operator[](int i) const { return (&x)[i]; } 137 | }; 138 | 139 | inline float4 operator*(const float4& a, float b) { return { a.x * b, a.y * b, a.z * b, a.w * b }; } 140 | inline float4 operator+(const float4& a, const float4& b) { return { a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w }; } 141 | inline float4 operator-(const float4& v) { return { -v.x, -v.y, -v.z, -v.w }; } 142 | inline float dot(const float4& a, const float4& b) { return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; } 143 | inline float4 slerp(const float4& v0, const float4& v1, float t) 144 | { 145 | float a = (float)acos(dot(v0, v1)); 146 | if (a == 0.0f) { 147 | return (v0); 148 | } 149 | return v0 * (float)(sin(a - t * a) / sin(a)) + v1 * (float)(sin(t * a) / sin(a)); 150 | } 151 | 152 | // Quaternion library: 153 | inline float4 qmul(const float4& a, const float4& b) 154 | { 155 | return { 156 | a.w * b.x + a.x * b.w + a.y * b.z - a.z * b.y, 157 
157 |         a.w * b.y - a.x * b.z + a.y * b.w + a.z * b.x,
158 |         a.w * b.z + a.x * b.y - a.y * b.x + a.z * b.w,
159 |         a.w * b.w - a.x * b.x - a.y * b.y - a.z * b.z
160 |     };
161 | }
162 | inline float4 qconj(const float4& q) { return { -q.x, -q.y, -q.z, q.w }; }
163 | inline float3 qrot(const float4& q, const float3& v) { return qmul(qmul(q, float4(v.x, v.y, v.z, 0)), qconj(q)).xyz(); } // q*v*conj(q)
164 | inline float3 qxdir(const float4& q) { return float3(1 - 2 * (q.y * q.y + q.z * q.z), 2 * (q.x * q.y + q.w * q.z), 2 * (q.x * q.z - q.w * q.y)); }
165 | inline float3 qydir(const float4& q) { return float3(2 * (q.x * q.y - q.w * q.z), 1 - 2 * (q.x * q.x + q.z * q.z), 2 * (q.y * q.z + q.w * q.x)); }
166 | inline float3 qzdir(const float4& q) { return float3(2 * (q.x * q.z + q.w * q.y), 2 * (q.y * q.z - q.w * q.x), 1 - 2 * (q.x * q.x + q.y * q.y)); }
167 | //inline float3x3 qmatrix(const float4 &q) { return float3x3(qxdir(q), qydir(q), qzdir(q)); }
168 | 
169 | inline float4 qslerp(const float4& q0, const float4& q1, float t)
170 | {
171 |     return slerp(((dot(q0, q1) < 0.0) ? -q0 : q0), q1, t); // slerp between two quaternions on the same hemisphere (common usage)
172 | }
173 | inline float4 QuatFromAxisAngle(const float3& axis, float t)
174 | {
175 |     auto v = normalize(axis) * sinf(t / 2.0f);
176 |     return { v.x, v.y, v.z, cosf(t / 2.0f) };
177 | }
178 | inline std::pair<float3, float> AxisAngleFromQuat(const float4& q)
179 | {
180 |     auto a = acos(q.w) * 2.0f;
181 |     return std::make_pair(q.xyz() / sinf(a / 2.0f), a);
182 | }
183 | 
184 | struct float4x4 {
185 |     float4 x, y, z, w;
186 | };
187 | 
188 | //-------- copied from geometric.h -------
189 | 
190 | inline float3 PlaneLineIntersection(const float3& n, const float d, const float3& p0, const float3& p1) // returns the point where the line p0-p1 intersects the plane n&d
191 | {
192 |     float3 dif = p1 - p0;
193 |     float dn = dot(n, dif);
194 |     float t = -(d + dot(n, p0)) / dn;
195 |     return p0 + (dif * t);
196 | }
197 | inline int argmax(const float a[], int n)
198 | {
199 |     int r = 0;
200 |     for (int i = 1; i < n; i++) {
201 |         if (a[i] > a[r]) {
202 |             r = i;
203 |         }
204 |     }
205 |     return r;
206 | }
207 | inline float3 Orth(const float3& v)
208 | {
209 |     float3 absv = vabs(v);
210 |     float3 u(1, 1, 1);
211 |     u[argmax(&absv[0], 3)] = 0.0f;
212 |     return normalize(cross(u, v));
213 | }
214 | inline float4 RotationArc(const float3& v0_, const float3& v1_)
215 | {
216 |     auto v0 = normalize(v0_); // Comment these two lines out if you know it's not needed.
217 |     auto v1 = normalize(v1_); // If the vectors are already unit length, why normalize again?
218 |     auto c = cross(v0, v1);
219 |     auto d = dot(v0, v1);
220 |     if (d <= -1.0f) {
221 |         float3 a = Orth(v0);
222 |         return float4(a.x, a.y, a.z, 0);
223 |     } // 180 degrees about any orthogonal axis
224 |     auto s = sqrtf((1 + d) * 2);
225 |     return { c.x / s, c.y / s, c.z / s, s / 2.0f };
226 | }
227 | inline float4 VirtualTrackBall(const float3& cop, const float3& cor, const float3& dir1, const float3& dir2)
228 | {
229 |     // Simple trackball functionality to spin stuff on the screen.
230 |     // cop: center of projection, cor: center of rotation,
231 |     // dir1: old mouse direction, dir2: new mouse direction.
232 |     // Pretend there is a sphere around cor. Take the rotation
233 |     // between the approximate points where dir1 and dir2 intersect that sphere.
234 |     float3 nrml = cor - cop; // compute plane
235 |     float fudgefactor = 1.0f / (magnitude(nrml) * 0.25f); // since trackball sensitivity is proportional to distance from cop
236 |     nrml = normalize(nrml);
237 |     float dist = -dot(nrml, cor);
238 |     float3 u = (PlaneLineIntersection(nrml, dist, cop, cop + dir1) - cor) * fudgefactor;
239 |     float m = magnitude(u);
240 |     u = (m > 1) ? u / m : u - (nrml * sqrtf(1 - m * m));
241 |     float3 v = (PlaneLineIntersection(nrml, dist, cop, cop + dir2) - cor) * fudgefactor;
242 |     m = magnitude(v);
243 |     v = (m > 1) ? v / m : v - (nrml * sqrtf(1 - m * m));
244 |     return RotationArc(u, v);
245 | }
246 | //--------------------------------
247 | 
248 | #endif // VECMATQUAT_H
249 | 
--------------------------------------------------------------------------------
/src/vis_pfm.cpp:
--------------------------------------------------------------------------------
1 | #include "image.h"
2 | #include "imio.h"
3 | 
4 | #include
5 | #include
6 | #include
7 | #include "cam_util.h"
8 | 
9 | 
10 | int main(int argc, char* argv[])
11 | {
12 |     if (argc < 2)
13 |         return 1;
14 |     auto in = img::imread(argv[1]);
15 |     auto disc = img::Img(in.width, in.height);
16 |     auto hist = img::Image(in.width, in.height);
17 |     if (argc >= 3) {
18 |         auto conf = img::imread(argv[2]);
19 |         for (int i = 0; i < in.width*in.height; i++)
20 |             in.ptr[i] = conf.ptr[i] > 0.5 ? in.ptr[i] : 0.0f;
21 |     }
22 |     for (int i = 0; i < in.width*in.height; i++)
23 |         disc.ptr[i] = (uint16_t)( in.ptr[i] * 8 + 0.5f);
24 | 
25 |     util::ConvertDepthToRGBUsingHistogram(hist.ptr, disc.ptr, in.width, in.height, 0, 0.625f);
26 | 
27 |     //std::string newfile(argv[1]);
28 |     //newfile[newfile.size() - 1] = 'g';
29 |     //newfile[newfile.size() - 2] = 'n';
30 |     //newfile[newfile.size() - 3] = 'p';
31 |     //img::imwrite(newfile.c_str(), hist);
32 |     img::imwrite("out.png", hist);
33 |     return 0;
34 | }
35 | 
--------------------------------------------------------------------------------
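A short usage sketch may help tie the quaternion helpers above together. It is not part of the repository: the include name "vecmatquat_minimal.h", the file name usage_sketch.cpp, and the expected values noted in the comments are illustrative assumptions only.

// usage_sketch.cpp -- illustrative only, not a file in this repository.
// Assumes the header shown above is available as "vecmatquat_minimal.h".
#include <cstdio>
#include "vecmatquat_minimal.h"

int main()
{
    // A 90-degree rotation about +Z should carry +X onto +Y (up to float error).
    const float pi = 3.14159265f;
    float4 q = QuatFromAxisAngle(float3(0.0f, 0.0f, 1.0f), pi / 2.0f);
    float3 r = qrot(q, float3(1.0f, 0.0f, 0.0f));
    printf("qrot(+X by 90deg about Z): %f %f %f\n", r.x, r.y, r.z);

    // RotationArc yields the shortest-arc quaternion taking v0 onto v1,
    // so rotating v0 by it should reproduce v1.
    float3 v0 = normalize(float3(1.0f, 2.0f, 3.0f));
    float3 v1 = normalize(float3(-2.0f, 1.0f, 0.0f));
    float4 arc = RotationArc(v0, v1);
    printf("RotationArc residual: %f\n", magnitude(qrot(arc, v0) - v1));

    // qslerp at t = 0.5 between the identity and q is roughly a 45-degree
    // rotation about +Z.
    float4 identity(0.0f, 0.0f, 0.0f, 1.0f);
    float3 h = qrot(qslerp(identity, q, 0.5f), float3(1.0f, 0.0f, 0.0f));
    printf("halfway rotation of +X: %f %f %f\n", h.x, h.y, h.z);
    return 0;
}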
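VirtualTrackBall expects dir1 and dir2 to be ray directions from the center of projection toward the scene for the previous and current mouse positions. The sketch below shows one way a caller might produce those rays and accumulate an orientation; PixelToRay, DragUpdate, the pinhole mapping, the field-of-view parameter, and the qmul composition order are all assumptions for illustration, not code from this repository.

// trackball_drag.cpp -- illustrative only, not a file in this repository.
// Assumes the header shown above is available as "vecmatquat_minimal.h".
#include <cmath>
#include "vecmatquat_minimal.h"

// Hypothetical helper: map a pixel position to a view-space ray direction for a
// pinhole camera looking down -Z with vertical field of view vfov (radians).
inline float3 PixelToRay(float px, float py, int width, int height, float vfov)
{
    float aspect = (float)width / (float)height;
    float ty = tanf(vfov * 0.5f);
    float x = (2.0f * px / (float)width - 1.0f) * ty * aspect;
    float y = (1.0f - 2.0f * py / (float)height) * ty;
    return normalize(float3(x, y, -1.0f));
}

// One drag step: VirtualTrackBall returns the incremental rotation between the
// previous and current mouse rays; composing with qmul accumulates orientation.
// (Applying the increment on the left is one common convention; the right order
// depends on how the caller uses the resulting quaternion.)
inline float4 DragUpdate(const float4& orientation, const float3& cop, const float3& cor,
                         const float3& prevRay, const float3& currRay)
{
    float4 dq = VirtualTrackBall(cop, cor, prevRay, currRay);
    return qmul(dq, orientation);
}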