├── CMakeLists.txt
├── CUDADefines.h
├── Cholesky.h
├── Dummy.cpp
├── Image.h
├── LexicalCast.h
├── MathUtils.h
├── Matrix.h
├── MemoryBlock.h
├── MemoryBlockPersister.h
├── MetalContext.h
├── MetalContext.mm
├── PlatformIndependence.h
├── README
└── Vector.h

/CMakeLists.txt:
--------------------------------------------------------------------------------
#############################
# Specify the project files #
#############################

##
SET(ORUTILS_HEADERS
Vector.h
Matrix.h
Cholesky.h
MathUtils.h
Image.h
CUDADefines.h
LexicalCast.h
MemoryBlock.h
MemoryBlockPersister.h
PlatformIndependence.h
)

#################################################################
# Collect the project files into common, CPU-only and CUDA-only #
#################################################################

set(ORUTILS_OBJECTS
Dummy.cpp
${ORUTILS_HEADERS}
)

#############################
# Specify the source groups #
#############################

SOURCE_GROUP("" FILES ${ORUTILS_HEADERS})

##############################################################
# Specify the include directories, target and link libraries #
##############################################################

add_library(ORUtils ${ORUTILS_OBJECTS})

IF(WITH_CUDA)
  # include_directories(${CUDA_INCLUDE_DIRS})
  # cuda_add_library(ITMLib
  #   ${ITMLIB_CPU_OBJECTS}
  #   ${ITMLIB_CUDA_OBJECTS}
  #   ${ITMLIB_COMMON_OBJECTS}
  #   OPTIONS -gencode arch=compute_11,code=compute_11 -gencode arch=compute_30,code=compute_30)
  # target_link_libraries(ITMLib ${CUDA_LIBRARY})
ELSE()
  # add_definitions(-DCOMPILE_WITHOUT_CUDA)
  # add_library(ITMLib ${ITMLIB_CPU_OBJECTS} ${ITMLIB_COMMON_OBJECTS})
ENDIF()

#target_link_libraries(ITMLib Utils)

--------------------------------------------------------------------------------
/CUDADefines.h:
--------------------------------------------------------------------------------
// Copyright 2014-2015 Isis Innovation Limited and the authors of InfiniTAM

#pragma once

#ifndef COMPILE_WITHOUT_CUDA

#if (!defined USING_CMAKE) && (defined _MSC_VER)
#pragma comment( lib, "cuda.lib" )
#pragma comment( lib, "cudart.lib" )
#pragma comment( lib, "cublas.lib" )
#pragma comment( lib, "cufft.lib" )
#endif

// CUDA headers matching the libraries linked above, plus the C headers
// needed by the error handler below.
#include <cuda.h>
#include <cuda_runtime.h>
#include <cublas.h>
#include <cufft.h>

#include <stdio.h>
#include <stdlib.h>

#ifdef _WIN32
#  define WINDOWS_LEAN_AND_MEAN
#  include <windows.h>
#endif

#ifndef ORcudaSafeCall
#define ORcudaSafeCall(err) ORUtils::__cudaSafeCall(err, __FILE__, __LINE__)

namespace ORUtils {

inline void __cudaSafeCall(cudaError err, const char *file, const int line)
{
	if (cudaSuccess != err) {
		printf("%s(%i) : cudaSafeCall() Runtime API error : %s.\n",
			file, line, cudaGetErrorString(err));
		exit(-1);
	}
}

}

#endif

#endif

--------------------------------------------------------------------------------
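A minimal usage sketch for the ORcudaSafeCall wrapper above (not part of the library; the buffer name and size are illustrative, and it assumes a CUDA-enabled build):

#include "CUDADefines.h"

void example_allocation()
{
	float *buffer = NULL;

	// Wrapping each CUDA runtime call reports the file and line of any
	// failure and aborts, instead of silently discarding the error code.
	ORcudaSafeCall(cudaMalloc((void**)&buffer, 1024 * sizeof(float)));
	ORcudaSafeCall(cudaMemset(buffer, 0, 1024 * sizeof(float)));
	ORcudaSafeCall(cudaFree(buffer));
}
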
/Cholesky.h:
--------------------------------------------------------------------------------
// Copyright 2014-2015 Isis Innovation Limited and the authors of InfiniTAM

#pragma once

#include <vector>

namespace ORUtils
{
	class Cholesky
	{
	private:
		std::vector<float> cholesky;
		int size, rank;

	public:
		Cholesky(const float *mat, int size)
		{
			this->size = size;
			this->cholesky.resize(size*size);

			for (int i = 0; i < size * size; i++) cholesky[i] = mat[i];

			// The rank is the full size unless a zero pivot is found below.
			rank = size;

			for (int c = 0; c < size; c++)
			{
				float inv_diag = 1;
				for (int r = c; r < size; r++)
				{
					float val = cholesky[c + r * size];
					for (int c2 = 0; c2 < c; c2++)
						val -= cholesky[c + c2 * size] * cholesky[c2 + r * size];

					if (r == c)
					{
						cholesky[c + r * size] = val;
						if (val == 0) { rank = r; }
						inv_diag = 1.0f / val;
					}
					else
					{
						cholesky[r + c * size] = val;
						cholesky[c + r * size] = val * inv_diag;
					}
				}
			}
		}

		void Backsub(float *result, const float *v) const
		{
			std::vector<float> y(size);
			for (int i = 0; i < size; i++)
			{
				float val = v[i];
				for (int j = 0; j < i; j++) val -= cholesky[j + i * size] * y[j];
				y[i] = val;
			}

			for (int i = 0; i < size; i++) y[i] /= cholesky[i + i * size];

			for (int i = size - 1; i >= 0; i--)
			{
				float val = y[i];
				for (int j = i + 1; j < size; j++) val -= cholesky[i + j * size] * result[j];
				result[i] = val;
			}
		}

		~Cholesky(void)
		{
		}
	};
}

--------------------------------------------------------------------------------
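A minimal sketch of solving a small symmetric positive-definite system with the Cholesky helper above (the numbers are illustrative, not from the library):

#include "Cholesky.h"

void example_solve()
{
	// Solve A * x = b for a 2x2 SPD matrix A.
	const float A[] = { 4.0f, 2.0f,
	                    2.0f, 3.0f };
	const float b[] = { 10.0f, 8.0f };
	float x[2];

	ORUtils::Cholesky decomposition(A, 2);
	decomposition.Backsub(x, b); // x now holds the solution
}
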
/Dummy.cpp:
--------------------------------------------------------------------------------
// Copyright 2014-2015 Isis Innovation Limited and the authors of InfiniTAM

void dummy_with_external_linkage() {}

--------------------------------------------------------------------------------
/Image.h:
--------------------------------------------------------------------------------
// Copyright 2014-2015 Isis Innovation Limited and the authors of InfiniTAM

#pragma once

#include "MemoryBlock.h"

#ifndef __METALC__

namespace ORUtils
{
	/** \brief
	    Represents images, templated on the pixel type
	*/
	template <typename T>
	class Image : public MemoryBlock < T >
	{
	public:
		/** Size of the image in pixels. */
		Vector2<int> noDims;

		/** Initialize an empty image of the given size, either
		    on CPU only or on both CPU and GPU.
		*/
		Image(Vector2<int> noDims, bool allocate_CPU, bool allocate_CUDA, bool metalCompatible = true)
			: MemoryBlock<T>(noDims.x * noDims.y, allocate_CPU, allocate_CUDA, metalCompatible)
		{
			this->noDims = noDims;
		}

		Image(bool allocate_CPU, bool allocate_CUDA, bool metalCompatible = true)
			: MemoryBlock<T>(0, allocate_CPU, allocate_CUDA, metalCompatible)
		{
			this->noDims = Vector2<int>(0, 0);
		}

		Image(Vector2<int> noDims, MemoryDeviceType memoryType)
			: MemoryBlock<T>(noDims.x * noDims.y, memoryType)
		{
			this->noDims = noDims;
		}

		/** Resize an image, losing all old image data.
		    Essentially any previously allocated data is
		    released, new memory is allocated.
		*/
		void ChangeDims(Vector2<int> newDims)
		{
			if (newDims != noDims)
			{
				this->noDims = newDims;

				bool allocate_CPU = this->isAllocated_CPU;
				bool allocate_CUDA = this->isAllocated_CUDA;
				bool metalCompatible = this->isMetalCompatible;

				this->Free();
				this->Allocate(newDims.x * newDims.y, allocate_CPU, allocate_CUDA, metalCompatible);
			}
		}

		// Suppress the default copy constructor and assignment operator
		Image(const Image&);
		Image& operator=(const Image&);
	};
}

#endif

--------------------------------------------------------------------------------
/LexicalCast.h:
--------------------------------------------------------------------------------
// Copyright 2014-2015 Isis Innovation Limited and the authors of InfiniTAM

#pragma once

#include <sstream>

namespace ORUtils
{

/**
 * \brief Performs a lexical conversion from the source type to the target type.
 *
 * This is a lightweight replacement for boost::lexical_cast. It's not as
 * sophisticated as that, but it works well enough. Note that we can't use
 * exceptions, since they're not well supported on Android.
 *
 * \param src     The source value to convert.
 * \param target  A location into which to store the converted value.
 * \return        true, if the conversion succeeded, or false otherwise.
 */
template <typename Target, typename Source>
bool lexical_cast(const Source& src, Target& target)
{
	std::stringstream ss;
	ss << src;
	return ss >> target && ss.eof();
}

}

--------------------------------------------------------------------------------
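A short usage sketch for lexical_cast (the values are illustrative); note how the eof() check rejects trailing garbage:

#include "LexicalCast.h"

void example_casts()
{
	int i;
	bool ok = ORUtils::lexical_cast("42", i);    // true: i == 42, whole input consumed

	float f;
	bool bad = ORUtils::lexical_cast("3.5x", f); // false: "x" is left after the number
}
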
/MathUtils.h:
--------------------------------------------------------------------------------
// Copyright 2014-2015 Isis Innovation Limited and the authors of InfiniTAM

#pragma once

#ifndef MIN
#define MIN(a,b) ((a < b) ? a : b)
#endif

#ifndef MAX
#define MAX(a,b) ((a < b) ? b : a)
#endif

#ifndef ABS
#define ABS(a) ((a < 0) ? -a : a)
#endif

#ifndef CLAMP
#define CLAMP(x,a,b) MAX((a), MIN((b), (x)))
#endif

#ifndef ROUND
#define ROUND(x) ((x < 0) ? (x - 0.5f) : (x + 0.5f))
#endif

#ifndef PI
#define PI float(3.1415926535897932384626433832795)
#endif

#ifndef DEGTORAD
#define DEGTORAD float(0.017453292519943295769236907684886)
#endif

#ifndef MY_INF
#define MY_INF 0x7f800000
#endif

#ifndef __METALC__

inline bool portable_finite(float a)
{
	volatile float temp = a;
	if (temp != a) return false;          // catches NaN
	if ((temp - a) != 0.0) return false;  // catches infinities
	return true;
}

inline void matmul(const float *A, const float *b, float *x, int numRows, int numCols)
{
	for (int r = 0; r < numRows; ++r)
	{
		float res = 0.0f;
		for (int c = 0; c < numCols; ++c) res += A[r*numCols + c] * b[c];
		x[r] = res;
	}
}

#endif

--------------------------------------------------------------------------------
/Matrix.h:
--------------------------------------------------------------------------------
// Copyright 2014-2015 Isis Innovation Limited and the authors of InfiniTAM
#pragma once

#include <string.h>
#include <ostream>

/************************************************************************/
/* WARNING: the following 3x3 and 4x4 matrices are stored column-major, */
/* to be consistent with the OpenGL default rather than the usual C/C++ */
/* convention. In all other parts of the code, we still use row-major.  */
/************************************************************************/

namespace ORUtils {
	template <class T> class Vector2;
	template <class T> class Vector3;
	template <class T> class Vector4;
	template <class T, int s> class VectorX;

	//////////////////////////////////////////////////////////////////////////
	// Basic Matrix Structure
	//////////////////////////////////////////////////////////////////////////

	template <class T> struct Matrix4_ {
		union {
			struct { // Warning: see the header in this file for the special matrix order
				T m00, m01, m02, m03;	// |0, 4, 8,  12|    |m00, m10, m20, m30|
				T m10, m11, m12, m13;	// |1, 5, 9,  13|    |m01, m11, m21, m31|
				T m20, m21, m22, m23;	// |2, 6, 10, 14|    |m02, m12, m22, m32|
				T m30, m31, m32, m33;	// |3, 7, 11, 15|    |m03, m13, m23, m33|
			};
			T m[16];
		};
	};

	template <class T> struct Matrix3_ {
		union { // Warning: see the header in this file for the special matrix order
			struct {
				T m00, m01, m02;	// |0, 3, 6|    |m00, m10, m20|
				T m10, m11, m12;	// |1, 4, 7|    |m01, m11, m21|
				T m20, m21, m22;	// |2, 5, 8|    |m02, m12, m22|
			};
			T m[9];
		};
	};

	template <class T, int s> struct MatrixSQX_ {
		int dim;
		int sq;
		T m[s*s];
	};

	//////////////////////////////////////////////////////////////////////////
	// Matrix class with math operators
	//////////////////////////////////////////////////////////////////////////
	template <class T>
	class Matrix4 : public Matrix4_ < T >
	{
	public:
		_CPU_AND_GPU_CODE_ Matrix4() {}
		_CPU_AND_GPU_CODE_ Matrix4(T t) { setValues(t); }
		_CPU_AND_GPU_CODE_ Matrix4(const T *m) { setValues(m); }
		_CPU_AND_GPU_CODE_ Matrix4(T a00, T a01, T a02, T a03, T a10, T a11, T a12, T a13,
			T a20, T a21, T a22, T a23, T a30, T a31, T a32, T a33) {
			this->m00 = a00; this->m01 = a01; this->m02 = a02; this->m03 = a03;
			this->m10 = a10; this->m11 = a11; this->m12 = a12; this->m13 = a13;
			this->m20 = a20; this->m21 = a21; this->m22 = a22; this->m23 = a23;
			this->m30 = a30; this->m31 = a31; this->m32 = a32; this->m33 = a33;
		}

		_CPU_AND_GPU_CODE_ inline void getValues(T *mp) const { memcpy(mp, this->m, sizeof(T) * 16); }
		_CPU_AND_GPU_CODE_ inline const T *getValues() const { return this->m; }
		_CPU_AND_GPU_CODE_ inline Vector3<T> getScale() const { return Vector3<T>(this->m00, this->m11, this->m22); }

		// Element access
		_CPU_AND_GPU_CODE_ inline T &operator()(int x, int y) { return at(x, y); }
		_CPU_AND_GPU_CODE_ inline const T &operator()(int x, int y) const { return at(x, y); }
		_CPU_AND_GPU_CODE_ inline T &operator()(Vector2<int> pnt) { return at(pnt.x, pnt.y); }
		_CPU_AND_GPU_CODE_ inline const T &operator()(Vector2<int> pnt) const { return at(pnt.x, pnt.y); }
		_CPU_AND_GPU_CODE_ inline T &at(int x, int y) { return this->m[y | (x << 2)]; }
		_CPU_AND_GPU_CODE_ inline const T &at(int x, int y) const { return this->m[y | (x << 2)]; }

		// set values
		_CPU_AND_GPU_CODE_ inline void setValues(const T *mp) { memcpy(this->m, mp, sizeof(T) * 16); }
		_CPU_AND_GPU_CODE_ inline void setValues(T r) { for (int i = 0; i < 16; i++) this->m[i] = r; }
		_CPU_AND_GPU_CODE_ inline void setZeros() { memset(this->m, 0, sizeof(T) * 16); }
		_CPU_AND_GPU_CODE_ inline void setIdentity() { setZeros(); this->m00 = this->m11 = this->m22 = this->m33 = 1; }
		_CPU_AND_GPU_CODE_ inline void setScale(T s) { this->m00 = this->m11 = this->m22 = s; }
		_CPU_AND_GPU_CODE_ inline void setScale(const Vector3_<T> &s) { this->m00 = s[0]; this->m11 = s[1]; this->m22 = s[2]; }
		_CPU_AND_GPU_CODE_ inline void setTranslate(const Vector3_<T> &t) { for (int y = 0; y < 3; y++) at(3, y) = t[y]; }
		_CPU_AND_GPU_CODE_ inline void setRow(int r, const Vector4_<T> &t) { for (int x = 0; x < 4; x++) at(x, r) = t[x]; }
		_CPU_AND_GPU_CODE_ inline void setColumn(int c, const Vector4_<T> &t) { memcpy(this->m + 4 * c, t.v, sizeof(T) * 4); }

		// get values
		_CPU_AND_GPU_CODE_ inline Vector4<T> getRow(int r) const { Vector4<T> v; for (int x = 0; x < 4; x++) v[x] = at(x, r); return v; }
		_CPU_AND_GPU_CODE_ inline Vector4<T> getColumn(int c) const { Vector4<T> v; memcpy(v.v, this->m + 4 * c, sizeof(T) * 4); return v; }
		_CPU_AND_GPU_CODE_ inline Matrix4 t() { // transpose
			Matrix4 mtrans;
			for (int x = 0; x < 4; x++) for (int y = 0; y < 4; y++)
				mtrans(x, y) = at(y, x);
			return mtrans;
		}

		_CPU_AND_GPU_CODE_ inline friend Matrix4 operator * (const Matrix4 &lhs, const Matrix4 &rhs) {
			Matrix4 r;
			r.setZeros();
			for (int x = 0; x < 4; x++) for (int y = 0; y < 4; y++) for (int k = 0; k < 4; k++)
				r(x, y) += lhs(k, y) * rhs(x, k);
			return r;
		}

		_CPU_AND_GPU_CODE_ inline friend Matrix4 operator + (const Matrix4 &lhs, const Matrix4 &rhs) {
			Matrix4 res(lhs.m);
			return res += rhs;
		}

		_CPU_AND_GPU_CODE_ inline Vector4<T> operator *(const Vector4<T> &rhs) const {
			Vector4<T> r;
			r[0] = this->m[0] * rhs[0] + this->m[4] * rhs[1] + this->m[8] * rhs[2] + this->m[12] * rhs[3];
			r[1] = this->m[1] * rhs[0] + this->m[5] * rhs[1] + this->m[9] * rhs[2] + this->m[13] * rhs[3];
			r[2] = this->m[2] * rhs[0] + this->m[6] * rhs[1] + this->m[10] * rhs[2] + this->m[14] * rhs[3];
			r[3] = this->m[3] * rhs[0] + this->m[7] * rhs[1] + this->m[11] * rhs[2] + this->m[15] * rhs[3];
			return r;
		}

		// Used as a projection matrix to multiply with the Vector3
		_CPU_AND_GPU_CODE_ inline Vector3<T> operator *(const Vector3<T> &rhs) const {
			Vector3<T> r;
			r[0] = this->m[0] * rhs[0] + this->m[4] * rhs[1] + this->m[8] * rhs[2] + this->m[12];
			r[1] = this->m[1] * rhs[0] + this->m[5] * rhs[1] + this->m[9] * rhs[2] + this->m[13];
			r[2] = this->m[2] * rhs[0] + this->m[6] * rhs[1] + this->m[10] * rhs[2] + this->m[14];
			return r;
		}

		_CPU_AND_GPU_CODE_ inline friend Vector4<T> operator *(const Vector4<T> &lhs, const Matrix4 &rhs) {
			Vector4<T> r;
			for (int x = 0; x < 4; x++)
				r[x] = lhs[0] * rhs(x, 0) + lhs[1] * rhs(x, 1) + lhs[2] * rhs(x, 2) + lhs[3] * rhs(x, 3);
			return r;
		}

		_CPU_AND_GPU_CODE_ inline Matrix4& operator += (const T &r) { for (int i = 0; i < 16; ++i) this->m[i] += r; return *this; }
		_CPU_AND_GPU_CODE_ inline Matrix4& operator -= (const T &r) { for (int i = 0; i < 16; ++i) this->m[i] -= r; return *this; }
		_CPU_AND_GPU_CODE_ inline Matrix4& operator *= (const T &r) { for (int i = 0; i < 16; ++i) this->m[i] *= r; return *this; }
		_CPU_AND_GPU_CODE_ inline Matrix4& operator /= (const T &r) { for (int i = 0; i < 16; ++i) this->m[i] /= r; return *this; }
		_CPU_AND_GPU_CODE_ inline Matrix4 &operator += (const Matrix4 &mat) { for (int i = 0; i < 16; ++i) this->m[i] += mat.m[i]; return *this; }
		_CPU_AND_GPU_CODE_ inline Matrix4 &operator -= (const Matrix4 &mat) { for (int i = 0; i < 16; ++i) this->m[i] -= mat.m[i]; return *this; }

		_CPU_AND_GPU_CODE_ inline friend bool operator == (const Matrix4 &lhs, const Matrix4 &rhs) {
			bool r = lhs.m[0] == rhs.m[0];
			for (int i = 1; i < 16; i++)
				r &= lhs.m[i] == rhs.m[i];
			return r;
		}

		_CPU_AND_GPU_CODE_ inline friend bool operator != (const Matrix4 &lhs, const Matrix4 &rhs) {
			bool r = lhs.m[0] != rhs.m[0];
			for (int i = 1; i < 16; i++)
				r |= lhs.m[i] != rhs.m[i];
			return r;
		}

		// The inverse matrix for float/double type
		_CPU_AND_GPU_CODE_ inline bool inv(Matrix4 &out) const {
			T tmp[12], src[16], det;
			T *dst = out.m;
			for (int i = 0; i < 4; i++) {
				src[i] = this->m[i * 4];
				src[i + 4] = this->m[i * 4 + 1];
				src[i + 8] = this->m[i * 4 + 2];
				src[i + 12] = this->m[i * 4 + 3];
			}

			tmp[0] = src[10] * src[15];
			tmp[1] = src[11] * src[14];
			tmp[2] = src[9] * src[15];
			tmp[3] = src[11] * src[13];
			tmp[4] = src[9] * src[14];
			tmp[5] = src[10] * src[13];
			tmp[6] = src[8] * src[15];
			tmp[7] = src[11] * src[12];
			tmp[8] = src[8] * src[14];
			tmp[9] = src[10] * src[12];
			tmp[10] = src[8] * src[13];
			tmp[11] = src[9] * src[12];

			dst[0] = (tmp[0] * src[5] + tmp[3] * src[6] + tmp[4] * src[7]) - (tmp[1] * src[5] + tmp[2] * src[6] + tmp[5] * src[7]);
			dst[1] = (tmp[1] * src[4] + tmp[6] * src[6] + tmp[9] * src[7]) - (tmp[0] * src[4] + tmp[7] * src[6] + tmp[8] * src[7]);
			dst[2] = (tmp[2] * src[4] + tmp[7] * src[5] + tmp[10] * src[7]) - (tmp[3] * src[4] + tmp[6] * src[5] + tmp[11] * src[7]);
			dst[3] = (tmp[5] * src[4] + tmp[8] * src[5] + tmp[11] * src[6]) - (tmp[4] * src[4] + tmp[9] * src[5] + tmp[10] * src[6]);

			det = src[0] * dst[0] + src[1] * dst[1] + src[2] * dst[2] + src[3] * dst[3];
			if (det == 0.0f)
				return false;

			dst[4] = (tmp[1] * src[1] + tmp[2] * src[2] + tmp[5] * src[3]) - (tmp[0] * src[1] + tmp[3] * src[2] + tmp[4] * src[3]);
			dst[5] = (tmp[0] * src[0] + tmp[7] * src[2] + tmp[8] * src[3]) - (tmp[1] * src[0] + tmp[6] * src[2] + tmp[9] * src[3]);
			dst[6] = (tmp[3] * src[0] + tmp[6] * src[1] + tmp[11] * src[3]) - (tmp[2] * src[0] + tmp[7] * src[1] + tmp[10] * src[3]);
			dst[7] = (tmp[4] * src[0] + tmp[9] * src[1] + tmp[10] * src[2]) - (tmp[5] * src[0] + tmp[8] * src[1] + tmp[11] * src[2]);

			tmp[0] = src[2] * src[7];
			tmp[1] = src[3] * src[6];
			tmp[2] = src[1] * src[7];
			tmp[3] = src[3] * src[5];
			tmp[4] = src[1] * src[6];
			tmp[5] = src[2] * src[5];
			tmp[6] = src[0] * src[7];
			tmp[7] = src[3] * src[4];
			tmp[8] = src[0] * src[6];
			tmp[9] = src[2] * src[4];
			tmp[10] = src[0] * src[5];
			tmp[11] = src[1] * src[4];

			dst[8] = (tmp[0] * src[13] + tmp[3] * src[14] + tmp[4] * src[15]) - (tmp[1] * src[13] + tmp[2] * src[14] + tmp[5] * src[15]);
			dst[9] = (tmp[1] * src[12] + tmp[6] * src[14] + tmp[9] * src[15]) - (tmp[0] * src[12] + tmp[7] * src[14] + tmp[8] * src[15]);
			dst[10] = (tmp[2] * src[12] + tmp[7] * src[13] + tmp[10] * src[15]) - (tmp[3] * src[12] + tmp[6] * src[13] + tmp[11] * src[15]);
			dst[11] = (tmp[5] * src[12] + tmp[8] * src[13] + tmp[11] * src[14]) - (tmp[4] * src[12] + tmp[9] * src[13] + tmp[10] * src[14]);
			dst[12] = (tmp[2] * src[10] + tmp[5] * src[11] + tmp[1] * src[9]) - (tmp[4] * src[11] + tmp[0] * src[9] + tmp[3] * src[10]);
			dst[13] = (tmp[8] * src[11] + tmp[0] * src[8] + tmp[7] * src[10]) - (tmp[6] * src[10] + tmp[9] * src[11] + tmp[1] * src[8]);
			dst[14] = (tmp[6] * src[9] + tmp[11] * src[11] + tmp[3] * src[8]) - (tmp[10] * src[11] + tmp[2] * src[8] + tmp[7] * src[9]);
			dst[15] = (tmp[10] * src[10] + tmp[4] * src[8] + tmp[9] * src[9]) - (tmp[8] * src[9] + tmp[11] * src[10] + tmp[5] * src[8]);

			out *= 1 / det;
			return true;
		}

		friend std::ostream& operator<<(std::ostream& os, const Matrix4& dt) {
			for (int y = 0; y < 4; y++)
				os << dt(0, y) << ", " << dt(1, y) << ", " << dt(2, y) << ", " << dt(3, y) << "\n";
			return os;
		}
	};

	template <class T>
	class Matrix3 : public Matrix3_ < T >
	{
	public:
		_CPU_AND_GPU_CODE_ Matrix3() {}
		_CPU_AND_GPU_CODE_ Matrix3(T t) { setValues(t); }
		_CPU_AND_GPU_CODE_ Matrix3(const T *m) { setValues(m); }
		_CPU_AND_GPU_CODE_ Matrix3(T a00, T a01, T a02, T a10, T a11, T a12, T a20, T a21, T a22) {
			this->m00 = a00; this->m01 = a01; this->m02 = a02;
			this->m10 = a10; this->m11 = a11; this->m12 = a12;
			this->m20 = a20; this->m21 = a21; this->m22 = a22;
		}

		_CPU_AND_GPU_CODE_ inline void getValues(T *mp) const { memcpy(mp, this->m, sizeof(T) * 9); }
		_CPU_AND_GPU_CODE_ inline const T *getValues() const { return this->m; }
		_CPU_AND_GPU_CODE_ inline Vector3<T> getScale() const { return Vector3<T>(this->m00, this->m11, this->m22); }

		// Element access
		_CPU_AND_GPU_CODE_ inline T &operator()(int x, int y) { return at(x, y); }
		_CPU_AND_GPU_CODE_ inline const T &operator()(int x, int y) const { return at(x, y); }
		_CPU_AND_GPU_CODE_ inline T &operator()(Vector2<int> pnt) { return at(pnt.x, pnt.y); }
		_CPU_AND_GPU_CODE_ inline const T &operator()(Vector2<int> pnt) const { return at(pnt.x, pnt.y); }
		_CPU_AND_GPU_CODE_ inline T &at(int x, int y) { return this->m[x * 3 + y]; }
		_CPU_AND_GPU_CODE_ inline const T &at(int x, int y) const { return this->m[x * 3 + y]; }

		// set values
		_CPU_AND_GPU_CODE_ inline void setValues(const T *mp) { memcpy(this->m, mp, sizeof(T) * 9); }
		_CPU_AND_GPU_CODE_ inline void setValues(const T r) { for (int i = 0; i < 9; i++) this->m[i] = r; }
		_CPU_AND_GPU_CODE_ inline void setZeros() { memset(this->m, 0, sizeof(T) * 9); }
		_CPU_AND_GPU_CODE_ inline void setIdentity() { setZeros(); this->m00 = this->m11 = this->m22 = 1; }
		_CPU_AND_GPU_CODE_ inline void setScale(T s) { this->m00 = this->m11 = this->m22 = s; }
		_CPU_AND_GPU_CODE_ inline void setScale(const Vector3_<T> &s) { this->m00 = s[0]; this->m11 = s[1]; this->m22 = s[2]; }
		_CPU_AND_GPU_CODE_ inline void setRow(int r, const Vector3_<T> &t) { for (int x = 0; x < 3; x++) at(x, r) = t[x]; }
		_CPU_AND_GPU_CODE_ inline void setColumn(int c, const Vector3_<T> &t) { memcpy(this->m + 3 * c, t.v, sizeof(T) * 3); }

		// get values
		_CPU_AND_GPU_CODE_ inline Vector3<T> getRow(int r) const { Vector3<T> v; for (int x = 0; x < 3; x++) v[x] = at(x, r); return v; }
		_CPU_AND_GPU_CODE_ inline Vector3<T> getColumn(int c) const { Vector3<T> v; memcpy(v.v, this->m + 3 * c, sizeof(T) * 3); return v; }
		_CPU_AND_GPU_CODE_ inline Matrix3 t() { // transpose
			Matrix3 mtrans;
			for (int x = 0; x < 3; x++) for (int y = 0; y < 3; y++)
				mtrans(x, y) = at(y, x);
			return mtrans;
		}

		_CPU_AND_GPU_CODE_ inline friend Matrix3 operator * (const Matrix3 &lhs, const Matrix3 &rhs) {
			Matrix3 r;
			r.setZeros();
			for (int x = 0; x < 3; x++) for (int y = 0; y < 3; y++) for (int k = 0; k < 3; k++)
				r(x, y) += lhs(k, y) * rhs(x, k);
			return r;
		}

		_CPU_AND_GPU_CODE_ inline friend Matrix3 operator + (const Matrix3 &lhs, const Matrix3 &rhs) {
			Matrix3 res(lhs.m);
			return res += rhs;
		}

		_CPU_AND_GPU_CODE_ inline Vector3<T> operator *(const Vector3<T> &rhs) const {
			Vector3<T> r;
			r[0] = this->m[0] * rhs[0] + this->m[3] * rhs[1] + this->m[6] * rhs[2];
			r[1] = this->m[1] * rhs[0] + this->m[4] * rhs[1] + this->m[7] * rhs[2];
			r[2] = this->m[2] * rhs[0] + this->m[5] * rhs[1] + this->m[8] * rhs[2];
			return r;
		}

		// NB: returns by value; the original returned a reference to a local.
		_CPU_AND_GPU_CODE_ inline Matrix3 operator *(const T &r) const {
			Matrix3 res(this->m);
			return res *= r;
		}

		_CPU_AND_GPU_CODE_ inline friend Vector3<T> operator *(const Vector3<T> &lhs, const Matrix3 &rhs) {
			Vector3<T> r;
			for (int x = 0; x < 3; x++)
				r[x] = lhs[0] * rhs(x, 0) + lhs[1] * rhs(x, 1) + lhs[2] * rhs(x, 2);
			return r;
		}

		_CPU_AND_GPU_CODE_ inline Matrix3& operator += (const T &r) { for (int i = 0; i < 9; ++i) this->m[i] += r; return *this; }
		_CPU_AND_GPU_CODE_ inline Matrix3& operator -= (const T &r) { for (int i = 0; i < 9; ++i) this->m[i] -= r; return *this; }
		_CPU_AND_GPU_CODE_ inline Matrix3& operator *= (const T &r) { for (int i = 0; i < 9; ++i) this->m[i] *= r; return *this; }
		_CPU_AND_GPU_CODE_ inline Matrix3& operator /= (const T &r) { for (int i = 0; i < 9; ++i) this->m[i] /= r; return *this; }
		_CPU_AND_GPU_CODE_ inline Matrix3& operator += (const Matrix3 &mat) { for (int i = 0; i < 9; ++i) this->m[i] += mat.m[i]; return *this; }
		_CPU_AND_GPU_CODE_ inline Matrix3& operator -= (const Matrix3 &mat) { for (int i = 0; i < 9; ++i) this->m[i] -= mat.m[i]; return *this; }

		_CPU_AND_GPU_CODE_ inline friend bool operator == (const Matrix3 &lhs, const Matrix3 &rhs) {
			bool r = lhs.m[0] == rhs.m[0];
			for (int i = 1; i < 9; i++)
				r &= lhs.m[i] == rhs.m[i];
			return r;
		}

		_CPU_AND_GPU_CODE_ inline friend bool operator != (const Matrix3 &lhs, const Matrix3 &rhs) {
			bool r = lhs.m[0] != rhs.m[0];
			for (int i = 1; i < 9; i++)
				r |= lhs.m[i] != rhs.m[i];
			return r;
		}

		// Matrix determinant
		_CPU_AND_GPU_CODE_ inline T det() const {
			return (this->m11*this->m22 - this->m12*this->m21)*this->m00 + (this->m12*this->m20 - this->m10*this->m22)*this->m01 + (this->m10*this->m21 - this->m11*this->m20)*this->m02;
		}

		// The inverse matrix for float/double type
		_CPU_AND_GPU_CODE_ inline bool inv(Matrix3 &out) const {
			T determinant = det();
			if (determinant == 0) {
				out.setZeros();
				return false;
			}

			out.m00 = (this->m11*this->m22 - this->m12*this->m21) / determinant;
			out.m01 = (this->m02*this->m21 - this->m01*this->m22) / determinant;
			out.m02 = (this->m01*this->m12 - this->m02*this->m11) / determinant;
			out.m10 = (this->m12*this->m20 - this->m10*this->m22) / determinant;
			out.m11 = (this->m00*this->m22 - this->m02*this->m20) / determinant;
			out.m12 = (this->m02*this->m10 - this->m00*this->m12) / determinant;
			out.m20 = (this->m10*this->m21 - this->m11*this->m20) / determinant;
			out.m21 = (this->m01*this->m20 - this->m00*this->m21) / determinant;
			out.m22 = (this->m00*this->m11 - this->m01*this->m10) / determinant;
			return true;
		}

		friend std::ostream& operator<<(std::ostream& os, const Matrix3& dt) {
			for (int y = 0; y < 3; y++)
				os << dt(0, y) << ", " << dt(1, y) << ", " << dt(2, y) << "\n";
			return os;
		}
	};

	template <class T, int s>
	class MatrixSQX : public MatrixSQX_ < T, s >
	{
	public:
		_CPU_AND_GPU_CODE_ MatrixSQX() { this->dim = s; this->sq = s*s; }
		_CPU_AND_GPU_CODE_ MatrixSQX(T t) { this->dim = s; this->sq = s*s; setValues(t); }
		_CPU_AND_GPU_CODE_ MatrixSQX(const T *m) { this->dim = s; this->sq = s*s; setValues(m); }

		// NB: copies s*s entries; the original hard-coded 16.
		_CPU_AND_GPU_CODE_ inline void getValues(T *mp) const { memcpy(mp, this->m, sizeof(T) * s * s); }
		_CPU_AND_GPU_CODE_ inline const T *getValues() const { return this->m; }

		// Element access
		_CPU_AND_GPU_CODE_ inline T &operator()(int x, int y) { return at(x, y); }
		_CPU_AND_GPU_CODE_ inline const T &operator()(int x, int y) const { return at(x, y); }
		_CPU_AND_GPU_CODE_ inline T &operator()(Vector2<int> pnt) { return at(pnt.x, pnt.y); }
		_CPU_AND_GPU_CODE_ inline const T &operator()(Vector2<int> pnt) const { return at(pnt.x, pnt.y); }
		_CPU_AND_GPU_CODE_ inline T &at(int x, int y) { return this->m[y * s + x]; }
		_CPU_AND_GPU_CODE_ inline const T &at(int x, int y) const { return this->m[y * s + x]; }

		// set values
		_CPU_AND_GPU_CODE_ inline void setValues(const T *mp) { for (int i = 0; i < s*s; i++) this->m[i] = mp[i]; }
		_CPU_AND_GPU_CODE_ inline void setValues(T r) { for (int i = 0; i < s*s; i++) this->m[i] = r; }
		_CPU_AND_GPU_CODE_ inline void setZeros() { for (int i = 0; i < s*s; i++) this->m[i] = 0; }
		// NB: iterates over the s diagonal entries; the original looped to s*s and overran.
		_CPU_AND_GPU_CODE_ inline void setIdentity() { setZeros(); for (int i = 0; i < s; i++) this->m[i + i*s] = 1; }

		// get values
		_CPU_AND_GPU_CODE_ inline VectorX<T, s> getRow(int r) const { VectorX<T, s> v; for (int x = 0; x < s; x++) v[x] = at(x, r); return v; }
		_CPU_AND_GPU_CODE_ inline VectorX<T, s> getColumn(int c) const { VectorX<T, s> v; for (int x = 0; x < s; x++) v[x] = at(c, x); return v; }
		_CPU_AND_GPU_CODE_ inline MatrixSQX getTranspose()
		{ // transpose
			MatrixSQX mtrans;
			for (int x = 0; x < s; x++) for (int y = 0; y < s; y++)
				mtrans(x, y) = at(y, x);
			return mtrans;
		}

		_CPU_AND_GPU_CODE_ inline friend MatrixSQX operator * (const MatrixSQX &lhs, const MatrixSQX &rhs) {
			MatrixSQX r;
			r.setZeros();
			for (int x = 0; x < s; x++) for (int y = 0; y < s; y++) for (int k = 0; k < s; k++)
				r(x, y) += lhs(k, y) * rhs(x, k);
			return r;
		}

		_CPU_AND_GPU_CODE_ inline friend MatrixSQX operator + (const MatrixSQX &lhs, const MatrixSQX &rhs) {
			MatrixSQX res(lhs.m);
			return res += rhs;
		}

		_CPU_AND_GPU_CODE_ inline MatrixSQX& operator += (const T &r) { for (int i = 0; i < s*s; ++i) this->m[i] += r; return *this; }
		_CPU_AND_GPU_CODE_ inline MatrixSQX& operator -= (const T &r) { for (int i = 0; i < s*s; ++i) this->m[i] -= r; return *this; }
		_CPU_AND_GPU_CODE_ inline MatrixSQX& operator *= (const T &r) { for (int i = 0; i < s*s; ++i) this->m[i] *= r; return *this; }
		_CPU_AND_GPU_CODE_ inline MatrixSQX& operator /= (const T &r) { for (int i = 0; i < s*s; ++i) this->m[i] /= r; return *this; }
		_CPU_AND_GPU_CODE_ inline MatrixSQX &operator += (const MatrixSQX &mat) { for (int i = 0; i < s*s; ++i) this->m[i] += mat.m[i]; return *this; }
		_CPU_AND_GPU_CODE_ inline MatrixSQX &operator -= (const MatrixSQX &mat) { for (int i = 0; i < s*s; ++i) this->m[i] -= mat.m[i]; return *this; }

		_CPU_AND_GPU_CODE_ inline friend bool operator == (const MatrixSQX &lhs, const MatrixSQX &rhs) {
			bool r = lhs.m[0] == rhs.m[0];
			for (int i = 1; i < s*s; i++)
				r &= lhs.m[i] == rhs.m[i];
			return r;
		}

		_CPU_AND_GPU_CODE_ inline friend bool operator != (const MatrixSQX &lhs, const MatrixSQX &rhs) {
			bool r = lhs.m[0] != rhs.m[0];
			for (int i = 1; i < s*s; i++)
				r |= lhs.m[i] != rhs.m[i];
			return r;
		}

		friend std::ostream& operator<<(std::ostream& os, const MatrixSQX& dt) {
			for (int y = 0; y < s; y++)
			{
				for (int x = 0; x < s; x++) os << dt(x, y) << "\t";
				os << "\n";
			}
			return os;
		}
	};

};

--------------------------------------------------------------------------------
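A small sketch exercising Matrix4 (illustrative, not from the library; the include order reflects that Matrix.h and Vector.h rely on macros from PlatformIndependence.h and MathUtils.h):

#include "PlatformIndependence.h"
#include "MathUtils.h"
#include "Vector.h"
#include "Matrix.h"

void example_matrix()
{
	ORUtils::Matrix4<float> M;
	M.setIdentity();
	M.setScale(2.0f); // diagonal becomes (2, 2, 2, 1)
	M.setTranslate(ORUtils::Vector3<float>(1.0f, 2.0f, 3.0f));

	ORUtils::Matrix4<float> Minv;
	if (M.inv(Minv))
	{
		// With column-major storage, M * Minv should come out as the identity.
		ORUtils::Matrix4<float> identity = M * Minv;
	}

	// Applying the matrix to a point (w is implicitly 1 in this overload).
	ORUtils::Vector3<float> p = M * ORUtils::Vector3<float>(1.0f, 0.0f, 0.0f);
}
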
/MemoryBlock.h:
--------------------------------------------------------------------------------
// Copyright 2014-2015 Isis Innovation Limited and the authors of InfiniTAM

#pragma once

#include "PlatformIndependence.h"

#ifndef COMPILE_WITHOUT_CUDA
#include "CUDADefines.h"
#endif

#ifndef __METALC__

#ifdef COMPILE_WITH_METAL
#include "MetalContext.h"
#endif

#include <stdlib.h>
#include <string.h>

#endif

#ifndef MEMORY_DEVICE_TYPE
#define MEMORY_DEVICE_TYPE
enum MemoryDeviceType { MEMORYDEVICE_CPU, MEMORYDEVICE_CUDA };
#endif

namespace ORUtils
{
	/** \brief
	    Represents memory blocks, templated on the data type
	*/
	template <typename T>
	class MemoryBlock
	{
	protected:
#ifndef __METALC__
		bool isAllocated_CPU, isAllocated_CUDA, isMetalCompatible;
#endif
		/** Pointer to memory on CPU host. */
		DEVICEPTR(T)* data_cpu;

		/** Pointer to memory on GPU, if available. */
		DEVICEPTR(T)* data_cuda;

#ifndef __METALC__

#ifdef COMPILE_WITH_METAL
		void *data_metalBuffer;
#endif

#endif
	public:
		enum MemoryCopyDirection { CPU_TO_CPU, CPU_TO_CUDA, CUDA_TO_CPU, CUDA_TO_CUDA };

		/** Total number of allocated entries in the data array. */
		size_t dataSize;

		/** Get the data pointer on CPU or GPU. */
		inline DEVICEPTR(T)* GetData(MemoryDeviceType memoryType)
		{
			switch (memoryType)
			{
			case MEMORYDEVICE_CPU: return data_cpu;
			case MEMORYDEVICE_CUDA: return data_cuda;
			}

			return 0;
		}

		/** Get the data pointer on CPU or GPU. */
		inline const DEVICEPTR(T)* GetData(MemoryDeviceType memoryType) const
		{
			switch (memoryType)
			{
			case MEMORYDEVICE_CPU: return data_cpu;
			case MEMORYDEVICE_CUDA: return data_cuda;
			}

			return 0;
		}

#ifndef __METALC__

#ifdef COMPILE_WITH_METAL
		inline const void *GetMetalBuffer() const { return data_metalBuffer; }
#endif

		/** Initialize an empty memory block of the given size,
		    on CPU only or GPU only or on both. CPU might also use the
		    Metal compatible allocator (i.e. with 16384 alignment).
		*/
		MemoryBlock(size_t dataSize, bool allocate_CPU, bool allocate_CUDA, bool metalCompatible = true)
		{
			this->isAllocated_CPU = false;
			this->isAllocated_CUDA = false;
			this->isMetalCompatible = false;

			Allocate(dataSize, allocate_CPU, allocate_CUDA, metalCompatible);
			Clear();
		}

		/** Initialize an empty memory block of the given size, either
		    on CPU only or on GPU only. CPU will be Metal compatible if Metal
		    is enabled.
		*/
		MemoryBlock(size_t dataSize, MemoryDeviceType memoryType)
		{
			this->isAllocated_CPU = false;
			this->isAllocated_CUDA = false;
			this->isMetalCompatible = false;

			switch (memoryType)
			{
			case MEMORYDEVICE_CPU: Allocate(dataSize, true, false, true); break;
			case MEMORYDEVICE_CUDA: Allocate(dataSize, false, true, true); break;
			}

			Clear();
		}

		/** Set all image data to the given @p defaultValue. */
		void Clear(unsigned char defaultValue = 0)
		{
			if (isAllocated_CPU) memset(data_cpu, defaultValue, dataSize * sizeof(T));
#ifndef COMPILE_WITHOUT_CUDA
			if (isAllocated_CUDA) ORcudaSafeCall(cudaMemset(data_cuda, defaultValue, dataSize * sizeof(T)));
#endif
		}

		/** Transfer data from CPU to GPU, if possible. */
		void UpdateDeviceFromHost() const {
#ifndef COMPILE_WITHOUT_CUDA
			if (isAllocated_CUDA && isAllocated_CPU)
				ORcudaSafeCall(cudaMemcpy(data_cuda, data_cpu, dataSize * sizeof(T), cudaMemcpyHostToDevice));
#endif
		}

		/** Transfer data from GPU to CPU, if possible. */
		void UpdateHostFromDevice() const {
#ifndef COMPILE_WITHOUT_CUDA
			if (isAllocated_CUDA && isAllocated_CPU)
				ORcudaSafeCall(cudaMemcpy(data_cpu, data_cuda, dataSize * sizeof(T), cudaMemcpyDeviceToHost));
#endif
		}

		/** Copy data */
		void SetFrom(const MemoryBlock<T> *source, MemoryCopyDirection memoryCopyDirection)
		{
			switch (memoryCopyDirection)
			{
			case CPU_TO_CPU:
				memcpy(this->data_cpu, source->data_cpu, source->dataSize * sizeof(T));
				break;
#ifndef COMPILE_WITHOUT_CUDA
			case CPU_TO_CUDA:
				ORcudaSafeCall(cudaMemcpyAsync(this->data_cuda, source->data_cpu, source->dataSize * sizeof(T), cudaMemcpyHostToDevice));
				break;
			case CUDA_TO_CPU:
				ORcudaSafeCall(cudaMemcpy(this->data_cpu, source->data_cuda, source->dataSize * sizeof(T), cudaMemcpyDeviceToHost));
				break;
			case CUDA_TO_CUDA:
				ORcudaSafeCall(cudaMemcpyAsync(this->data_cuda, source->data_cuda, source->dataSize * sizeof(T), cudaMemcpyDeviceToDevice));
				break;
#endif
			default: break;
			}
		}

		virtual ~MemoryBlock() { this->Free(); }

		/** Allocate image data of the specified size. If the
		    data has been allocated before, the data is freed.
		*/
		void Allocate(size_t dataSize, bool allocate_CPU, bool allocate_CUDA, bool metalCompatible)
		{
			Free();

			this->dataSize = dataSize;
			if (dataSize == 0) return;

			if (allocate_CPU)
			{
				int allocType = 0;

#ifndef COMPILE_WITHOUT_CUDA
				if (allocate_CUDA) allocType = 1;
#endif
#ifdef COMPILE_WITH_METAL
				if (metalCompatible) allocType = 2;
#endif
				switch (allocType)
				{
				case 0:
					data_cpu = new T[dataSize];
					break;
				case 1:
#ifndef COMPILE_WITHOUT_CUDA
					ORcudaSafeCall(cudaMallocHost((void**)&data_cpu, dataSize * sizeof(T)));
#endif
					break;
				case 2:
#ifdef COMPILE_WITH_METAL
					allocateMetalData((void**)&data_cpu, (void**)&data_metalBuffer, dataSize * sizeof(T), true);
#endif
					break;
				}

				this->isAllocated_CPU = allocate_CPU;
				this->isMetalCompatible = metalCompatible;
			}

			if (allocate_CUDA)
			{
#ifndef COMPILE_WITHOUT_CUDA
				ORcudaSafeCall(cudaMalloc((void**)&data_cuda, dataSize * sizeof(T)));
				this->isAllocated_CUDA = allocate_CUDA;
#endif
			}
		}

		void Free()
		{
			if (isAllocated_CPU)
			{
				int allocType = 0;

#ifndef COMPILE_WITHOUT_CUDA
				if (isAllocated_CUDA) allocType = 1;
#endif
#ifdef COMPILE_WITH_METAL
				if (isMetalCompatible) allocType = 2;
#endif
				switch (allocType)
				{
				case 0:
					delete[] data_cpu;
					break;
				case 1:
#ifndef COMPILE_WITHOUT_CUDA
					ORcudaSafeCall(cudaFreeHost(data_cpu));
#endif
					break;
				case 2:
#ifdef COMPILE_WITH_METAL
					freeMetalData((void**)&data_cpu, (void**)&data_metalBuffer, dataSize * sizeof(T), true);
#endif
					break;
				}

				isMetalCompatible = false;
				isAllocated_CPU = false;
			}

			if (isAllocated_CUDA)
			{
#ifndef COMPILE_WITHOUT_CUDA
				ORcudaSafeCall(cudaFree(data_cuda));
#endif
				isAllocated_CUDA = false;
			}
		}

		// Suppress the default copy constructor and assignment operator
		MemoryBlock(const MemoryBlock&);
		MemoryBlock& operator=(const MemoryBlock&);
#endif
	};
}

--------------------------------------------------------------------------------
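A minimal sketch of the MemoryBlock workflow: allocate on both devices, fill on the CPU, then push to the GPU (assumes a CUDA build; with COMPILE_WITHOUT_CUDA the update calls compile to no-ops):

#include "MemoryBlock.h"

void example_block()
{
	// 256 floats on both CPU and GPU (the defaulted last flag requests a
	// Metal-compatible CPU allocation where that is enabled).
	ORUtils::MemoryBlock<float> block(256, true, true);

	float *cpu = block.GetData(MEMORYDEVICE_CPU);
	for (size_t i = 0; i < block.dataSize; ++i) cpu[i] = (float)i;

	block.UpdateDeviceFromHost();  // CPU -> GPU copy
	// ... run a kernel on block.GetData(MEMORYDEVICE_CUDA) ...
	block.UpdateHostFromDevice();  // GPU -> CPU copy
}
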
/MemoryBlockPersister.h:
--------------------------------------------------------------------------------
// Copyright 2014-2015 Isis Innovation Limited and the authors of InfiniTAM

#pragma once

#include <fstream>
#include <stdexcept>
#include <string>

#include "MemoryBlock.h"

namespace ORUtils
{

/**
 * \brief This class provides functions for loading and saving memory blocks.
 */
class MemoryBlockPersister
{
	//#################### PUBLIC STATIC MEMBER FUNCTIONS ####################
public:
	/**
	 * \brief Loads data from a file on disk into a memory block.
	 *
	 * \param filename          The name of the file.
	 * \param block             The memory block into which to load the data.
	 * \param memoryDeviceType  The type of memory device on which to load the data.
	 */
	template <typename T>
	static void LoadMemoryBlock(const std::string& filename, ORUtils::MemoryBlock<T>& block, MemoryDeviceType memoryDeviceType)
	{
		int blockSize = ReadBlockSize(filename);
		if(memoryDeviceType == MEMORYDEVICE_CUDA)
		{
			// If we're loading into a block on the GPU, first try and read the data into a temporary block on the CPU.
			ORUtils::MemoryBlock<T> cpuBlock(block.dataSize, MEMORYDEVICE_CPU);
			ReadBlockData(filename, cpuBlock, blockSize);

			// Then copy the data across to the GPU.
			block.SetFrom(&cpuBlock, ORUtils::MemoryBlock<T>::CPU_TO_CUDA);
		}
		else
		{
			// If we're loading into a block on the CPU, read the data directly into the block.
			ReadBlockData(filename, block, blockSize);
		}
	}

	/**
	 * \brief Loads data from a file on disk into a memory block newly-allocated on the CPU with the appropriate size.
	 *
	 * \param filename  The name of the file.
	 * \param dummy     An optional dummy parameter that can be used for type inference.
	 * \return          The loaded memory block.
	 */
	template <typename T>
	static ORUtils::MemoryBlock<T> *LoadMemoryBlock(const std::string& filename, ORUtils::MemoryBlock<T> *dummy = NULL)
	{
		int blockSize = ReadBlockSize(filename);
		ORUtils::MemoryBlock<T> *block = new ORUtils::MemoryBlock<T>(blockSize, MEMORYDEVICE_CPU);
		ReadBlockData(filename, *block, blockSize);
		return block;
	}

	/**
	 * \brief Attempts to read the size of a memory block from a file containing data for a single block.
	 *
	 * The size is stored as a single integer and precedes the data for the block.
	 *
	 * \param filename  The name of the file.
	 * \return          The size of the memory block in the file.
	 * \throws std::runtime_error  If the read is unsuccessful.
	 */
	static int ReadBlockSize(const std::string& filename)
	{
		std::ifstream fs(filename.c_str(), std::ios::binary);
		if(!fs) throw std::runtime_error("Could not open " + filename + " for reading");
		return ReadBlockSize(fs);
	}

	/**
	 * \brief Saves a memory block to a file on disk.
	 *
	 * \param filename          The name of the file.
	 * \param block             The memory block to save.
	 * \param memoryDeviceType  The type of memory device from which to save the data.
	 */
	template <typename T>
	static void SaveMemoryBlock(const std::string& filename, const ORUtils::MemoryBlock<T>& block, MemoryDeviceType memoryDeviceType)
	{
		std::ofstream fs(filename.c_str(), std::ios::binary);
		if(!fs) throw std::runtime_error("Could not open " + filename + " for writing");

		if(memoryDeviceType == MEMORYDEVICE_CUDA)
		{
			// If we are saving the memory block from the GPU, first make a CPU copy of it.
			ORUtils::MemoryBlock<T> cpuBlock(block.dataSize, MEMORYDEVICE_CPU);
			cpuBlock.SetFrom(&block, ORUtils::MemoryBlock<T>::CUDA_TO_CPU);

			// Then write the CPU copy to disk.
			WriteBlock(fs, cpuBlock);
		}
		else
		{
			// If we are saving the memory block from the CPU, write it directly to disk.
			WriteBlock(fs, block);
		}
	}

	//#################### PRIVATE STATIC MEMBER FUNCTIONS ####################
private:
	/**
	 * \brief Attempts to read data into a memory block allocated on the CPU from an input stream.
	 *
	 * The memory block must have the specified size (which should have been obtained by a call to ReadBlockSize).
	 *
	 * \param is         The input stream.
	 * \param block      The memory block into which to read.
	 * \param blockSize  The required size for the memory block.
	 * \throws std::runtime_error  If the read is unsuccessful.
	 */
	template <typename T>
	static void ReadBlockData(std::istream& is, ORUtils::MemoryBlock<T>& block, int blockSize)
	{
		// Check that the block has the right size.
		if(block.dataSize != blockSize)
		{
			throw std::runtime_error("Could not read data into a memory block of the wrong size");
		}

		// Try and read the block's data.
		if(!is.read(reinterpret_cast<char *>(block.GetData(MEMORYDEVICE_CPU)), blockSize * sizeof(T)))
		{
			throw std::runtime_error("Could not read memory block data");
		}
	}

	/**
	 * \brief Attempts to read data into a memory block allocated on the CPU from a file that contains data for a single block.
	 *
	 * The memory block must have the specified size (which should have been obtained by a call to ReadBlockSize).
	 *
	 * \param filename   The name of the file.
	 * \param block      The memory block into which to read.
	 * \param blockSize  The required size for the memory block.
	 * \throws std::runtime_error  If the read is unsuccessful.
	 */
	template <typename T>
	static void ReadBlockData(const std::string& filename, ORUtils::MemoryBlock<T>& block, int blockSize)
	{
		std::ifstream fs(filename.c_str(), std::ios::binary);
		if(!fs) throw std::runtime_error("Could not open " + filename + " for reading");

		// Try and skip the block's size.
		if(!fs.seekg(sizeof(int))) throw std::runtime_error("Could not skip memory block size");

		// Try and read the block's data.
		ReadBlockData(fs, block, blockSize);
	}

	/**
	 * \brief Attempts to read the size of a memory block from an input stream.
	 *
	 * The size is stored as a single integer and precedes the data for the block.
	 *
	 * \param is  The input stream.
	 * \return    The size of the memory block.
	 * \throws std::runtime_error  If the read is unsuccessful.
	 */
	static int ReadBlockSize(std::istream& is)
	{
		int blockSize;
		if(is.read(reinterpret_cast<char *>(&blockSize), sizeof(int))) return blockSize;
		else throw std::runtime_error("Could not read memory block size");
	}

	/**
	 * \brief Attempts to write a memory block allocated on the CPU to an output stream.
	 *
	 * A single integer containing the number of elements in the block is written prior to the block itself.
	 *
	 * \param os     The output stream.
	 * \param block  The memory block to write.
	 * \throws std::runtime_error  If the write is unsuccessful.
	 */
	template <typename T>
	static void WriteBlock(std::ostream& os, const ORUtils::MemoryBlock<T>& block)
	{
		// Try and write the block's size as a single int, to match ReadBlockSize
		// (the original wrote sizeof(size_t) bytes, which the readers above do not expect).
		int blockSize = (int)block.dataSize;
		if(!os.write(reinterpret_cast<const char *>(&blockSize), sizeof(int)))
		{
			throw std::runtime_error("Could not write memory block size");
		}

		// Try and write the block's data.
		if(!os.write(reinterpret_cast<const char *>(block.GetData(MEMORYDEVICE_CPU)), block.dataSize * sizeof(T)))
		{
			throw std::runtime_error("Could not write memory block data");
		}
	}
};

}

--------------------------------------------------------------------------------
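A sketch of the save/load round trip (the filename is made up for illustration):

#include "MemoryBlock.h"
#include "MemoryBlockPersister.h"

void example_persistence()
{
	ORUtils::MemoryBlock<int> block(128, MEMORYDEVICE_CPU);
	// ... fill the block ...
	ORUtils::MemoryBlockPersister::SaveMemoryBlock("block.bin", block, MEMORYDEVICE_CPU);

	// Loading allocates a new CPU block of the size recorded in the file.
	ORUtils::MemoryBlock<int> *loaded =
		ORUtils::MemoryBlockPersister::LoadMemoryBlock<int>("block.bin");
	// ... use *loaded ...
	delete loaded;
}
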
/MetalContext.h:
--------------------------------------------------------------------------------
// Copyright 2014 Isis Innovation Limited and the authors of InfiniTAM
#pragma once

#ifdef __OBJC__

// The original import/include targets were lost; Foundation and Metal are
// what the declarations below require.
#import <Foundation/Foundation.h>
#import <Metal/Metal.h>
#include <stdio.h>
#include <stdlib.h>

#ifndef BUFFERNOCOPY
#define BUFFERNOCOPY(x, marime) [[[MetalContext instance]device] newBufferWithBytesNoCopy:(x) length:marime options:MTLResourceOptionCPUCacheModeDefault deallocator:nil]
#endif

#ifndef BUFFERCOPY
#define BUFFERCOPY(x, marime) [[[MetalContext instance]device] newBufferWithBytes:(x) length:marime options:MTLResourceOptionCPUCacheModeDefault]
#endif

#ifndef BUFFEREMPTY
#define BUFFEREMPTY(marime) [[[MetalContext instance]device] newBufferWithLength:marime options:MTLResourceOptionCPUCacheModeDefault]
#endif

@protocol MTLDevice, MTLLibrary, MTLCommandQueue;

@interface MetalContext : NSObject

@property (strong) id<MTLDevice> device;
@property (strong) id<MTLLibrary> library;
@property (strong) id<MTLCommandQueue> commandQueue;
@property (strong) id<MTLCommandBuffer> commandBuffer;

+(MetalContext *) instance;
+(int)roundUpTo16384:(int)size;

@end

#endif

void allocateMetalData(void **data, void **metalBuffer, int size, bool roundUp);
void freeMetalData(void **data, void **metalBuffer, int size, bool roundUp);

--------------------------------------------------------------------------------
/MetalContext.mm:
--------------------------------------------------------------------------------
// Copyright 2014 Isis Innovation Limited and the authors of InfiniTAM

#import "MetalContext.h"
#import <sys/mman.h>
#include <math.h>

@implementation MetalContext

+(MetalContext*) instance
{
	static MetalContext *gInstance = NULL;
	@synchronized(self)
	{
		if (gInstance == NULL) gInstance = [[self alloc] initWithDevice:nil];
		return gInstance;
	}
}

+(int)roundUpTo16384:(int)size
{
	float size_f = (float)size;
	float size_div = size_f / 16384.0f;
	float size_ceil = ceilf(size_div);
	return (int)(size_ceil * 16384.0f);
}

- (instancetype)initWithDevice:(id<MTLDevice>)device
{
	if ((self = [super init]))
	{
		_device = device ?: MTLCreateSystemDefaultDevice();
		_library = [_device newDefaultLibrary];
		_commandQueue = [_device newCommandQueue];
		_commandBuffer = [_commandQueue commandBuffer];
	}
	return self;
}

@end

void allocateMetalData(void **data, void **metalBuffer, int size, bool roundUp)
{
	int allocSize;
	if (roundUp) allocSize = [MetalContext roundUpTo16384:size];
	else allocSize = size;

	data[0] = mmap(0, allocSize, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
	metalBuffer[0] = (void*)CFBridgingRetain(BUFFERNOCOPY(data[0], allocSize));
}

void freeMetalData(void **data, void **metalBuffer, int size, bool roundUp)
{
	int allocSize;
	if (roundUp) allocSize = [MetalContext roundUpTo16384:size];
	else allocSize = size;

	munmap(data[0], allocSize);
	CFBridgingRelease(metalBuffer[0]);
}

--------------------------------------------------------------------------------
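A sketch of how the Metal allocation helpers pair up: a page-aligned CPU pointer plus a zero-copy MTLBuffer over the same memory (illustrative only; requires an Apple device with Metal):

#include "MetalContext.h"

void example_metal_alloc()
{
	void *cpuData = NULL, *metalBuffer = NULL;
	int size = (int)(1000 * sizeof(float));

	// Rounds the request up to a 16384-byte multiple, mmaps page-aligned
	// memory and wraps it in a no-copy MTLBuffer (see BUFFERNOCOPY).
	allocateMetalData(&cpuData, &metalBuffer, size, true);
	// ... write through cpuData on the CPU, read the buffer in a Metal kernel ...
	freeMetalData(&cpuData, &metalBuffer, size, true);
}
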
((self = [super init])) 29 | { 30 | _device = device ?: MTLCreateSystemDefaultDevice(); 31 | _library = [_device newDefaultLibrary]; 32 | _commandQueue = [_device newCommandQueue]; 33 | _commandBuffer = [_commandQueue commandBuffer]; 34 | } 35 | return self; 36 | } 37 | 38 | @end 39 | 40 | void allocateMetalData(void **data, void **metalBuffer, int size, bool roundUp) 41 | { 42 | int allocSize; 43 | if (roundUp) allocSize = [MetalContext roundUpTo16384:size]; 44 | else allocSize = size; 45 | 46 | data[0] = mmap(0, allocSize, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); 47 | metalBuffer[0] = (void*)CFBridgingRetain(BUFFERNOCOPY(data[0], allocSize)); 48 | } 49 | 50 | void freeMetalData(void **data, void **metalBuffer, int size, bool roundUp) 51 | { 52 | int allocSize; 53 | if (roundUp) allocSize = [MetalContext roundUpTo16384:size]; 54 | else allocSize = size; 55 | 56 | munmap(data[0], allocSize); 57 | CFBridgingRelease(metalBuffer[0]); 58 | } -------------------------------------------------------------------------------- /PlatformIndependence.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014-2015 Isis Innovation Limited and the authors of InfiniTAM 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | 8 | #if defined(__CUDACC__) && defined(__CUDA_ARCH__) 9 | #define _CPU_AND_GPU_CODE_ __device__ // for CUDA device code 10 | #else 11 | #define _CPU_AND_GPU_CODE_ 12 | #endif 13 | 14 | #if defined(__CUDACC__) && defined(__CUDA_ARCH__) 15 | #define _CPU_AND_GPU_CONSTANT_ __constant__ // for CUDA device code 16 | #else 17 | #define _CPU_AND_GPU_CONSTANT_ 18 | #endif 19 | 20 | #if defined(__METALC__) // for METAL device code 21 | #define THREADPTR(x) thread x 22 | #define DEVICEPTR(x) device x 23 | #define THREADGRPPTR(x) threadgroup x 24 | #define CONSTPTR(x) constant x 25 | #else 26 | #define THREADPTR(x) x 27 | #define DEVICEPTR(x) x 28 | #define THREADGROUPPTR(x) x 29 | #define CONSTPTR(x) x 30 | #endif 31 | 32 | #ifdef ANDROID 33 | #define DIEWITHEXCEPTION(x) { fprintf(stderr, "%s\n", x); exit(-1); } 34 | #else 35 | #define DIEWITHEXCEPTION(x) throw std::runtime_error(x) 36 | #endif 37 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | This is a set of vector, matrix and image classes for CPU and GPU use, InfiniTAM is based on this set of basic classes -------------------------------------------------------------------------------- /Vector.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014-2015 Isis Innovation Limited and the authors of InfiniTAM 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | 8 | namespace ORUtils { 9 | ////////////////////////////////////////////////////////////////////////// 10 | // Basic Vector Structure 11 | ////////////////////////////////////////////////////////////////////////// 12 | 13 | template struct Vector2_{ 14 | union { 15 | struct { T x, y; }; // standard names for components 16 | struct { T s, t; }; // standard names for components 17 | struct { T width, height; }; 18 | T v[2]; // array access 19 | }; 20 | }; 21 | 22 | template struct Vector3_{ 23 | union { 24 | struct{ T x, y, z; }; // standard names for components 25 | struct{ T r, g, b; }; // standard names for components 26 | struct{ T s, t, p; }; // standard names for components 27 | T v[3]; 28 | }; 29 | }; 30 | 31 | template struct Vector4_ { 32 | union 
{ 33 | struct { T x, y, z, w; }; // standard names for components 34 | struct { T r, g, b, a; }; // standard names for components 35 | struct { T s, t, p, q; }; // standard names for components 36 | T v[4]; 37 | }; 38 | }; 39 | 40 | template struct Vector6_ { 41 | //union { 42 | T v[6]; 43 | //}; 44 | }; 45 | 46 | template struct VectorX_ 47 | { 48 | int vsize; 49 | T v[s]; 50 | }; 51 | 52 | ////////////////////////////////////////////////////////////////////////// 53 | // Vector class with math operators: +, -, *, /, +=, -=, /=, [], ==, !=, T*(), etc. 54 | ////////////////////////////////////////////////////////////////////////// 55 | template class Vector2 : public Vector2_ < T > 56 | { 57 | public: 58 | typedef T value_type; 59 | _CPU_AND_GPU_CODE_ inline int size() const { return 2; } 60 | 61 | //////////////////////////////////////////////////////// 62 | // Constructors 63 | //////////////////////////////////////////////////////// 64 | _CPU_AND_GPU_CODE_ Vector2(){} // Default constructor 65 | _CPU_AND_GPU_CODE_ Vector2(const T &t) { this->x = t; this->y = t; } // Scalar constructor 66 | _CPU_AND_GPU_CODE_ Vector2(const T *tp) { this->x = tp[0]; this->y = tp[1]; } // Construct from array 67 | _CPU_AND_GPU_CODE_ Vector2(const T v0, const T v1) { this->x = v0; this->y = v1; } // Construct from explicit values 68 | _CPU_AND_GPU_CODE_ Vector2(const Vector2_ &v) { this->x = v.x; this->y = v.y; }// copy constructor 69 | 70 | _CPU_AND_GPU_CODE_ explicit Vector2(const Vector3_ &u) { this->x = u.x; this->y = u.y; } 71 | _CPU_AND_GPU_CODE_ explicit Vector2(const Vector4_ &u) { this->x = u.x; this->y = u.y; } 72 | 73 | _CPU_AND_GPU_CODE_ inline Vector2 toInt() const { 74 | return Vector2((int)ROUND(this->x), (int)ROUND(this->y)); 75 | } 76 | 77 | _CPU_AND_GPU_CODE_ inline Vector2 toIntFloor() const { 78 | return Vector2((int)floor(this->x), (int)floor(this->y)); 79 | } 80 | 81 | _CPU_AND_GPU_CODE_ inline Vector2 toUChar() const { 82 | Vector2 vi = toInt(); return Vector2((unsigned char)CLAMP(vi.x, 0, 255), (unsigned char)CLAMP(vi.y, 0, 255)); 83 | } 84 | 85 | _CPU_AND_GPU_CODE_ inline Vector2 toFloat() const { 86 | return Vector2((float)this->x, (float)this->y); 87 | } 88 | 89 | _CPU_AND_GPU_CODE_ const T *getValues() const { return this->v; } 90 | _CPU_AND_GPU_CODE_ Vector2 &setValues(const T *rhs) { this->x = rhs[0]; this->y = rhs[1]; return *this; } 91 | 92 | // indexing operators 93 | _CPU_AND_GPU_CODE_ T &operator [](int i) { return this->v[i]; } 94 | _CPU_AND_GPU_CODE_ const T &operator [](int i) const { return this->v[i]; } 95 | 96 | // type-cast operators 97 | _CPU_AND_GPU_CODE_ operator T *() { return this->v; } 98 | _CPU_AND_GPU_CODE_ operator const T *() const { return this->v; } 99 | 100 | //////////////////////////////////////////////////////// 101 | // Math operators 102 | //////////////////////////////////////////////////////// 103 | 104 | // scalar multiply assign 105 | _CPU_AND_GPU_CODE_ friend Vector2 &operator *= (const Vector2 &lhs, T d) { 106 | lhs.x *= d; lhs.y *= d; return lhs; 107 | } 108 | 109 | // component-wise vector multiply assign 110 | _CPU_AND_GPU_CODE_ friend Vector2 &operator *= (Vector2 &lhs, const Vector2 &rhs) { 111 | lhs.x *= rhs.x; lhs.y *= rhs.y; return lhs; 112 | } 113 | 114 | // scalar divide assign 115 | _CPU_AND_GPU_CODE_ friend Vector2 &operator /= (Vector2 &lhs, T d) { 116 | if (d == 0) return lhs; lhs.x /= d; lhs.y /= d; return lhs; 117 | } 118 | 119 | // component-wise vector divide assign 120 | _CPU_AND_GPU_CODE_ friend Vector2 &operator /= 
121 | lhs.x /= rhs.x; lhs.y /= rhs.y; return lhs;
122 | }
123 | 
124 | // component-wise vector add assign
125 | _CPU_AND_GPU_CODE_ friend Vector2 &operator += (Vector2 &lhs, const Vector2 &rhs) {
126 | lhs.x += rhs.x; lhs.y += rhs.y; return lhs;
127 | }
128 | 
129 | // component-wise vector subtract assign
130 | _CPU_AND_GPU_CODE_ friend Vector2 &operator -= (Vector2 &lhs, const Vector2 &rhs) {
131 | lhs.x -= rhs.x; lhs.y -= rhs.y; return lhs;
132 | }
133 | 
134 | // unary negate
135 | _CPU_AND_GPU_CODE_ friend Vector2 operator - (const Vector2 &rhs) {
136 | Vector2 rv; rv.x = -rhs.x; rv.y = -rhs.y; return rv;
137 | }
138 | 
139 | // vector add
140 | _CPU_AND_GPU_CODE_ friend Vector2 operator + (const Vector2 &lhs, const Vector2 &rhs) {
141 | Vector2 rv(lhs); return rv += rhs;
142 | }
143 | 
144 | // vector subtract
145 | _CPU_AND_GPU_CODE_ friend Vector2 operator - (const Vector2 &lhs, const Vector2 &rhs) {
146 | Vector2 rv(lhs); return rv -= rhs;
147 | }
148 | 
149 | // scalar multiply
150 | _CPU_AND_GPU_CODE_ friend Vector2 operator * (const Vector2 &lhs, T rhs) {
151 | Vector2 rv(lhs); return rv *= rhs;
152 | }
153 | 
154 | // scalar multiply
155 | _CPU_AND_GPU_CODE_ friend Vector2 operator * (T lhs, const Vector2 &rhs) {
156 | Vector2 rv(lhs); return rv *= rhs;
157 | }
158 | 
159 | // vector component-wise multiply
160 | _CPU_AND_GPU_CODE_ friend Vector2 operator * (const Vector2 &lhs, const Vector2 &rhs) {
161 | Vector2 rv(lhs); return rv *= rhs;
162 | }
163 | 
164 | // scalar divide
165 | _CPU_AND_GPU_CODE_ friend Vector2 operator / (const Vector2 &lhs, T rhs) {
166 | Vector2 rv(lhs); return rv /= rhs;
167 | }
168 | 
169 | // vector component-wise divide
170 | _CPU_AND_GPU_CODE_ friend Vector2 operator / (const Vector2 &lhs, const Vector2 &rhs) {
171 | Vector2 rv(lhs); return rv /= rhs;
172 | }
173 | 
174 | ////////////////////////////////////////////////////////
175 | // Comparison operators
176 | ////////////////////////////////////////////////////////
177 | 
178 | // equality
179 | _CPU_AND_GPU_CODE_ friend bool operator == (const Vector2 &lhs, const Vector2 &rhs) {
180 | return (lhs.x == rhs.x) && (lhs.y == rhs.y);
181 | }
182 | 
183 | // inequality
184 | _CPU_AND_GPU_CODE_ friend bool operator != (const Vector2 &lhs, const Vector2 &rhs) {
185 | return (lhs.x != rhs.x) || (lhs.y != rhs.y);
186 | }
187 | 
188 | friend std::ostream& operator<<(std::ostream& os, const Vector2& dt){
189 | os << dt.x << ", " << dt.y;
190 | return os;
191 | }
192 | };
193 | 
194 | template <class T> class Vector3 : public Vector3_ < T >
195 | {
196 | public:
197 | typedef T value_type;
198 | _CPU_AND_GPU_CODE_ inline int size() const { return 3; }
199 | 
200 | ////////////////////////////////////////////////////////
201 | // Constructors
202 | ////////////////////////////////////////////////////////
203 | _CPU_AND_GPU_CODE_ Vector3(){} // Default constructor
204 | _CPU_AND_GPU_CODE_ Vector3(const T &t) { this->x = t; this->y = t; this->z = t; } // Scalar constructor
205 | _CPU_AND_GPU_CODE_ Vector3(const T *tp) { this->x = tp[0]; this->y = tp[1]; this->z = tp[2]; } // Construct from array
206 | _CPU_AND_GPU_CODE_ Vector3(const T v0, const T v1, const T v2) { this->x = v0; this->y = v1; this->z = v2; } // Construct from explicit values
207 | _CPU_AND_GPU_CODE_ explicit Vector3(const Vector4_<T> &u) { this->x = u.x; this->y = u.y; this->z = u.z; }
208 | _CPU_AND_GPU_CODE_ explicit Vector3(const Vector2_<T> &u, T v0) { this->x = u.x; this->y = u.y; this->z = v0; }
209 | 
210 | _CPU_AND_GPU_CODE_ inline Vector3<int> toIntRound() const {
211 | return Vector3<int>((int)ROUND(this->x), (int)ROUND(this->y), (int)ROUND(this->z));
212 | }
213 | 
214 | _CPU_AND_GPU_CODE_ inline Vector3<int> toInt() const {
215 | return Vector3<int>((int)(this->x), (int)(this->y), (int)(this->z));
216 | }
217 | 
218 | _CPU_AND_GPU_CODE_ inline Vector3<int> toInt(Vector3<float> &residual) const {
219 | Vector3<int> intRound = toInt();
220 | residual = Vector3<float>(this->x - intRound.x, this->y - intRound.y, this->z - intRound.z);
221 | return intRound;
222 | }
223 | 
224 | _CPU_AND_GPU_CODE_ inline Vector3<short> toShortRound() const {
225 | return Vector3<short>((short)ROUND(this->x), (short)ROUND(this->y), (short)ROUND(this->z));
226 | }
227 | 
228 | _CPU_AND_GPU_CODE_ inline Vector3<short> toShortFloor() const {
229 | return Vector3<short>((short)floor(this->x), (short)floor(this->y), (short)floor(this->z));
230 | }
231 | 
232 | _CPU_AND_GPU_CODE_ inline Vector3<int> toIntFloor() const {
233 | return Vector3<int>((int)floor(this->x), (int)floor(this->y), (int)floor(this->z));
234 | }
235 | 
236 | _CPU_AND_GPU_CODE_ inline Vector3<int> toIntFloor(Vector3<float> &residual) const {
237 | Vector3<float> intFloor(floor(this->x), floor(this->y), floor(this->z));
238 | residual = *this - intFloor;
239 | return Vector3<int>((int)intFloor.x, (int)intFloor.y, (int)intFloor.z);
240 | }
241 | 
242 | _CPU_AND_GPU_CODE_ inline Vector3<unsigned char> toUChar() const {
243 | Vector3<int> vi = toIntRound(); return Vector3<unsigned char>((unsigned char)CLAMP(vi.x, 0, 255), (unsigned char)CLAMP(vi.y, 0, 255), (unsigned char)CLAMP(vi.z, 0, 255));
244 | }
245 | 
246 | _CPU_AND_GPU_CODE_ inline Vector3<float> toFloat() const {
247 | return Vector3<float>((float)this->x, (float)this->y, (float)this->z);
248 | }
249 | 
250 | _CPU_AND_GPU_CODE_ inline Vector3<float> normalised() const {
251 | float norm = 1.0f / sqrt((float)(this->x * this->x + this->y * this->y + this->z * this->z));
252 | return Vector3<float>((float)this->x * norm, (float)this->y * norm, (float)this->z * norm);
253 | }
254 | 
255 | _CPU_AND_GPU_CODE_ const T *getValues() const { return this->v; }
256 | _CPU_AND_GPU_CODE_ Vector3 &setValues(const T *rhs) { this->x = rhs[0]; this->y = rhs[1]; this->z = rhs[2]; return *this; }
257 | 
258 | // indexing operators
259 | _CPU_AND_GPU_CODE_ T &operator [](int i) { return this->v[i]; }
260 | _CPU_AND_GPU_CODE_ const T &operator [](int i) const { return this->v[i]; }
261 | 
262 | // type-cast operators
263 | _CPU_AND_GPU_CODE_ operator T *() { return this->v; }
264 | _CPU_AND_GPU_CODE_ operator const T *() const { return this->v; }
265 | 
266 | ////////////////////////////////////////////////////////
267 | // Math operators
268 | ////////////////////////////////////////////////////////
269 | 
270 | // scalar multiply assign
271 | _CPU_AND_GPU_CODE_ friend Vector3 &operator *= (Vector3 &lhs, T d) {
272 | lhs.x *= d; lhs.y *= d; lhs.z *= d; return lhs;
273 | }
274 | 
275 | // component-wise vector multiply assign
276 | _CPU_AND_GPU_CODE_ friend Vector3 &operator *= (Vector3 &lhs, const Vector3 &rhs) {
277 | lhs.x *= rhs.x; lhs.y *= rhs.y; lhs.z *= rhs.z; return lhs;
278 | }
279 | 
280 | // scalar divide assign
281 | _CPU_AND_GPU_CODE_ friend Vector3 &operator /= (Vector3 &lhs, T d) {
282 | lhs.x /= d; lhs.y /= d; lhs.z /= d; return lhs;
283 | }
284 | 
285 | // component-wise vector divide assign
286 | _CPU_AND_GPU_CODE_ friend Vector3 &operator /= (Vector3 &lhs, const Vector3 &rhs) {
287 | lhs.x /= rhs.x; lhs.y /= rhs.y; lhs.z /= rhs.z; return lhs;
288 | }
289 | 
290 | // component-wise vector add assign
291 | _CPU_AND_GPU_CODE_ friend Vector3 &operator += (Vector3 &lhs, const Vector3 &rhs) {
292 | lhs.x += rhs.x; lhs.y += rhs.y; lhs.z += rhs.z; return lhs;
293 | }
294 | 
295 | // component-wise vector subtract assign
296 | _CPU_AND_GPU_CODE_ friend Vector3 &operator -= (Vector3 &lhs, const Vector3 &rhs) {
297 | lhs.x -= rhs.x; lhs.y -= rhs.y; lhs.z -= rhs.z; return lhs;
298 | }
299 | 
300 | // unary negate
301 | _CPU_AND_GPU_CODE_ friend Vector3 operator - (const Vector3 &rhs) {
302 | Vector3 rv; rv.x = -rhs.x; rv.y = -rhs.y; rv.z = -rhs.z; return rv;
303 | }
304 | 
305 | // vector add
306 | _CPU_AND_GPU_CODE_ friend Vector3 operator + (const Vector3 &lhs, const Vector3 &rhs){
307 | Vector3 rv(lhs); return rv += rhs;
308 | }
309 | 
310 | // vector subtract
311 | _CPU_AND_GPU_CODE_ friend Vector3 operator - (const Vector3 &lhs, const Vector3 &rhs){
312 | Vector3 rv(lhs); return rv -= rhs;
313 | }
314 | 
315 | // scalar multiply
316 | _CPU_AND_GPU_CODE_ friend Vector3 operator * (const Vector3 &lhs, T rhs) {
317 | Vector3 rv(lhs); return rv *= rhs;
318 | }
319 | 
320 | // scalar multiply
321 | _CPU_AND_GPU_CODE_ friend Vector3 operator * (T lhs, const Vector3 &rhs) {
322 | Vector3 rv(lhs); return rv *= rhs;
323 | }
324 | 
325 | // vector component-wise multiply
326 | _CPU_AND_GPU_CODE_ friend Vector3 operator * (const Vector3 &lhs, const Vector3 &rhs) {
327 | Vector3 rv(lhs); return rv *= rhs;
328 | }
329 | 
330 | // scalar divide
331 | _CPU_AND_GPU_CODE_ friend Vector3 operator / (const Vector3 &lhs, T rhs) {
332 | Vector3 rv(lhs); return rv /= rhs;
333 | }
334 | 
335 | // vector component-wise divide
336 | _CPU_AND_GPU_CODE_ friend Vector3 operator / (const Vector3 &lhs, const Vector3 &rhs) {
337 | Vector3 rv(lhs); return rv /= rhs;
338 | }
339 | 
340 | ////////////////////////////////////////////////////////
341 | // Comparison operators
342 | ////////////////////////////////////////////////////////
343 | 
344 | // inequality
345 | _CPU_AND_GPU_CODE_ friend bool operator != (const Vector3 &lhs, const Vector3 &rhs) {
346 | return (lhs.x != rhs.x) || (lhs.y != rhs.y) || (lhs.z != rhs.z);
347 | }
348 | 
349 | ////////////////////////////////////////////////////////////////////////////////
350 | // dimension specific operations
351 | ////////////////////////////////////////////////////////////////////////////////
352 | 
353 | // cross product
354 | _CPU_AND_GPU_CODE_ friend Vector3 cross(const Vector3 &lhs, const Vector3 &rhs) {
355 | Vector3 r;
356 | r.x = lhs.y * rhs.z - lhs.z * rhs.y;
357 | r.y = lhs.z * rhs.x - lhs.x * rhs.z;
358 | r.z = lhs.x * rhs.y - lhs.y * rhs.x;
359 | return r;
360 | }
361 | 
362 | friend std::ostream& operator<<(std::ostream& os, const Vector3& dt){
363 | os << dt.x << ", " << dt.y << ", " << dt.z;
364 | return os;
365 | }
366 | };
367 | 
368 | ////////////////////////////////////////////////////////
369 | // Non-member comparison operators
370 | ////////////////////////////////////////////////////////
371 | 
372 | // equality
373 | template <class T> _CPU_AND_GPU_CODE_ inline bool operator == (const Vector3<T> &lhs, const Vector3<T> &rhs){
374 | return (lhs.x == rhs.x) && (lhs.y == rhs.y) && (lhs.z == rhs.z);
375 | }
376 | 
377 | template <class T> class Vector4 : public Vector4_ < T >
378 | {
379 | public:
380 | typedef T value_type;
381 | _CPU_AND_GPU_CODE_ inline int size() const { return 4; }
382 | 
383 | ////////////////////////////////////////////////////////
384 | // Constructors
385 | ////////////////////////////////////////////////////////
386 | 
387 | _CPU_AND_GPU_CODE_ Vector4() {} // Default constructor
388 | _CPU_AND_GPU_CODE_ Vector4(const T &t) { this->x = t; this->y = t; this->z = t; this->w = t; } // Scalar constructor
389 | _CPU_AND_GPU_CODE_ Vector4(const T *tp) { this->x = tp[0]; this->y = tp[1]; this->z = tp[2]; this->w = tp[3]; } // Construct from array
390 | _CPU_AND_GPU_CODE_ Vector4(const T v0, const T v1, const T v2, const T v3) { this->x = v0; this->y = v1; this->z = v2; this->w = v3; } // Construct from explicit values
391 | _CPU_AND_GPU_CODE_ explicit Vector4(const Vector3_<T> &u, T v0) { this->x = u.x; this->y = u.y; this->z = u.z; this->w = v0; }
392 | _CPU_AND_GPU_CODE_ explicit Vector4(const Vector2_<T> &u, T v0, T v1) { this->x = u.x; this->y = u.y; this->z = v0; this->w = v1; }
393 | 
394 | _CPU_AND_GPU_CODE_ inline Vector4<int> toIntRound() const {
395 | return Vector4<int>((int)ROUND(this->x), (int)ROUND(this->y), (int)ROUND(this->z), (int)ROUND(this->w));
396 | }
397 | 
398 | _CPU_AND_GPU_CODE_ inline Vector4<unsigned char> toUChar() const {
399 | Vector4<int> vi = toIntRound(); return Vector4<unsigned char>((unsigned char)CLAMP(vi.x, 0, 255), (unsigned char)CLAMP(vi.y, 0, 255), (unsigned char)CLAMP(vi.z, 0, 255), (unsigned char)CLAMP(vi.w, 0, 255));
400 | }
401 | 
402 | _CPU_AND_GPU_CODE_ inline Vector4<float> toFloat() const {
403 | return Vector4<float>((float)this->x, (float)this->y, (float)this->z, (float)this->w);
404 | }
405 | 
406 | _CPU_AND_GPU_CODE_ inline Vector4 homogeneousCoordinatesNormalize() const {
407 | return (this->w <= 0) ? *this : Vector4(this->x / this->w, this->y / this->w, this->z / this->w, 1);
408 | }
409 | 
410 | _CPU_AND_GPU_CODE_ inline Vector3<T> toVector3() const {
411 | return Vector3<T>(this->x, this->y, this->z);
412 | }
413 | 
414 | _CPU_AND_GPU_CODE_ const T *getValues() const { return this->v; }
415 | _CPU_AND_GPU_CODE_ Vector4 &setValues(const T *rhs) { this->x = rhs[0]; this->y = rhs[1]; this->z = rhs[2]; this->w = rhs[3]; return *this; }
416 | 
417 | // indexing operators
418 | _CPU_AND_GPU_CODE_ T &operator [](int i) { return this->v[i]; }
419 | _CPU_AND_GPU_CODE_ const T &operator [](int i) const { return this->v[i]; }
420 | 
421 | // type-cast operators
422 | _CPU_AND_GPU_CODE_ operator T *() { return this->v; }
423 | _CPU_AND_GPU_CODE_ operator const T *() const { return this->v; }
424 | 
425 | ////////////////////////////////////////////////////////
426 | // Math operators
427 | ////////////////////////////////////////////////////////
428 | 
429 | // scalar multiply assign
430 | _CPU_AND_GPU_CODE_ friend Vector4 &operator *= (Vector4 &lhs, T d) {
431 | lhs.x *= d; lhs.y *= d; lhs.z *= d; lhs.w *= d; return lhs;
432 | }
433 | 
434 | // component-wise vector multiply assign
435 | _CPU_AND_GPU_CODE_ friend Vector4 &operator *= (Vector4 &lhs, const Vector4 &rhs) {
436 | lhs.x *= rhs.x; lhs.y *= rhs.y; lhs.z *= rhs.z; lhs.w *= rhs.w; return lhs;
437 | }
438 | 
439 | // scalar divide assign
440 | _CPU_AND_GPU_CODE_ friend Vector4 &operator /= (Vector4 &lhs, T d){
441 | lhs.x /= d; lhs.y /= d; lhs.z /= d; lhs.w /= d; return lhs;
442 | }
443 | 
444 | // component-wise vector divide assign
445 | _CPU_AND_GPU_CODE_ friend Vector4 &operator /= (Vector4 &lhs, const Vector4 &rhs) {
446 | lhs.x /= rhs.x; lhs.y /= rhs.y; lhs.z /= rhs.z; lhs.w /= rhs.w; return lhs;
447 | }
448 | 
449 | // component-wise vector add assign
450 | _CPU_AND_GPU_CODE_ friend Vector4 &operator += (Vector4 &lhs, const Vector4 &rhs) {
451 | lhs.x += rhs.x; lhs.y += rhs.y; lhs.z += rhs.z; lhs.w += rhs.w; return lhs;
452 | }
453 | 
454 | // component-wise vector subtract assign
455 | _CPU_AND_GPU_CODE_ friend Vector4 &operator -= (Vector4 &lhs, const Vector4 &rhs) {
456 | lhs.x -= rhs.x; lhs.y -= rhs.y; lhs.z -= rhs.z; lhs.w -= rhs.w; return lhs;
457 | }
458 | 
459 | // unary negate
460 | _CPU_AND_GPU_CODE_ friend Vector4 operator - (const Vector4 &rhs) {
461 | Vector4 rv; rv.x = -rhs.x; rv.y = -rhs.y; rv.z = -rhs.z; rv.w = -rhs.w; return rv;
462 | }
463 | 
464 | // vector add
465 | _CPU_AND_GPU_CODE_ friend Vector4 operator + (const Vector4 &lhs, const Vector4 &rhs) {
466 | Vector4 rv(lhs); return rv += rhs;
467 | }
468 | 
469 | // vector subtract
470 | _CPU_AND_GPU_CODE_ friend Vector4 operator - (const Vector4 &lhs, const Vector4 &rhs) {
471 | Vector4 rv(lhs); return rv -= rhs;
472 | }
473 | 
474 | // scalar multiply
475 | _CPU_AND_GPU_CODE_ friend Vector4 operator * (const Vector4 &lhs, T rhs) {
476 | Vector4 rv(lhs); return rv *= rhs;
477 | }
478 | 
479 | // scalar multiply
480 | _CPU_AND_GPU_CODE_ friend Vector4 operator * (T lhs, const Vector4 &rhs) {
481 | Vector4 rv(lhs); return rv *= rhs;
482 | }
483 | 
484 | // vector component-wise multiply
485 | _CPU_AND_GPU_CODE_ friend Vector4 operator * (const Vector4 &lhs, const Vector4 &rhs) {
486 | Vector4 rv(lhs); return rv *= rhs;
487 | }
488 | 
489 | // scalar divide
490 | _CPU_AND_GPU_CODE_ friend Vector4 operator / (const Vector4 &lhs, T rhs) {
491 | Vector4 rv(lhs); return rv /= rhs;
492 | }
493 | 
494 | // vector component-wise divide
495 | _CPU_AND_GPU_CODE_ friend Vector4 operator / (const Vector4 &lhs, const Vector4 &rhs) {
496 | Vector4 rv(lhs); return rv /= rhs;
497 | }
498 | 
499 | ////////////////////////////////////////////////////////
500 | // Comparison operators
501 | ////////////////////////////////////////////////////////
502 | 
503 | // equality
504 | _CPU_AND_GPU_CODE_ friend bool operator == (const Vector4 &lhs, const Vector4 &rhs) {
505 | return (lhs.x == rhs.x) && (lhs.y == rhs.y) && (lhs.z == rhs.z) && (lhs.w == rhs.w);
506 | }
507 | 
508 | // inequality
509 | _CPU_AND_GPU_CODE_ friend bool operator != (const Vector4 &lhs, const Vector4 &rhs) {
510 | return (lhs.x != rhs.x) || (lhs.y != rhs.y) || (lhs.z != rhs.z) || (lhs.w != rhs.w);
511 | }
512 | 
513 | friend std::ostream& operator<<(std::ostream& os, const Vector4& dt){
514 | os << dt.x << ", " << dt.y << ", " << dt.z << ", " << dt.w;
515 | return os;
516 | }
517 | };
518 | 
519 | template <class T> class Vector6 : public Vector6_ < T >
520 | {
521 | public:
522 | typedef T value_type;
523 | _CPU_AND_GPU_CODE_ inline int size() const { return 6; }
524 | 
525 | ////////////////////////////////////////////////////////
526 | // Constructors
527 | ////////////////////////////////////////////////////////
528 | 
529 | _CPU_AND_GPU_CODE_ Vector6() {} // Default constructor
530 | _CPU_AND_GPU_CODE_ Vector6(const T &t) { this->v[0] = t; this->v[1] = t; this->v[2] = t; this->v[3] = t; this->v[4] = t; this->v[5] = t; } // Scalar constructor
531 | _CPU_AND_GPU_CODE_ Vector6(const T *tp) { this->v[0] = tp[0]; this->v[1] = tp[1]; this->v[2] = tp[2]; this->v[3] = tp[3]; this->v[4] = tp[4]; this->v[5] = tp[5]; } // Construct from array
532 | _CPU_AND_GPU_CODE_ Vector6(const T v0, const T v1, const T v2, const T v3, const T v4, const T v5) { this->v[0] = v0; this->v[1] = v1; this->v[2] = v2; this->v[3] = v3; this->v[4] = v4; this->v[5] = v5; } // Construct from explicit values
533 | _CPU_AND_GPU_CODE_ explicit Vector6(const Vector4_<T> &u, T v0, T v1) { this->v[0] = u.x; this->v[1] = u.y; this->v[2] = u.z; this->v[3] = u.w; this->v[4] = v0; this->v[5] = v1; }
534 | _CPU_AND_GPU_CODE_ explicit Vector6(const Vector3_<T> &u, T v0, T v1, T v2) { this->v[0] = u.x; this->v[1] = u.y; this->v[2] = u.z; this->v[3] = v0; this->v[4] = v1; this->v[5] = v2; }
535 | _CPU_AND_GPU_CODE_ explicit Vector6(const Vector2_<T> &u, T v0, T v1, T v2, T v3) { this->v[0] = u.x; this->v[1] = u.y; this->v[2] = v0; this->v[3] = v1; this->v[4] = v2; this->v[5] = v3; }
536 | 
537 | _CPU_AND_GPU_CODE_ inline Vector6<int> toIntRound() const {
538 | return Vector6<int>((int)ROUND(this->v[0]), (int)ROUND(this->v[1]), (int)ROUND(this->v[2]), (int)ROUND(this->v[3]), (int)ROUND(this->v[4]), (int)ROUND(this->v[5]));
539 | }
540 | 
541 | _CPU_AND_GPU_CODE_ inline Vector6<unsigned char> toUChar() const {
542 | Vector6<int> vi = toIntRound(); return Vector6<unsigned char>((unsigned char)CLAMP(vi[0], 0, 255), (unsigned char)CLAMP(vi[1], 0, 255), (unsigned char)CLAMP(vi[2], 0, 255), (unsigned char)CLAMP(vi[3], 0, 255), (unsigned char)CLAMP(vi[4], 0, 255), (unsigned char)CLAMP(vi[5], 0, 255));
543 | }
544 | 
545 | _CPU_AND_GPU_CODE_ inline Vector6<float> toFloat() const {
546 | return Vector6<float>((float)this->v[0], (float)this->v[1], (float)this->v[2], (float)this->v[3], (float)this->v[4], (float)this->v[5]);
547 | }
548 | 
549 | _CPU_AND_GPU_CODE_ const T *getValues() const { return this->v; }
550 | _CPU_AND_GPU_CODE_ Vector6 &setValues(const T *rhs) { this->v[0] = rhs[0]; this->v[1] = rhs[1]; this->v[2] = rhs[2]; this->v[3] = rhs[3]; this->v[4] = rhs[4]; this->v[5] = rhs[5]; return *this; }
551 | 
552 | // indexing operators
553 | _CPU_AND_GPU_CODE_ T &operator [](int i) { return this->v[i]; }
554 | _CPU_AND_GPU_CODE_ const T &operator [](int i) const { return this->v[i]; }
555 | 
556 | // type-cast operators
557 | _CPU_AND_GPU_CODE_ operator T *() { return this->v; }
558 | _CPU_AND_GPU_CODE_ operator const T *() const { return this->v; }
559 | 
560 | ////////////////////////////////////////////////////////
561 | // Math operators
562 | ////////////////////////////////////////////////////////
563 | 
564 | // scalar multiply assign
565 | _CPU_AND_GPU_CODE_ friend Vector6 &operator *= (Vector6 &lhs, T d) {
566 | lhs[0] *= d; lhs[1] *= d; lhs[2] *= d; lhs[3] *= d; lhs[4] *= d; lhs[5] *= d; return lhs;
567 | }
568 | 
569 | // component-wise vector multiply assign
570 | _CPU_AND_GPU_CODE_ friend Vector6 &operator *= (Vector6 &lhs, const Vector6 &rhs) {
571 | lhs[0] *= rhs[0]; lhs[1] *= rhs[1]; lhs[2] *= rhs[2]; lhs[3] *= rhs[3]; lhs[4] *= rhs[4]; lhs[5] *= rhs[5]; return lhs;
572 | }
573 | 
574 | // scalar divide assign
575 | _CPU_AND_GPU_CODE_ friend Vector6 &operator /= (Vector6 &lhs, T d){
576 | lhs[0] /= d; lhs[1] /= d; lhs[2] /= d; lhs[3] /= d; lhs[4] /= d; lhs[5] /= d; return lhs;
577 | }
578 | 
579 | // component-wise vector divide assign
580 | _CPU_AND_GPU_CODE_ friend Vector6 &operator /= (Vector6 &lhs, const Vector6 &rhs) {
581 | lhs[0] /= rhs[0]; lhs[1] /= rhs[1]; lhs[2] /= rhs[2]; lhs[3] /= rhs[3]; lhs[4] /= rhs[4]; lhs[5] /= rhs[5]; return lhs;
582 | }
583 | 
584 | // component-wise vector add assign
585 | _CPU_AND_GPU_CODE_ friend Vector6 &operator += (Vector6 &lhs, const Vector6 &rhs) {
586 | lhs[0] += rhs[0]; lhs[1] += rhs[1]; lhs[2] += rhs[2]; lhs[3] += rhs[3]; lhs[4] += rhs[4]; lhs[5] += rhs[5]; return lhs;
587 | }
588 | 
589 | // component-wise vector subtract assign
590 | _CPU_AND_GPU_CODE_ friend Vector6 &operator -= (Vector6 &lhs, const Vector6 &rhs) {
591 | lhs[0] -= rhs[0]; lhs[1] -= rhs[1]; lhs[2] -= rhs[2]; lhs[3] -= rhs[3]; lhs[4] -= rhs[4]; lhs[5] -= rhs[5]; return lhs;
592 | }
593 | 
594 | // unary negate
595 | _CPU_AND_GPU_CODE_ friend Vector6 operator - (const Vector6 &rhs) {
596 | Vector6 rv; rv[0] = -rhs[0]; rv[1] = -rhs[1]; rv[2] = -rhs[2]; rv[3] = -rhs[3]; rv[4] = -rhs[4]; rv[5] = -rhs[5]; return rv;
597 | }
598 | 
599 | // vector add
600 | _CPU_AND_GPU_CODE_ friend Vector6 operator + (const Vector6 &lhs, const Vector6 &rhs) {
601 | Vector6 rv(lhs); return rv += rhs;
602 | }
603 | 
604 | // vector subtract
605 | _CPU_AND_GPU_CODE_ friend Vector6 operator - (const Vector6 &lhs, const Vector6 &rhs) {
606 | Vector6 rv(lhs); return rv -= rhs;
607 | }
608 | 
609 | // scalar multiply
610 | _CPU_AND_GPU_CODE_ friend Vector6 operator * (const Vector6 &lhs, T rhs) {
611 | Vector6 rv(lhs); return rv *= rhs;
612 | }
613 | 
614 | // scalar multiply
615 | _CPU_AND_GPU_CODE_ friend Vector6 operator * (T lhs, const Vector6 &rhs) {
616 | Vector6 rv(lhs); return rv *= rhs;
617 | }
618 | 
619 | // vector component-wise multiply
620 | _CPU_AND_GPU_CODE_ friend Vector6 operator * (const Vector6 &lhs, const Vector6 &rhs) {
621 | Vector6 rv(lhs); return rv *= rhs;
622 | }
623 | 
624 | // scalar divide
625 | _CPU_AND_GPU_CODE_ friend Vector6 operator / (const Vector6 &lhs, T rhs) {
626 | Vector6 rv(lhs); return rv /= rhs;
627 | }
628 | 
629 | // vector component-wise divide
630 | _CPU_AND_GPU_CODE_ friend Vector6 operator / (const Vector6 &lhs, const Vector6 &rhs) {
631 | Vector6 rv(lhs); return rv /= rhs;
632 | }
633 | 
634 | ////////////////////////////////////////////////////////
635 | // Comparison operators
636 | ////////////////////////////////////////////////////////
637 | 
638 | // equality
639 | _CPU_AND_GPU_CODE_ friend bool operator == (const Vector6 &lhs, const Vector6 &rhs) {
640 | return (lhs[0] == rhs[0]) && (lhs[1] == rhs[1]) && (lhs[2] == rhs[2]) && (lhs[3] == rhs[3]) && (lhs[4] == rhs[4]) && (lhs[5] == rhs[5]);
641 | }
642 | 
643 | // inequality
644 | _CPU_AND_GPU_CODE_ friend bool operator != (const Vector6 &lhs, const Vector6 &rhs) {
645 | return (lhs[0] != rhs[0]) || (lhs[1] != rhs[1]) || (lhs[2] != rhs[2]) || (lhs[3] != rhs[3]) || (lhs[4] != rhs[4]) || (lhs[5] != rhs[5]);
646 | }
647 | 
648 | friend std::ostream& operator<<(std::ostream& os, const Vector6& dt){
649 | os << dt[0] << ", " << dt[1] << ", " << dt[2] << ", " << dt[3] << ", " << dt[4] << ", " << dt[5];
650 | return os;
651 | }
652 | };
653 | 
654 | 
655 | template <class T, int s> class VectorX : public VectorX_ < T, s >
656 | {
657 | public:
658 | typedef T value_type;
659 | _CPU_AND_GPU_CODE_ inline int size() const { return this->vsize; }
660 | 
661 | ////////////////////////////////////////////////////////
662 | // Constructors
663 | ////////////////////////////////////////////////////////
664 | 
665 | _CPU_AND_GPU_CODE_ VectorX() { this->vsize = s; } // Default constructor
666 | _CPU_AND_GPU_CODE_ VectorX(const T &t) { this->vsize = s; for (int i = 0; i < s; i++) this->v[i] = t; } // Scalar constructor
667 | _CPU_AND_GPU_CODE_ VectorX(const T *tp) { this->vsize = s; for (int i = 0; i < s; i++) this->v[i] = tp[i]; } // Construct from array
668 | 
669 | // indexing operators
670 | _CPU_AND_GPU_CODE_ T &operator [](int i) { return this->v[i]; }
671 | _CPU_AND_GPU_CODE_ const T &operator [](int i) const { return this->v[i]; }
672 | 
673 | 
674 | _CPU_AND_GPU_CODE_ inline VectorX<int, s> toIntRound() const {
675 | VectorX<int, s> retv;
676 | for (int i = 0; i < s; i++) retv[i] = (int)ROUND(this->v[i]);
677 | return retv;
678 | }
679 | 
680 | _CPU_AND_GPU_CODE_ inline VectorX<unsigned char, s> toUChar() const {
681 | VectorX<int, s> vi = toIntRound();
682 | VectorX<unsigned char, s> retv;
683 | for (int i = 0; i < s; i++) retv[i] = (unsigned char)CLAMP(vi[i], 0, 255);
684 | return retv;
685 | }
686 | 
687 | _CPU_AND_GPU_CODE_ inline VectorX<float, s> toFloat() const {
688 | VectorX<float, s> retv;
689 | for (int i = 0; i < s; i++) retv[i] = (float) this->v[i];
690 | return retv;
691 | }
692 | 
693 | _CPU_AND_GPU_CODE_ const T *getValues() const { return this->v; }
694 | _CPU_AND_GPU_CODE_ VectorX &setValues(const T *rhs) { for (int i = 0; i < s; i++) this->v[i] = rhs[i]; return *this; }
695 | _CPU_AND_GPU_CODE_ void Clear(T v){
696 | for (int i = 0; i < s; i++)
697 | this->v[i] = v;
698 | }
699 | 
700 | 
701 | // type-cast operators
702 | _CPU_AND_GPU_CODE_ operator T *() { return this->v; }
703 | _CPU_AND_GPU_CODE_ operator const T *() const { return this->v; }
704 | 
705 | ////////////////////////////////////////////////////////
706 | // Math operators
707 | ////////////////////////////////////////////////////////
708 | 
709 | // scalar multiply assign
710 | _CPU_AND_GPU_CODE_ friend VectorX &operator *= (VectorX &lhs, T d) {
711 | for (int i = 0; i < s; i++) lhs[i] *= d; return lhs;
712 | }
713 | 
714 | // component-wise vector multiply assign
715 | _CPU_AND_GPU_CODE_ friend VectorX &operator *= (VectorX &lhs, const VectorX &rhs) {
716 | for (int i = 0; i < s; i++) lhs[i] *= rhs[i]; return lhs;
717 | }
718 | 
719 | // scalar divide assign
720 | _CPU_AND_GPU_CODE_ friend VectorX &operator /= (VectorX &lhs, T d){
721 | for (int i = 0; i < s; i++) lhs[i] /= d; return lhs;
722 | }
723 | 
724 | // component-wise vector divide assign
725 | _CPU_AND_GPU_CODE_ friend VectorX &operator /= (VectorX &lhs, const VectorX &rhs) {
726 | for (int i = 0; i < s; i++) lhs[i] /= rhs[i]; return lhs;
727 | }
728 | 
729 | // component-wise vector add assign
730 | _CPU_AND_GPU_CODE_ friend VectorX &operator += (VectorX &lhs, const VectorX &rhs) {
731 | for (int i = 0; i < s; i++) lhs[i] += rhs[i]; return lhs;
732 | }
733 | 
734 | // component-wise vector subtract assign
735 | _CPU_AND_GPU_CODE_ friend VectorX &operator -= (VectorX &lhs, const VectorX &rhs) {
736 | for (int i = 0; i < s; i++) lhs[i] -= rhs[i]; return lhs;
737 | }
738 | 
739 | // unary negate
740 | _CPU_AND_GPU_CODE_ friend VectorX operator - (const VectorX &rhs) {
741 | VectorX rv; for (int i = 0; i < s; i++) rv[i] = -rhs[i]; return rv;
742 | }
743 | 
744 | // vector add
745 | _CPU_AND_GPU_CODE_ friend VectorX operator + (const VectorX &lhs, const VectorX &rhs) {
746 | VectorX rv(lhs); return rv += rhs;
747 | }
748 | 
749 | // vector subtract
750 | _CPU_AND_GPU_CODE_ friend VectorX operator - (const VectorX &lhs, const VectorX &rhs) {
751 | VectorX rv(lhs); return rv -= rhs;
752 | }
753 | 
754 | // scalar multiply
755 | _CPU_AND_GPU_CODE_ friend VectorX operator * (const VectorX &lhs, T rhs) {
756 | VectorX rv(lhs); return rv *= rhs;
757 | }
758 | 
759 | // scalar multiply
760 | _CPU_AND_GPU_CODE_ friend VectorX operator * (T lhs, const VectorX &rhs) {
761 | VectorX rv(lhs); return rv *= rhs;
762 | }
763 | 
764 | // vector component-wise multiply
765 | _CPU_AND_GPU_CODE_ friend VectorX operator * (const VectorX &lhs, const VectorX &rhs) {
766 | VectorX rv(lhs); return rv *= rhs;
767 | }
768 | 
769 | // scalar divide
770 | _CPU_AND_GPU_CODE_ friend VectorX operator / (const VectorX &lhs, T rhs) {
771 | VectorX rv(lhs); return rv /= rhs;
772 | }
773 | 
774 | // vector component-wise divide
775 | _CPU_AND_GPU_CODE_ friend VectorX operator / (const VectorX &lhs, const VectorX &rhs) {
776 | VectorX rv(lhs); return rv /= rhs;
777 | }
778 | 
779 | ////////////////////////////////////////////////////////
780 | // Comparison operators
781 | ////////////////////////////////////////////////////////
782 | 
783 | // equality
784 | _CPU_AND_GPU_CODE_ friend bool operator == (const VectorX &lhs, const VectorX &rhs) {
785 | for (int i = 0; i < s; i++) if (lhs[i] != rhs[i]) return false;
786 | return true;
787 | }
788 | 
789 | // inequality
790 | _CPU_AND_GPU_CODE_ friend bool operator != (const VectorX &lhs, const VectorX &rhs) {
791 | for (int i = 0; i < s; i++) if (lhs[i] != rhs[i]) return true;
792 | return false;
793 | }
794 | 
795 | friend std::ostream& operator<<(std::ostream& os, const VectorX& dt){
796 | for (int i = 0; i < s; i++) os << dt[i] << "\n";
797 | return os;
798 | }
799 | };
800 | 
801 | ////////////////////////////////////////////////////////////////////////////////
802 | // Generic vector operations
803 | ////////////////////////////////////////////////////////////////////////////////
804 | 
805 | template< class T> _CPU_AND_GPU_CODE_ inline T sqr(const T &v) { return v*v; }
806 | 
807 | // compute the dot product of two vectors
808 | template <class T> _CPU_AND_GPU_CODE_ inline typename T::value_type dot(const T &lhs, const T &rhs) {
809 | typename T::value_type r = 0;
810 | for (int i = 0; i < lhs.size(); i++)
811 | r += lhs[i] * rhs[i];
812 | return r;
813 | }
814 | 
815 | // return the length of the provided vector
816 | template< class T> _CPU_AND_GPU_CODE_ inline typename T::value_type length(const T &vec) {
817 | return sqrt(dot(vec, vec));
818 | }
819 | 
820 | // return the normalized version of the vector
821 | template< class T> _CPU_AND_GPU_CODE_ inline T normalize(const T &vec) {
822 | typename T::value_type sum = length(vec);
823 | return sum == 0 ? T(typename T::value_type(0)) : vec / sum;
824 | }
825 | 
826 | //template< class T> _CPU_AND_GPU_CODE_ inline T min(const T &lhs, const T &rhs) {
827 | // return lhs <= rhs ? lhs : rhs;
828 | //}
829 | 
830 | //template< class T> _CPU_AND_GPU_CODE_ inline T max(const T &lhs, const T &rhs) {
831 | // return lhs >= rhs ? lhs : rhs;
832 | //}
833 | 
834 | // component-wise min
835 | template< class T> _CPU_AND_GPU_CODE_ inline T minV(const T &lhs, const T &rhs) {
836 | T rv;
837 | for (int i = 0; i < lhs.size(); i++)
838 | rv[i] = min(lhs[i], rhs[i]);
839 | return rv;
840 | }
841 | 
842 | // component-wise max
843 | template< class T>
844 | _CPU_AND_GPU_CODE_ inline T maxV(const T &lhs, const T &rhs) {
845 | T rv;
846 | for (int i = 0; i < lhs.size(); i++)
847 | rv[i] = max(lhs[i], rhs[i]);
848 | return rv;
849 | }
850 | }
--------------------------------------------------------------------------------
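
The listing above is the whole of the repository. For orientation, here is a minimal usage sketch of the Vector.h classes; it is a hypothetical file (demo.cpp is not part of the sources) and it assumes a plain host C++ build, so _CPU_AND_GPU_CODE_ expands to nothing, and it assumes MathUtils.h is what supplies the ROUND and CLAMP macros that Vector.h uses unqualified, so it must be included first.

// demo.cpp -- hypothetical, not part of the ORUtils sources: a minimal
// sketch of the Vector API under the assumptions stated above.
#include <iostream>

#include "MathUtils.h"            // assumed source of ROUND/CLAMP used by Vector.h
#include "PlatformIndependence.h" // _CPU_AND_GPU_CODE_ expands to nothing on the host
#include "Vector.h"

int main()
{
	ORUtils::Vector3<float> a(1.0f, 0.0f, 0.0f);
	ORUtils::Vector3<float> b(0.0f, 1.0f, 0.0f);

	// cross() is the Vector3-specific friend; dot(), length() and
	// normalize() are the generic templates at the end of Vector.h,
	// all found via argument-dependent lookup.
	ORUtils::Vector3<float> c = cross(a, b);      // (0, 0, 1)
	float d = dot(a + b, c);                      // 0
	ORUtils::Vector3<float> n = normalize(a + b); // (0.7071, 0.7071, 0)

	// conversion helpers round and clamp component-wise to [0, 255]
	ORUtils::Vector3<unsigned char> px = (n * 255.0f).toUChar();

	std::cout << c << " | " << d << " | " << n << " | " << (int)px.x << std::endl;
	return 0;
}

Note that the friend operators are only findable through a Vector argument (ADL), which is why they are defined inline in each class rather than as namespace-scope templates.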
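Similarly, a sketch of how the shared CPU/GPU buffers from MetalContext.mm are meant to be paired, assuming an Apple build in which a MetalContext.h header (not shown above) declares allocateMetalData and freeMetalData; the sizes and names below are illustrative only. The same size and roundUp arguments must be passed to both calls, since freeMetalData recomputes the mmap'd length from them.

// Hypothetical snippet, Apple builds only: pairing the MetalContext.mm helpers.
// allocateMetalData mmap()s an anonymous region and wraps it in a no-copy
// MTLBuffer via BUFFERNOCOPY, so CPU writes are visible to Metal kernels.
void *cpuData = NULL;
void *metalBuffer = NULL;
int nBytes = 640 * 480 * (int)sizeof(float); // illustrative image-sized buffer

allocateMetalData(&cpuData, &metalBuffer, nBytes, true); // rounds up to a 16384 multiple
// ... write data through cpuData, bind metalBuffer in a compute command encoder ...
freeMetalData(&cpuData, &metalBuffer, nBytes, true);     // must mirror the allocation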