├── .attic └── memcpy_memset.txt ├── README.md ├── common.h ├── config-defaults.h ├── config.h ├── everything.h └── vectormath ├── mat44_multiply.h ├── vec3_dot.h ├── vec4_dot.h ├── vec4_homogenize.h ├── vec4_reciprocal.h ├── vec4n_dot.h └── vector3.h /.attic/memcpy_memset.txt: -------------------------------------------------------------------------------- 1 | 2 | (Need to enable PLD, see 3 | http://infocenter.arm.com/help/topic/com.arm.doc.ddi0344b/Babjbfdb.html 4 | http://infocenter.arm.com/help/topic/com.arm.doc.ddi0344k/Cbbbdaed.html 5 | http://infocenter.arm.com/help/topic/com.arm.doc.ddi0344k/Bgbciiaf.html 6 | ) 7 | 8 | memcpy_neon: 9 | push {r4-r11} 10 | mov r3, r0 11 | 1: subs r2, r2, #128 12 | pld [r1, #64] 13 | pld [r1, #256] 14 | pld [r1, #320] 15 | ldm r1!, {r4-r11} 16 | vld1.64 {d0-d3}, [r1,:128]! 17 | vld1.64 {d4-d7}, [r1,:128]! 18 | vld1.64 {d16-d19}, [r1,:128]! 19 | stm r3!, {r4-r11} 20 | vst1.64 {d0-d3}, [r3,:128]! 21 | vst1.64 {d4-d7}, [r3,:128]! 22 | vst1.64 {d16-d19}, [r3,:128]! 23 | bgt 1b 24 | pop {r4-r11} 25 | bx lr 26 | 27 | memset_neon_1: http://gitorious.org/0xdroid/bionic/commit/780898e723d883e0ed13387f11066275121048b9 28 | memset_neon_2: 29 | push {r4-r11} 30 | mov r3, r0 31 | vdup.8 q0, r1 32 | vmov q1, q0 33 | orr r4, r1, r1, lsl #8 34 | orr r4, r4, r4, lsl #16 35 | mov r5, r4 36 | mov r6, r4 37 | mov r7, r4 38 | mov r8, r4 39 | mov r9, r4 40 | mov r10, r4 41 | mov r11, r4 42 | add r12, r3, r2, lsr #2 43 | 1: subs r2, r2, #128 44 | pld [r3, #64] 45 | stm r3!, {r4-r11} 46 | vst1.64 {d0-d3}, [r12,:128]! 47 | vst1.64 {d0-d3}, [r12,:128]! 48 | vst1.64 {d0-d3}, [r12,:128]! 49 | bgt 1b 50 | pop {r4-r11} 51 | bx lr 52 | 53 | 54 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ARMv7 Functions 2 | =============== 3 | 4 | This is a collection of various functions optimized for __armv7__ and __neon__. 
5 | 6 | The five holy laws 7 | ------------------ 8 | 9 | 1. __Never return floating-point values by value__. It would work fine if -mfloat-abi=hard were supported everywhere, but sadly it's not. With the more common -mfloat-abi=softfp, every time you write return my_float_value, the compiler emits either an fmrs or a vstr, followed by a load operation in order to read the result back! __Instead, use a non-const reference as the first parameter__. It allows super smooth inlining of your intermediate results without unnecessary loads and stores, just as would happen if hard floats were available (this works for vector types too)! 10 | 2. __Try to minimize loads and stores__. Note, however, that GCC doesn't support evolved vldmia/vstmia and will generate poor code for operations on float32x4x4_t, so hand-coding them makes sense in that case. 11 | 3. __Use vector types everywhere it makes sense__. Functions prefixed with vec3_ and vec4_ work directly on float32x4_t. Those prefixed with mat44_ work directly on float32x4x4_t. Parameters are passed as references, so the compiler doesn't perform unnecessary ARM register transfers. 12 | 4. __Don't hard-code registers__; use dummy values for clobbers instead, and let the compiler allocate registers as needed. 13 | 5. __A good clobber list is an empty clobber list__. If you let the compiler handle loads for you, "_memory_" shouldn't even show up in your clobber list. The only item that might is "_cc_". 14 | 15 | Compilation flags 16 | ----------------- 17 | 18 | For best performance I usually use the following CFLAGS: -mthumb -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp -mvectorize-with-neon-quad -O3 -ffast-math -fomit-frame-pointer -fstrict-aliasing -fgcse-las -funsafe-loop-optimizations -fsee -ftree-vectorize, with -arch armv7 if it's _gcc for iOS_ or -march=armv7-a if it's _eabi-none-gcc_. 19 | 20 | Preprocessor macros 21 | ------------------- 22 | 23 | Several preprocessor macros, when defined, change the behaviour of the code. 
See config.h and config-defaults.h for details… 24 | 25 | -------------------------------------------------------------------------------- /common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | // A few sanity checks 7 | #ifndef __FAST_MATH__ 8 | #error "Please add -ffast-math to your compile flags" 9 | #endif 10 | 11 | #ifndef __ARM_NEON__ 12 | #error "Please add -mfpu=neon to your compile flags" 13 | #endif 14 | 15 | // Most needed header 16 | #include 17 | 18 | // Standard defines, prefixed with ARMV7_FUNC_*. 19 | #define ARMV7_FUNC_API static inline __attribute__((always_inline)) 20 | #define ARMV7_FUNC_PI 0x1.921FB54442D18p1f 21 | #define ARMV7_FUNC_2PI 0x1.921FB54442D18p2f 22 | 23 | // Standard types 24 | namespace ARM7_FUNC_NAMESPACE { 25 | typedef float32x4_t vector3_t; 26 | typedef float32x4_t vector4_t; 27 | typedef float32x4x4_t matrix44_t; 28 | } 29 | 30 | -------------------------------------------------------------------------------- /config-defaults.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS 4 | #define ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS 2 5 | #endif 6 | 7 | #ifndef ARM7_FUNC_NAMESPACE 8 | #define ARM7_FUNC_NAMESPACE armv7func 9 | #endif 10 | 11 | -------------------------------------------------------------------------------- /config.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Number of Newton-Raphson iterations used for 4 | // computing 1/w, in vector normalizations. 
5 | // Default to 2 6 | //#define ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS 2 7 | 8 | // Name of our namespace 9 | // Default is "armv7func" 10 | //#define ARM7_FUNC_NAMESPACE armv7_is_cool 11 | 12 | -------------------------------------------------------------------------------- /everything.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | //#include 13 | 14 | -------------------------------------------------------------------------------- /vectormath/mat44_multiply.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace ARM7_FUNC_NAMESPACE { 5 | // result.val[i] = sum over k of b.val[k] * (lane k of a.val[i]); result may be the same object as a and/or b. 6 | ARMV7_FUNC_API void mat44_multiply(matrix44_t& result, const matrix44_t& a, const matrix44_t& b) { 7 | // r = first column of B x first row of A (kept in locals: nothing is stored to result before the last step) 8 | float32x4_t r0 = vmulq_lane_f32(b.val[0], vget_low_f32(a.val[0]), 0); 9 | float32x4_t r1 = vmulq_lane_f32(b.val[0], vget_low_f32(a.val[1]), 0); 10 | float32x4_t r2 = vmulq_lane_f32(b.val[0], vget_low_f32(a.val[2]), 0); 11 | float32x4_t r3 = vmulq_lane_f32(b.val[0], vget_low_f32(a.val[3]), 0); 12 | // r += second column of B x second row of A 13 | r0 = vmlaq_lane_f32(r0, b.val[1], vget_low_f32(a.val[0]), 1); 14 | r1 = vmlaq_lane_f32(r1, b.val[1], vget_low_f32(a.val[1]), 1); 15 | r2 = vmlaq_lane_f32(r2, b.val[1], vget_low_f32(a.val[2]), 1); 16 | r3 = vmlaq_lane_f32(r3, b.val[1], vget_low_f32(a.val[3]), 1); 17 | // r += third column of B x third row of A 18 | r0 = vmlaq_lane_f32(r0, b.val[2], vget_high_f32(a.val[0]), 0); 19 | r1 = vmlaq_lane_f32(r1, b.val[2], vget_high_f32(a.val[1]), 0); 20 | r2 = vmlaq_lane_f32(r2, b.val[2], vget_high_f32(a.val[2]), 0); 21 | r3 = 
vmlaq_lane_f32(r3, b.val[2], vget_high_f32(a.val[3]), 0); 22 | // result = r + last column of B x last row of A; each store only overwrites a row (val[i]) that no later statement reads, which keeps aliasing safe 23 | result.val[0] = vmlaq_lane_f32(r0, b.val[3], vget_high_f32(a.val[0]), 1); 24 | result.val[1] = vmlaq_lane_f32(r1, b.val[3], vget_high_f32(a.val[1]), 1); 25 | result.val[2] = vmlaq_lane_f32(r2, b.val[3], vget_high_f32(a.val[2]), 1); 26 | result.val[3] = vmlaq_lane_f32(r3, b.val[3], vget_high_f32(a.val[3]), 1); 27 | 28 | #if 0 29 | // Original, hand-written assembly: 30 | // Pros: 31 | // * used vldmia/vstmia, which gcc can't at this point 32 | // * used Um constraints and %m operand, allowing gcc 33 | // to use "sp" where it made sense 34 | // Cons: 35 | // * performed the full matrix multiplication, even 36 | // when only a sub-expression was really used. The 37 | // intrinsics code above, OTOH, has parts optimized 38 | // away by the compiler. 39 | asm volatile( 40 | "vldmia %m[a], {q4-q7}\n\t" 41 | "vldmia %m[b], {q8-q11}\n\t" 42 | "vmul.f32 q0, q8, d8[0]\n\t" 43 | "vmul.f32 q1, q8, d10[0]\n\t" 44 | "vmul.f32 q2, q8, d12[0]\n\t" 45 | "vmul.f32 q3, q8, d14[0]\n\t" 46 | "vmla.f32 q0, q9, d8[1]\n\t" 47 | "vmla.f32 q1, q9, d10[1]\n\t" 48 | "vmla.f32 q2, q9, d12[1]\n\t" 49 | "vmla.f32 q3, q9, d14[1]\n\t" 50 | "vmla.f32 q0, q10, d9[0]\n\t" 51 | "vmla.f32 q1, q10, d11[0]\n\t" 52 | "vmla.f32 q2, q10, d13[0]\n\t" 53 | "vmla.f32 q3, q10, d15[0]\n\t" 54 | "vmla.f32 q0, q11, d9[1]\n\t" 55 | "vmla.f32 q1, q11, d11[1]\n\t" 56 | "vmla.f32 q2, q11, d13[1]\n\t" 57 | "vmla.f32 q3, q11, d15[1]\n\t" 58 | "vstmia %m[result], {q0-q3}" 59 | : 60 | : [result] "Um" (result), [a] "Um" (a), [b] "Um" (b) 61 | : "memory", 62 | "q0", "q1", "q2", "q3", 63 | "q4", "q5", "q6", "q7", 64 | "q8", "q9","q10", "q11" 65 | ); 66 | #endif 67 | } 68 | 69 | } // ARM7_FUNC_NAMESPACE 70 | 71 | -------------------------------------------------------------------------------- /vectormath/vec3_dot.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace ARM7_FUNC_NAMESPACE { 5 | // result = dot(a.xyz, b.xyz): lane 3 of the per-lane product is zeroed before the lanes are summed. 6 | ARMV7_FUNC_API void vec3_dot(float& result, const vector3_t& a, const vector3_t& b) { 7 | register vector3_t tmp; 8 | register const int zero(0); 9 | asm volatile ( 10 | "# %q[tmp].x = dot(%q[a].xyz, %q[b].xyz);\n\t" 11 | "vmul.f32 %q[tmp], %q[a], %q[b]\n\t" 12 | "vmov.32 %f[tmp][1], %[zero]\n\t" 13 | "vadd.f32 %e[tmp], %e[tmp], %f[tmp]\n\t" 14 | "vpadd.f32 %e[tmp], %e[tmp]" 15 | : [tmp] "=&w" (tmp) : [a] "w" (a), [b] "w" (b), [zero] "r" (zero): ); 16 | result = vgetq_lane_f32(tmp, 0); 17 | } 18 | 19 | } // ARM7_FUNC_NAMESPACE 20 | 21 | -------------------------------------------------------------------------------- /vectormath/vec4_dot.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace ARM7_FUNC_NAMESPACE { 5 | // result = dot(a.xyzw, b.xyzw). 6 | ARMV7_FUNC_API void vec4_dot(float& result, const vector4_t& a, const vector4_t& b) { 7 | register vector4_t tmp; 8 | asm volatile ( 9 | "# %q[tmp].x = dot(%q[a].xyzw, %q[b].xyzw);\n\t" 10 | "vmul.f32 %q[tmp], %q[a], %q[b]\n\t" 11 | "vadd.f32 %e[tmp], %e[tmp], %f[tmp]\n\t" 12 | "vpadd.f32 %e[tmp], %e[tmp]" 13 | : [tmp] "=&w" (tmp) : [a] "w" (a), [b] "w" (b) : ); 14 | result = vgetq_lane_f32(tmp, 0); 15 | } 16 | 17 | } // ARM7_FUNC_NAMESPACE 18 | 19 | -------------------------------------------------------------------------------- /vectormath/vec4_homogenize.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | namespace ARM7_FUNC_NAMESPACE { 6 | // Multiplies vec in place by the reciprocal (as computed by vec4_reciprocal) of its last lane, broadcast to all lanes. 7 | ARMV7_FUNC_API void vec4_homogenize(vector4_t& vec) { 8 | asm volatile("#begin vec4_homogenize" :::); 9 | register vector4_t wwww, wwww_recp; 10 | 11 | // This uses a neon>arm transfer! Why?? 
12 | // tmp1 = vdupq_n_f32(vgetq_lane_f32(vec, 3)); 13 | asm volatile("vdup.f32 %q[wwww], %f[xyzw][1]" : [wwww] "=&w" (wwww) : [xyzw] "w" (vec) : ); 14 | 15 | vec4_reciprocal(wwww_recp, wwww); 16 | vec *= wwww_recp; 17 | asm volatile("#end vec4_homogenize" :::); 18 | } 19 | 20 | } // ARM7_FUNC_NAMESPACE 21 | 22 | -------------------------------------------------------------------------------- /vectormath/vec4_reciprocal.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace ARM7_FUNC_NAMESPACE { 5 | 6 | namespace details { 7 | // Applies N Newton-Raphson refinement steps to x_recp, the current estimate of 1/x (compile-time recursion on N). 8 | template<int N> 9 | struct NewtonRaphsonStepper { 10 | static inline __attribute__((always_inline)) void exec(float32x4_t& x_recp, const float32x4_t& x) { 11 | x_recp*= vrecpsq_f32(x_recp, x); 12 | NewtonRaphsonStepper<N - 1>::exec(x_recp, x); 13 | } 14 | }; 15 | // N == 0 ends the recursion: no refinement left to apply. 16 | template<> 17 | struct NewtonRaphsonStepper<0> { 18 | static inline __attribute__((always_inline)) void exec(float32x4_t& x_recp, const float32x4_t& x) { } 19 | }; 20 | } 21 | // result = per-lane reciprocal estimate of vec (vrecpeq_f32), refined by ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS Newton-Raphson steps. 22 | ARMV7_FUNC_API void vec4_reciprocal(vector4_t& result, const vector4_t& vec) { 23 | asm volatile("#begin vec4_reciprocal" :::); 24 | result = vrecpeq_f32(vec); 25 | details::NewtonRaphsonStepper<ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS>::exec(result, vec); 26 | asm volatile("#end vec4_reciprocal" :::); 27 | } 28 | 29 | } // ARM7_FUNC_NAMESPACE 30 | 31 | -------------------------------------------------------------------------------- /vectormath/vec4n_dot.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace ARM7_FUNC_NAMESPACE { 5 | 6 | // Compute the dot product of two vectors of 4n floats 7 | ARMV7_FUNC_API void vec4n_dot(float& result, const vector4_t* a, const vector4_t* b, unsigned int n) { 8 | register vector4_t tmp, va, vb; 9 | asm volatile ( 10 | "# %q[tmp].x = dot(%q[va][i], %q[vb][i]) for (i=0; i 3 | #include 4 | // TODO: 5 | // #include 6 | #error "Please don't use this, it's not ready" 7 
| 8 | namespace ARM7_FUNC_NAMESPACE { 9 | 10 | class Vector3Static; 11 | class Vector3 { 12 | private: 13 | float32x4_t val; 14 | friend class Vector3Static; 15 | 16 | public: 17 | Vector3() 18 | { } 19 | 20 | Vector3(const Vector3 & vec) 21 | : val(vec.val) 22 | { } 23 | 24 | Vector3(const float32_t& x, const float32_t& y, const float32_t& z) { 25 | setX(x); 26 | setY(y); 27 | setZ(z); 28 | } 29 | 30 | explicit 31 | Vector3(const float32_t& scalar) 32 | : val(vdupq_n_f32(scalar)) 33 | { } 34 | 35 | explicit 36 | Vector3(const float32x4_t& v) 37 | : val(v) 38 | { } 39 | 40 | Vector3& operator=(const Vector3& vec) { 41 | val=vec.val; 42 | return *this; 43 | } 44 | 45 | Vector3& setX(const float32_t& x) { 46 | return setElem(0, x); 47 | } 48 | 49 | Vector3& setY(const float32_t& y) { 50 | return setElem(1, y); 51 | } 52 | 53 | Vector3& setZ(const float32_t& z) { 54 | return setElem(2, z); 55 | } 56 | 57 | float32_t getX() const { 58 | return getElem(0); 59 | } 60 | 61 | float32_t getY() const { 62 | return getElem(1); 63 | } 64 | 65 | float32_t getZ() const { 66 | return getElem(2); 67 | } 68 | 69 | Vector3& setElem(int idx, const float32_t& value) { 70 | val = vsetq_lane_f32(value, val, idx); 71 | return *this; 72 | } 73 | 74 | float32_t getElem(int idx) const { 75 | return vgetq_lane_f32(val, idx); 76 | } 77 | 78 | float32_t operator[](int idx) const { 79 | return getElem(idx); 80 | } 81 | 82 | const Vector3 operator+(const Vector3& vec) const { 83 | return Vector3(val + vec.val); 84 | } 85 | 86 | const Vector3 operator-(const Vector3& vec) const { 87 | return Vector3(val - vec.val); 88 | } 89 | 90 | const Vector3 operator*(const float32_t& scalar) const { 91 | return Vector3(val * vdupq_n_f32(scalar)); 92 | } 93 | 94 | const Vector3 operator/(const float32_t& scalar) const { 95 | float32x4_t tmp; 96 | vec4_reciprocal(tmp, vdupq_n_f32(scalar)); 97 | return Vector3(val * tmp); 98 | } 99 | 100 | Vector3& operator+=(const Vector3& vec) { 101 | val += vec.val; 102 | 
return *this; 103 | } 104 | 105 | Vector3& operator-=(const Vector3& vec) { 106 | val -= vec.val; 107 | return *this; 108 | } 109 | 110 | Vector3& operator*=(const float32_t& scalar) { 111 | val *= vdupq_n_f32(scalar); 112 | return *this; 113 | } 114 | 115 | Vector3& operator/=(const float32_t& scalar) { 116 | float32x4_t tmp; 117 | vec4_reciprocal(tmp, vdupq_n_f32(scalar)); 118 | val *= tmp; 119 | return *this; 120 | } 121 | 122 | const Vector3 operator-( ) const { 123 | return Vector3(-val); 124 | } 125 | 126 | static const Vector3& xAxis() { 127 | static Vector3 instance((float32x4_t) { 1.f, 0.f, 0.f, 0.f }); 128 | return instance; 129 | } 130 | 131 | static const Vector3& yAxis() { 132 | static Vector3 instance((float32x4_t) { 0.f, 1.f, 0.f, 0.f }); 133 | return instance; 134 | } 135 | 136 | static const Vector3& zAxis() { 137 | static Vector3 instance((float32x4_t) { 0.f, 0.f, 1.f, 0.f }); 138 | return instance; 139 | } 140 | 141 | } __attribute__((aligned(16))); 142 | 143 | class Vector3Static { 144 | public: 145 | static const Vector3 mulPerElem(const Vector3& vec0, const Vector3& vec1) { 146 | return Vector3(vec0.val * vec1.val); 147 | } 148 | static const Vector3 divPerElem(const Vector3& vec0, const Vector3& vec1 ) { 149 | return mulPerElem(vec0, recipPerElem(vec1)); 150 | } 151 | static const Vector3 recipPerElem(const Vector3& vec) { 152 | float32x4_t tmp; 153 | vec4_reciprocal(tmp, vec.val); 154 | return Vector3(tmp); 155 | } 156 | 157 | static const Vector3 sqrtPerElem(const Vector3& vec) { 158 | float32x4_t tmp; 159 | // TODO: 160 | // vec4_reciprocal_squareroot(tmp, vec.val); 161 | // vec4_reciprocal(tmp, tmp); 162 | return Vector3(tmp); 163 | } 164 | 165 | static const Vector3 rsqrtPerElem(const Vector3& vec) { 166 | float32x4_t tmp; 167 | // TODO: 168 | // vec4_reciprocal_squareroot(tmp, vec.val); 169 | return Vector3(tmp); 170 | } 171 | 172 | static const Vector3 absPerElem(const Vector3& vec); 173 | static const Vector3 copySignPerElem(const 
Vector3& vec0, const Vector3& vec1); 174 | static const Vector3 maxPerElem(const Vector3& vec0, const Vector3& vec1); 175 | static const Vector3 minPerElem(const Vector3& vec0, const Vector3& vec1); 176 | static float maxElem(const Vector3& vec); 177 | static float minElem(const Vector3& vec); 178 | static float sum(const Vector3& vec); 179 | static float dot(const Vector3& vec0, const Vector3 & vec1); 180 | static float lengthSqr(const Vector3& vec); 181 | static float length(const Vector3& vec); 182 | static const Vector3 normalize(const Vector3& vec); 183 | static const Vector3 cross(const Vector3& vec0, const Vector3& vec1); 184 | }; 185 | 186 | inline const Vector3 operator*(const float32_t& scalar, const Vector3& vec) { 187 | return vec*scalar; 188 | } 189 | 190 | inline const Vector3 mulPerElem(const Vector3& vec0, const Vector3& vec1) { 191 | return Vector3Static::mulPerElem(vec0, vec1); 192 | } 193 | 194 | inline const Vector3 divPerElem(const Vector3& vec0, const Vector3& vec1) { 195 | return Vector3Static::divPerElem(vec0, vec1); 196 | } 197 | 198 | inline const Vector3 recipPerElem(const Vector3& vec) { 199 | return Vector3Static::recipPerElem(vec); 200 | } 201 | 202 | inline const Vector3 sqrtPerElem( const Vector3 & vec ); 203 | inline const Vector3 rsqrtPerElem( const Vector3 & vec ); 204 | inline const Vector3 absPerElem( const Vector3 & vec ); 205 | inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 ); 206 | inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 ); 207 | inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 ); 208 | inline float maxElem( const Vector3 & vec ); 209 | inline float minElem( const Vector3 & vec ); 210 | inline float sum( const Vector3 & vec ); 211 | inline float dot( const Vector3 & vec0, const Vector3 & vec1 ); 212 | inline float lengthSqr( const Vector3 & vec ); 213 | inline float length( const Vector3 & vec ); 214 | inline const Vector3 
normalize( const Vector3 & vec ); 215 | inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 ); 216 | 217 | //inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 ); 218 | //inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat ); 219 | //inline const Matrix3 crossMatrix( const Vector3 & vec ); 220 | //inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat ); 221 | 222 | inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 ); 223 | inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 ); 224 | inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 ); 225 | 226 | } // ARM7_FUNC_NAMESPACE 227 | 228 | --------------------------------------------------------------------------------