├── .attic
│   └── memcpy_memset.txt
├── README.md
├── common.h
├── config-defaults.h
├── config.h
├── everything.h
└── vectormath
    ├── mat44_multiply.h
    ├── vec3_dot.h
    ├── vec4_dot.h
    ├── vec4_homogenize.h
    ├── vec4_reciprocal.h
    ├── vec4n_dot.h
    └── vector3.h
/.attic/memcpy_memset.txt:
--------------------------------------------------------------------------------
1 |
2 | (Need to enable PLD, see
3 | http://infocenter.arm.com/help/topic/com.arm.doc.ddi0344b/Babjbfdb.html
4 | http://infocenter.arm.com/help/topic/com.arm.doc.ddi0344k/Cbbbdaed.html
5 | http://infocenter.arm.com/help/topic/com.arm.doc.ddi0344k/Bgbciiaf.html
6 | )
7 |
8 | memcpy_neon:
9 | push {r4-r11}
10 | mov r3, r0
11 | 1: subs r2, r2, #128
12 | pld [r1, #64]
13 | pld [r1, #256]
14 | pld [r1, #320]
15 | ldm r1!, {r4-r11}
16 | vld1.64 {d0-d3}, [r1,:128]!
17 | vld1.64 {d4-d7}, [r1,:128]!
18 | vld1.64 {d16-d19}, [r1,:128]!
19 | stm r3!, {r4-r11}
20 | vst1.64 {d0-d3}, [r3,:128]!
21 | vst1.64 {d4-d7}, [r3,:128]!
22 | vst1.64 {d16-d19}, [r3,:128]!
23 | bgt 1b
24 | pop {r4-r11}
25 | bx lr
26 |
27 | memset_neon_1: http://gitorious.org/0xdroid/bionic/commit/780898e723d883e0ed13387f11066275121048b9
28 | memset_neon_2:
29 | push {r4-r11}
30 | mov r3, r0
31 | vdup.8 q0, r1
32 | vmov q1, q0
33 | orr r4, r1, r1, lsl #8
34 | orr r4, r4, r4, lsl #16
35 | mov r5, r4
36 | mov r6, r4
37 | mov r7, r4
38 | mov r8, r4
39 | mov r9, r4
40 | mov r10, r4
41 | mov r11, r4
42 | add r12, r3, r2, lsr #2
43 | 1: subs r2, r2, #128
44 | pld [r3, #64]
45 | stm r3!, {r4-r11}
46 | vst1.64 {d0-d3}, [r12,:128]!
47 | vst1.64 {d0-d3}, [r12,:128]!
48 | vst1.64 {d0-d3}, [r12,:128]!
49 | bgt 1b
50 | pop {r4-r11}
51 | bx lr
52 |
53 |
54 |
--------------------------------------------------------------------------------
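The two routines above process 128 bytes per loop iteration and use `:128` alignment hints, so they expect 16-byte-aligned buffers and sizes that are multiples of 128. A minimal usage sketch, assuming the assembly is built into the project and exported under these (hypothetical) C-style signatures:

```cpp
#include <cstddef>
#include <cstdint>

extern "C" {
    // Hypothetical prototypes matching the register usage above:
    // r0 = destination, r1 = source / fill byte, r2 = size in bytes.
    void memcpy_neon(void* dst, const void* src, std::size_t size);
    void memset_neon_2(void* dst, int fill_byte, std::size_t size);
}

int main() {
    alignas(16) static std::uint8_t src[1024];
    alignas(16) static std::uint8_t dst[1024];
    memset_neon_2(src, 0xAB, sizeof src);   // 1024 is a multiple of 128
    memcpy_neon(dst, src, sizeof dst);
    return dst[512] == 0xAB ? 0 : 1;
}
```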
/README.md:
--------------------------------------------------------------------------------
1 | ARMv7 Functions
2 | ===============
3 |
4 | This is a collection of various functions optimized for __armv7__ and __neon__.
5 |
6 | The five holy laws
7 | ------------------
8 |
9 | 1. __Never return floating point values by value__. It would work fine if `-mfloat-abi=hard` were supported everywhere, but sadly it's not. With the more common `-mfloat-abi=softfp`, every `return my_float_value` costs either an `fmrs` or a `vstr`, followed by a load to read the result back! __Instead, use a non-const reference as the first parameter__. It allows super smooth inlining of your intermediate results without unnecessary loads and stores, just as if hard floats were available (works for vector types too)! See the sketch below.
10 | 2. __Try to minimize loads and stores__. GCC doesn't make good use of `vldmia`/`vstmia` and will generate poor code for operations on `float32x4x4_t`, so hand-coding those makes sense in that case.
11 | 3. __Use vector types everywhere it makes sense__. Functions prefixed with `vec3_` and `vec4_` work directly on `float32x4_t`. Those prefixed with `mat44_` work directly on `float32x4x4_t`. Parameters are passed as references, so the compiler doesn't perform unnecessary ARM register transfers.
12 | 4. __Don't hard-code registers__; use dummy output variables instead of clobbering them, and let the compiler allocate registers as needed.
13 | 5. __A good clobber list is an empty clobber list__. If you let the compiler handle loads for you, "_memory_" shouldn't even show up in your clobber list. The only item that might is "_cc_".
14 |
15 | Compilation flags
16 | -----------------
17 |
18 | For best performance I usually use the following CFLAGS: `-mthumb -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp -mvectorize-with-neon-quad -O3 -ffast-math -fomit-frame-pointer -fstrict-aliasing -fgcse-las -funsafe-loop-optimizations -fsee -ftree-vectorize`, plus `-arch armv7` with _gcc for iOS_ or `-march=armv7-a` with _eabi-none-gcc_.
19 |
20 | Preprocessor macros
21 | -------------------
22 |
23 | Several preprocessor macros, when defined, change the behaviour of the code. See `config.h` and `config-defaults.h` for details…
24 |
25 |
--------------------------------------------------------------------------------
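A minimal sketch of the convention described by laws 1 and 3 in the README above (the caller is hypothetical, and the `<everything.h>` include layout is assumed): results travel through non-const references, so nothing is ever returned through the soft-float ABI.

```cpp
#include <arm_neon.h>
#include <everything.h>          // assumed include layout

using namespace armv7func;       // default ARM7_FUNC_NAMESPACE

// out and d are written through references (law 1); the vector and matrix
// parameters are NEON types passed by reference (law 3), so the whole thing
// inlines without bouncing values through ARM core registers.
static inline void transform_and_measure(matrix44_t& out, float& d,
                                         const matrix44_t& m,
                                         const vector4_t& a,
                                         const vector4_t& b) {
    mat44_multiply(out, m, m);   // out = m * m
    vec4_dot(d, a, b);           // d = dot(a, b)
}
```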
/common.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <config.h>
4 | #include <config-defaults.h>
5 |
6 | // A few sanity checks
7 | #ifndef __FAST_MATH__
8 | #error "Please add -ffast-math to your compile flags"
9 | #endif
10 |
11 | #ifndef __ARM_NEON__
12 | #error "Please add -mfpu=neon to your compile flags"
13 | #endif
14 |
15 | // Most needed header
16 | #include <arm_neon.h>
17 |
18 | // Standard defines, prefixed with ARMV7_FUNC_*.
19 | #define ARMV7_FUNC_API static inline __attribute__((always_inline))
20 | #define ARMV7_FUNC_PI 0x1.921FB54442D18p1f
21 | #define ARMV7_FUNC_2PI 0x1.921FB54442D18p2f
22 |
23 | // Standard types
24 | namespace ARM7_FUNC_NAMESPACE {
25 | typedef float32x4_t vector3_t;
26 | typedef float32x4_t vector4_t;
27 | typedef float32x4x4_t matrix44_t;
28 | }
29 |
30 |
--------------------------------------------------------------------------------
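A side note on the two hex-float literals above: the mantissa 0x1.921FB54442D18 is pi/2, and the p1/p2 suffixes scale it by 2^1 and 2^2, giving pi and 2*pi. A small stand-alone check (illustration only, not part of the library):

```cpp
#include <cmath>
#include <cstdio>

int main() {
    // Same literals as ARMV7_FUNC_PI and ARMV7_FUNC_2PI in common.h.
    const float pi     = 0x1.921FB54442D18p1f;   // 1.5707963... * 2^1
    const float two_pi = 0x1.921FB54442D18p2f;   // 1.5707963... * 2^2
    std::printf("%.8g %.8g\n", pi - (float)M_PI, two_pi - 2.0f * (float)M_PI);  // both ~0
    return 0;
}
```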
/config-defaults.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #ifndef ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS
4 | #define ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS 2
5 | #endif
6 |
7 | #ifndef ARM7_FUNC_NAMESPACE
8 | #define ARM7_FUNC_NAMESPACE armv7func
9 | #endif
10 |
11 |
--------------------------------------------------------------------------------
/config.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | // Number of Newton-Raphson iterations used for
4 | // computing 1/w, in vector normalizations.
5 | // Defaults to 2
6 | //#define ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS 2
7 |
8 | // Name of our namespace
9 | // Default is "armv7func"
10 | //#define ARM7_FUNC_NAMESPACE armv7_is_cool
11 |
12 |
--------------------------------------------------------------------------------
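Since config-defaults.h only fills in macros that are still undefined, the two settings above can either be uncommented in config.h or supplied before the headers are included (e.g. with -D). A minimal sketch, assuming the include layout used by everything.h; the namespace name my_vectormath is just an example:

```cpp
// Override the defaults before any library header is pulled in.
#define ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS 3   // one extra refinement step
#define ARM7_FUNC_NAMESPACE my_vectormath

#include <everything.h>   // assumed include layout

void homogenize(my_vectormath::vector4_t& v) {
    my_vectormath::vec4_homogenize(v);   // now runs 3 Newton-Raphson steps
}
```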
/everything.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <common.h>
4 |
5 | #include <vectormath/mat44_multiply.h>
6 | #include <vectormath/vec3_dot.h>
7 | #include <vectormath/vec4_dot.h>
8 | #include <vectormath/vec4_homogenize.h>
9 | #include <vectormath/vec4_reciprocal.h>
10 | #include <vectormath/vec4n_dot.h>
11 |
12 | //#include <vectormath/vector3.h>
13 |
14 |
--------------------------------------------------------------------------------
/vectormath/mat44_multiply.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <common.h>
3 |
4 | namespace ARM7_FUNC_NAMESPACE {
5 |
6 | ARMV7_FUNC_API void mat44_multiply(matrix44_t& result, const matrix44_t& a, const matrix44_t& b) {
7 | // result = first column of B x first row of A
8 | result.val[0] = vmulq_lane_f32(b.val[0], vget_low_f32(a.val[0]), 0);
9 | result.val[1] = vmulq_lane_f32(b.val[0], vget_low_f32(a.val[1]), 0);
10 | result.val[2] = vmulq_lane_f32(b.val[0], vget_low_f32(a.val[2]), 0);
11 | result.val[3] = vmulq_lane_f32(b.val[0], vget_low_f32(a.val[3]), 0);
12 | // result += second column of B x second row of A
13 | result.val[0] = vmlaq_lane_f32(result.val[0], b.val[1], vget_low_f32(a.val[0]), 1);
14 | result.val[1] = vmlaq_lane_f32(result.val[1], b.val[1], vget_low_f32(a.val[1]), 1);
15 | result.val[2] = vmlaq_lane_f32(result.val[2], b.val[1], vget_low_f32(a.val[2]), 1);
16 | result.val[3] = vmlaq_lane_f32(result.val[3], b.val[1], vget_low_f32(a.val[3]), 1);
17 | // result += third column of B x third row of A
18 | result.val[0] = vmlaq_lane_f32(result.val[0], b.val[2], vget_high_f32(a.val[0]), 0);
19 | result.val[1] = vmlaq_lane_f32(result.val[1], b.val[2], vget_high_f32(a.val[1]), 0);
20 | result.val[2] = vmlaq_lane_f32(result.val[2], b.val[2], vget_high_f32(a.val[2]), 0);
21 | result.val[3] = vmlaq_lane_f32(result.val[3], b.val[2], vget_high_f32(a.val[3]), 0);
22 | // result += last column of B x last row of A
23 | result.val[0] = vmlaq_lane_f32(result.val[0], b.val[3], vget_high_f32(a.val[0]), 1);
24 | result.val[1] = vmlaq_lane_f32(result.val[1], b.val[3], vget_high_f32(a.val[1]), 1);
25 | result.val[2] = vmlaq_lane_f32(result.val[2], b.val[3], vget_high_f32(a.val[2]), 1);
26 | result.val[3] = vmlaq_lane_f32(result.val[3], b.val[3], vget_high_f32(a.val[3]), 1);
27 |
28 | #if 0
29 | // Original, hand-written assembly:
30 | // Pros:
31 | // * used vldmia/vstmia, which gcc can't at this point
32 | // * used Um constraints and %m operand, allowing gcc
33 | // to use "sp" where it made sense
34 | // Cons:
35 | // * performed the full matrix multiplication, even
36 | // when only a sub-expression was really used. The
37 | // intrinsics code above, OTOH, has parts optimized
38 | // away by the compiler.
39 | asm volatile(
40 | "vldmia %m[a], {q4-q7}\n\t"
41 | "vldmia %m[b], {q8-q11}\n\t"
42 | "vmul.f32 q0, q8, d8[0]\n\t"
43 | "vmul.f32 q1, q8, d10[0]\n\t"
44 | "vmul.f32 q2, q8, d12[0]\n\t"
45 | "vmul.f32 q3, q8, d14[0]\n\t"
46 | "vmla.f32 q0, q9, d8[1]\n\t"
47 | "vmla.f32 q1, q9, d10[1]\n\t"
48 | "vmla.f32 q2, q9, d12[1]\n\t"
49 | "vmla.f32 q3, q9, d14[1]\n\t"
50 | "vmla.f32 q0, q10, d9[0]\n\t"
51 | "vmla.f32 q1, q10, d11[0]\n\t"
52 | "vmla.f32 q2, q10, d13[0]\n\t"
53 | "vmla.f32 q3, q10, d15[0]\n\t"
54 | "vmla.f32 q0, q11, d9[1]\n\t"
55 | "vmla.f32 q1, q11, d11[1]\n\t"
56 | "vmla.f32 q2, q11, d13[1]\n\t"
57 | "vmla.f32 q3, q11, d15[1]\n\t"
58 | "vstmia %m[result], {q0-q3}"
59 | :
60 | : [result] "Um" (result), [a] "Um" (a), [b] "Um" (b)
61 | : "memory",
62 | "q0", "q1", "q2", "q3",
63 | "q4", "q5", "q6", "q7",
64 | "q8", "q9","q10", "q11"
65 | );
66 | #endif
67 | }
68 |
69 | } // ARM7_FUNC_NAMESPACE
70 |
71 |
--------------------------------------------------------------------------------
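A small usage sketch for mat44_multiply (the caller and the array layout are hypothetical; each val[i] is simply loaded with vld1q_f32 from 4 consecutive floats):

```cpp
#include <arm_neon.h>
#include <vectormath/mat44_multiply.h>   // assumed include layout

using namespace armv7func;               // default ARM7_FUNC_NAMESPACE

void multiply_arrays(float out[16], const float a[16], const float b[16]) {
    matrix44_t ma, mb, mr;
    for (int i = 0; i < 4; ++i) {
        ma.val[i] = vld1q_f32(a + 4 * i);   // one q-register per val[i]
        mb.val[i] = vld1q_f32(b + 4 * i);
    }
    mat44_multiply(mr, ma, mb);              // multiply, per the library's convention
    for (int i = 0; i < 4; ++i)
        vst1q_f32(out + 4 * i, mr.val[i]);
}
```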
/vectormath/vec3_dot.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <common.h>
3 |
4 | namespace ARM7_FUNC_NAMESPACE {
5 |
6 | ARMV7_FUNC_API void vec3_dot(float& result, const vector3_t& a, const vector3_t& b) {
7 | register vector3_t tmp;
8 | register const int zero(0);
9 | asm volatile (
10 | "# %q[tmp].x = dot(%q[a].xyz, %q[b].xyz);\n\t"
11 | "vmul.f32 %q[tmp], %q[a], %q[b]\n\t"
12 | "vmov.32 %f[tmp][1], %[zero]\n\t"
13 | "vadd.f32 %e[tmp], %e[tmp], %f[tmp]\n\t"
14 | "vpadd.f32 %e[tmp], %e[tmp]"
15 | : [tmp] "=&w" (tmp) : [a] "w" (a), [b] "w" (b), [zero] "r" (zero): );
16 | result = vgetq_lane_f32(tmp, 0);
17 | }
18 |
19 | } // ARM7_FUNC_NAMESPACE
20 |
21 |
--------------------------------------------------------------------------------
/vectormath/vec4_dot.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <common.h>
3 |
4 | namespace ARM7_FUNC_NAMESPACE {
5 |
6 | ARMV7_FUNC_API void vec4_dot(float& result, const vector4_t& a, const vector4_t& b) {
7 | register vector4_t tmp;
8 | asm volatile (
9 | "# %q[tmp].x = dot(%q[a].xyzw, %q[b].xyzw);\n\t"
10 | "vmul.f32 %q[tmp], %q[a], %q[b]\n\t"
11 | "vadd.f32 %e[tmp], %e[tmp], %f[tmp]\n\t"
12 | "vpadd.f32 %e[tmp], %e[tmp]"
13 | : [tmp] "=&w" (tmp) : [a] "w" (a), [b] "w" (b) : );
14 | result = vgetq_lane_f32(tmp, 0);
15 | }
16 |
17 | } // ARM7_FUNC_NAMESPACE
18 |
19 |
--------------------------------------------------------------------------------
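For reference, here is the same reduction the inline assembly in vec4_dot performs, written with plain intrinsics (illustration only; it returns the float by value, which law 1 of the README discourages). The vec3 variant differs only in that the w-lane product is zeroed before the halves are added, which is what the vmov.32 in vec3_dot does.

```cpp
#include <arm_neon.h>

static inline float vec4_dot_intrinsics(float32x4_t a, float32x4_t b) {
    float32x4_t prod = vmulq_f32(a, b);                    // {ax*bx, ay*by, az*bz, aw*bw}
    float32x2_t half = vadd_f32(vget_low_f32(prod),
                                vget_high_f32(prod));      // {ax*bx + az*bz, ay*by + aw*bw}
    half = vpadd_f32(half, half);                          // both lanes now hold the full sum
    return vget_lane_f32(half, 0);
}
```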
/vectormath/vec4_homogenize.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <common.h>
3 | #include <vectormath/vec4_reciprocal.h>
4 |
5 | namespace ARM7_FUNC_NAMESPACE {
6 |
7 | ARMV7_FUNC_API void vec4_homogenize(vector4_t& vec) {
8 | asm volatile("#begin vec4_homogenize" :::);
9 | register vector4_t wwww, wwww_recp;
10 |
11 | // This uses a neon->arm transfer! Why??
12 | // tmp1 = vdupq_n_f32(vgetq_lane_f32(vec, 3));
13 | asm volatile("vdup.f32 %q[wwww], %f[xyzw][1]" : [wwww] "=&w" (wwww) : [xyzw] "w" (vec) : );
14 |
15 | vec4_reciprocal(wwww_recp, wwww);
16 | vec *= wwww_recp;
17 | asm volatile("#end vec4_homogenize" :::);
18 | }
19 |
20 | } // ARM7_FUNC_NAMESPACE
21 |
22 |
--------------------------------------------------------------------------------
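A tiny usage sketch for vec4_homogenize (hypothetical caller): every component, including w itself, is multiplied by the approximate 1/w, so the result is the homogenized point with w close to 1.

```cpp
#include <arm_neon.h>
#include <vectormath/vec4_homogenize.h>   // assumed include layout

using namespace armv7func;                // default ARM7_FUNC_NAMESPACE

void example() {
    vector4_t v = { 2.f, 4.f, 6.f, 2.f };  // GCC vector-literal initialization
    vec4_homogenize(v);                    // v is now approximately {1, 2, 3, 1}
}
```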
/vectormath/vec4_reciprocal.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <common.h>
3 |
4 | namespace ARM7_FUNC_NAMESPACE {
5 |
6 | namespace details {
7 |
8 | template<int N>
9 | struct NewtonRaphsonStepper {
10 | static inline __attribute__((always_inline)) void exec(float32x4_t& x_recp, const float32x4_t& x) {
11 | x_recp *= vrecpsq_f32(x_recp, x);
12 | NewtonRaphsonStepper<N - 1>::exec(x_recp, x);
13 | }
14 | };
15 |
16 | template<>
17 | struct NewtonRaphsonStepper<0> {
18 | static inline __attribute__((always_inline)) void exec(float32x4_t& x_recp, const float32x4_t& x) { }
19 | };
20 | }
21 |
22 | ARMV7_FUNC_API void vec4_reciprocal(vector4_t& result, const vector4_t& vec) {
23 | asm volatile("#begin vec4_reciprocal" :::);
24 | result = vrecpeq_f32(vec);
25 | details::NewtonRaphsonStepper<ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS>::exec(result, vec);
26 | asm volatile("#end vec4_reciprocal" :::);
27 | }
28 |
29 | } // ARM7_FUNC_NAMESPACE
30 |
31 |
--------------------------------------------------------------------------------
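The refinement above relies on the semantics of vrecpsq_f32(e, x), which computes 2 - e*x, so `e *= vrecpsq_f32(e, x)` is one Newton-Raphson step e <- e * (2 - e*x); each step roughly doubles the number of correct bits of the vrecpeq_f32 estimate. A scalar illustration of the same iteration (the starting value 0.33 stands in for the hardware estimate):

```cpp
#include <cstdio>

int main() {
    const float x = 3.0f;   // we want 1/x
    float e = 0.33f;        // stand-in for the rough vrecpeq_f32 estimate
    for (int i = 0; i < 2; ++i)      // default ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS
        e = e * (2.0f - e * x);      // same step as x_recp *= vrecpsq_f32(x_recp, x)
    std::printf("%.9f\n", e);        // prints ~0.333333333
    return 0;
}
```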
/vectormath/vec4n_dot.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <common.h>
3 |
4 | namespace ARM7_FUNC_NAMESPACE {
5 |
6 | // Compute the dot product of two vectors of 4n floats
7 | ARMV7_FUNC_API void vec4n_dot(float& result, const vector4_t* a, const vector4_t* b, unsigned int n) {
8 | register vector4_t tmp, va, vb;
9 | asm volatile (
10 | "# %q[tmp].x = dot(%q[va][i], %q[vb][i]) for (i=0; i
3 | #include
4 | // TODO:
5 | // #include
6 | #error "Please don't use this, it's not ready"
7 |
8 | namespace ARM7_FUNC_NAMESPACE {
9 |
10 | class Vector3Static;
11 | class Vector3 {
12 | private:
13 | float32x4_t val;
14 | friend class Vector3Static;
15 |
16 | public:
17 | Vector3()
18 | { }
19 |
20 | Vector3(const Vector3 & vec)
21 | : val(vec.val)
22 | { }
23 |
24 | Vector3(const float32_t& x, const float32_t& y, const float32_t& z) {
25 | setX(x);
26 | setY(y);
27 | setZ(z);
28 | }
29 |
30 | explicit
31 | Vector3(const float32_t& scalar)
32 | : val(vdupq_n_f32(scalar))
33 | { }
34 |
35 | explicit
36 | Vector3(const float32x4_t& v)
37 | : val(v)
38 | { }
39 |
40 | Vector3& operator=(const Vector3& vec) {
41 | val=vec.val;
42 | return *this;
43 | }
44 |
45 | Vector3& setX(const float32_t& x) {
46 | return setElem(0, x);
47 | }
48 |
49 | Vector3& setY(const float32_t& y) {
50 | return setElem(1, y);
51 | }
52 |
53 | Vector3& setZ(const float32_t& z) {
54 | return setElem(2, z);
55 | }
56 |
57 | float32_t getX() const {
58 | return getElem(0);
59 | }
60 |
61 | float32_t getY() const {
62 | return getElem(1);
63 | }
64 |
65 | float32_t getZ() const {
66 | return getElem(2);
67 | }
68 |
69 | Vector3& setElem(int idx, const float32_t& value) {
70 | val = vsetq_lane_f32(value, val, idx);
71 | return *this;
72 | }
73 |
74 | float32_t getElem(int idx) const {
75 | return vgetq_lane_f32(val, idx);
76 | }
77 |
78 | float32_t operator[](int idx) const {
79 | return getElem(idx);
80 | }
81 |
82 | const Vector3 operator+(const Vector3& vec) const {
83 | return Vector3(val + vec.val);
84 | }
85 |
86 | const Vector3 operator-(const Vector3& vec) const {
87 | return Vector3(val - vec.val);
88 | }
89 |
90 | const Vector3 operator*(const float32_t& scalar) const {
91 | return Vector3(val * vdupq_n_f32(scalar));
92 | }
93 |
94 | const Vector3 operator/(const float32_t& scalar) const {
95 | float32x4_t tmp;
96 | vec4_reciprocal(tmp, vdupq_n_f32(scalar));
97 | return Vector3(val * tmp);
98 | }
99 |
100 | Vector3& operator+=(const Vector3& vec) {
101 | val += vec.val;
102 | return *this;
103 | }
104 |
105 | Vector3& operator-=(const Vector3& vec) {
106 | val -= vec.val;
107 | return *this;
108 | }
109 |
110 | Vector3& operator*=(const float32_t& scalar) {
111 | val *= vdupq_n_f32(scalar);
112 | return *this;
113 | }
114 |
115 | Vector3& operator/=(const float32_t& scalar) {
116 | float32x4_t tmp;
117 | vec4_reciprocal(tmp, vdupq_n_f32(scalar));
118 | val *= tmp;
119 | return *this;
120 | }
121 |
122 | const Vector3 operator-( ) const {
123 | return Vector3(-val);
124 | }
125 |
126 | static const Vector3& xAxis() {
127 | static Vector3 instance((float32x4_t) { 1.f, 0.f, 0.f, 0.f });
128 | return instance;
129 | }
130 |
131 | static const Vector3& yAxis() {
132 | static Vector3 instance((float32x4_t) { 0.f, 1.f, 0.f, 0.f });
133 | return instance;
134 | }
135 |
136 | static const Vector3& zAxis() {
137 | static Vector3 instance((float32x4_t) { 0.f, 0.f, 1.f, 0.f });
138 | return instance;
139 | }
140 |
141 | } __attribute__((aligned(16)));
142 |
143 | class Vector3Static {
144 | public:
145 | static const Vector3 mulPerElem(const Vector3& vec0, const Vector3& vec1) {
146 | return Vector3(vec0.val * vec1.val);
147 | }
148 | static const Vector3 divPerElem(const Vector3& vec0, const Vector3& vec1 ) {
149 | return mulPerElem(vec0, recipPerElem(vec1));
150 | }
151 | static const Vector3 recipPerElem(const Vector3& vec) {
152 | float32x4_t tmp;
153 | vec4_reciprocal(tmp, vec.val);
154 | return Vector3(tmp);
155 | }
156 |
157 | static const Vector3 sqrtPerElem(const Vector3& vec) {
158 | float32x4_t tmp;
159 | // TODO:
160 | // vec4_reciprocal_squareroot(tmp, vec.val);
161 | // vec4_reciprocal(tmp, tmp);
162 | return Vector3(tmp);
163 | }
164 |
165 | static const Vector3 rsqrtPerElem(const Vector3& vec) {
166 | float32x4_t tmp;
167 | // TODO:
168 | // vec4_reciprocal_squareroot(tmp, vec.val);
169 | return Vector3(tmp);
170 | }
171 |
172 | static const Vector3 absPerElem(const Vector3& vec);
173 | static const Vector3 copySignPerElem(const Vector3& vec0, const Vector3& vec1);
174 | static const Vector3 maxPerElem(const Vector3& vec0, const Vector3& vec1);
175 | static const Vector3 minPerElem(const Vector3& vec0, const Vector3& vec1);
176 | static float maxElem(const Vector3& vec);
177 | static float minElem(const Vector3& vec);
178 | static float sum(const Vector3& vec);
179 | static float dot(const Vector3& vec0, const Vector3 & vec1);
180 | static float lengthSqr(const Vector3& vec);
181 | static float length(const Vector3& vec);
182 | static const Vector3 normalize(const Vector3& vec);
183 | static const Vector3 cross(const Vector3& vec0, const Vector3& vec1);
184 | };
185 |
186 | inline const Vector3 operator*(const float32_t& scalar, const Vector3& vec) {
187 | return vec*scalar;
188 | }
189 |
190 | inline const Vector3 mulPerElem(const Vector3& vec0, const Vector3& vec1) {
191 | return Vector3Static::mulPerElem(vec0, vec1);
192 | }
193 |
194 | inline const Vector3 divPerElem(const Vector3& vec0, const Vector3& vec1) {
195 | return Vector3Static::divPerElem(vec0, vec1);
196 | }
197 |
198 | inline const Vector3 recipPerElem(const Vector3& vec) {
199 | return Vector3Static::recipPerElem(vec);
200 | }
201 |
202 | inline const Vector3 sqrtPerElem( const Vector3 & vec );
203 | inline const Vector3 rsqrtPerElem( const Vector3 & vec );
204 | inline const Vector3 absPerElem( const Vector3 & vec );
205 | inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 );
206 | inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 );
207 | inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 );
208 | inline float maxElem( const Vector3 & vec );
209 | inline float minElem( const Vector3 & vec );
210 | inline float sum( const Vector3 & vec );
211 | inline float dot( const Vector3 & vec0, const Vector3 & vec1 );
212 | inline float lengthSqr( const Vector3 & vec );
213 | inline float length( const Vector3 & vec );
214 | inline const Vector3 normalize( const Vector3 & vec );
215 | inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 );
216 |
217 | //inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 );
218 | //inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat );
219 | //inline const Matrix3 crossMatrix( const Vector3 & vec );
220 | //inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat );
221 |
222 | inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 );
223 | inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 );
224 | inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 );
225 |
226 | } // ARM7_FUNC_NAMESPACE
227 |
228 |
--------------------------------------------------------------------------------