├── Makefile
├── bitwise.h
├── LICENSE
├── main.c
└── impl.h


/Makefile:
--------------------------------------------------------------------------------
1 | # Builds the PC regression-test harness from main.c with gcc's default output name.
2 | # The comment lives outside the recipe so it is not handed to the shell and echoed.
3 | .PHONY: all
4 | all:
5 | 	gcc main.c


--------------------------------------------------------------------------------
/bitwise.h:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include <stdint.h>
 3 | 
 4 | typedef uint64_t    u64;
 5 | typedef uint32_t    u32;
 6 | typedef uint16_t    u16;
 7 | typedef uint8_t     u8;
 8 | 
 9 | typedef int64_t     s64;
10 | 
11 | /*
12 |  * Builds a mask with bits [lo, hi) set, i.e. (hi - lo) consecutive one bits
13 |  * starting at bit position lo.
14 |  *
15 |  * Empty or inverted ranges (hi <= lo) and ranges that start outside the
16 |  * 64-bit word yield 0; ranges that extend past bit 63 are clamped to it.
17 |  * The guards matter because shifting a 64-bit value by 64 or more, or by a
18 |  * negative amount, is undefined behavior in C.
19 |  */
20 | static inline u64 mask(int lo, int hi) {
21 |     if (hi <= lo || lo < 0 || lo >= 64) {
22 |         return 0;
23 |     }
24 | 
25 |     int width = hi - lo;
26 |     u64 ones = (width >= 64) ? ~0ull : ((1ull << width) - 1);
27 |     return ones << lo;
28 | }
15 | 
16 | /* Returns true when bit n of x is set; bit 0 is the least significant bit. */
17 | static inline bool bit(u64 x, int n) {
18 |     const u64 selected = x >> n;
19 |     return (selected & 1u) != 0;
20 | }
19 | 
20 | /*
21 |  * Sign-extends the from_size-bit quantity held in `value` to to_size bits:
22 |  * when bit (from_size - 1) is set, bits [from_size, to_size) are set too.
23 |  * Every other bit of `value` is passed through unchanged.
24 |  */
25 | static inline u64 sign_extend(u64 value, int from_size, int to_size) {
26 |     if (bit(value, from_size - 1)) {
27 |         value |= mask(from_size, to_size);
28 |     }
29 |     return value;
30 | }
25 | 
26 | /*
27 |  * Arithmetic shift right of a size-bit value: the input is sign-extended
28 |  * from `size` to 64 bits, shifted right by `shift` as a signed integer, and
29 |  * then truncated back to its low `size` bits.
30 |  *
31 |  * NOTE: right-shifting a negative signed value is implementation-defined in
32 |  * C; this relies on the compiler performing an arithmetic shift.
33 |  */
34 | static inline u64 asr(u64 value, int shift, int size) {
35 |     s64 widened = (s64) sign_extend(value, size, 64);
36 |     return (u64) (widened >> shift) & mask(0, size);
37 | }


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2024 zaydlang
 2 | 
 3 | This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
 4 | 
 5 | Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
 6 | 
 7 |     1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
 8 |     2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 9 |     3. This notice may not be removed or altered from any source distribution.
10 | 


--------------------------------------------------------------------------------
/main.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #define PC_BUILD 1
  3 | #include "impl.h"
  4 | #undef printf
  5 | 
  6 | #include <stdio.h>
  7 | #include <time.h>
  8 | #include <stdlib.h>
  9 | 
 10 | // Generates "interesting" 32-bit test operands: small values, values just
 11 | // below 0xFFFFFFFF, words whose low half is filtered by an alternating bit
 12 | // pattern, zero, and fully random words.
 13 | //
 14 | // A 4-bit selector picks the shape. Only selectors 0-9 are assigned, so
 15 | // 10-15 are rejected and a new selector is drawn.
 16 | unsigned int get_rand_32(void) {
 17 |     for (;;) {
 18 |         switch (rand() & 0xF) {
 19 |             case 0: return (unsigned int) rand() & 0xFFu;
 20 |             case 1: return (unsigned int) rand() & 0xFFFFu;
 21 |             case 2: return (unsigned int) rand() & 0xFFFFFFu;
 22 |             case 3: return 0xFFFFFFFFu - ((unsigned int) rand() & 0xFFu);
 23 |             case 4: return 0xFFFFFFFFu - ((unsigned int) rand() & 0xFFFFu);
 24 |             case 5: return 0xFFFFFFFFu - ((unsigned int) rand() & 0xFFFFFFu);
 25 | 
 26 |             // For the two-draw cases the draws are sequenced into locals, and
 27 |             // the halves are combined as unsigned so the << 16 can never
 28 |             // shift into the sign bit of an int.
 29 |             case 6: {
 30 |                 unsigned int low  = (unsigned int) rand() & 0xFFFFu;
 31 |                 unsigned int high = (unsigned int) rand() & 0xFFFFu;
 32 |                 // & binds tighter than |, so the pattern only filters the
 33 |                 // low half-word; the parentheses make that grouping explicit.
 34 |                 return (0xAAAAAAAAu & low) | (high << 16);
 35 |             }
 36 |             case 7: {
 37 |                 unsigned int low  = (unsigned int) rand() & 0xFFFFu;
 38 |                 unsigned int high = (unsigned int) rand() & 0xFFFFu;
 39 |                 return (0x55555555u & low) | (high << 16);
 40 |             }
 41 |             case 8: return 0;
 42 |             case 9: {
 43 |                 unsigned int low  = (unsigned int) rand() & 0xFFFFu;
 44 |                 unsigned int high = (unsigned int) rand() & 0xFFFFu;
 45 |                 return low | (high << 16);
 46 |             }
 47 | 
 48 |             default:
 49 |                 break; // unassigned selector: leave the switch and draw again
 50 |         }
 51 |     }
 52 | }
 30 | 
 31 | // Predicts the carry flag of a short multiply whose multiplicand is 0, given
 32 | // the multiplier x. Used for crude testing of mul() before full fuzzing
 33 | // on a GBA.
 34 | bool guess_mul_zero(uint32_t x) {
 35 |     // One row per recognised run of leading one bits, longest run first.
 36 |     static const struct {
 37 |         unsigned int shift;     // how far to shift x to expose the prefix
 38 |         uint32_t     prefix;    // value the shifted x must equal
 39 |         uint32_t     low_mask;  // selects the bits below the prefix
 40 |         uint32_t     limit;     // low bits at or above this give no carry
 41 |         uint32_t     pattern;   // any overlap with this pattern gives carry
 42 |     } rules[] = {
 43 |         {  8, 0xFFFFFF, 0xFF,     0xC0,     0x55     },
 44 |         { 16, 0xFFFF,   0xFFFF,   0xC000,   0x5555   },
 45 |         { 24, 0xFF,     0xFFFFFF, 0xC00000, 0x555555 },
 46 |     };
 47 | 
 48 |     for (unsigned int r = 0; r < sizeof rules / sizeof rules[0]; r++) {
 49 |         if ((x >> rules[r].shift) != rules[r].prefix) {
 50 |             continue;
 51 |         }
 52 | 
 53 |         uint32_t low_bits = x & rules[r].low_mask;
 54 |         if (low_bits >= rules[r].limit) {
 55 |             return false;
 56 |         }
 57 |         return (low_bits & rules[r].pattern) != 0;
 58 |     }
 59 | 
 60 |     // No leading 0xFF byte: carry is set only when the top two bits are 10.
 61 |     return (x >> 30) == 2;
 62 | }
 59 | 
 60 | // Runs every regression suite in turn, comparing the modelled multiplier
 61 | // against the host's native arithmetic. Returns 1 at the first mismatch and
 62 | // 0 when all suites pass.
 63 | //
 64 | // 32-bit operands are printed with %x and 64-bit values with %llx so that
 65 | // every conversion specifier matches the type of its argument.
 66 | int main(void) {
 67 |     srand((unsigned int) time(NULL));
 68 | 
 69 |     printf("Running mul carry regression tests...\n");
 70 |     for (int i = 0; i < 256; i++) {
 71 |         unsigned int multiplicand = 0;
 72 |         unsigned int multiplier = 0xFFFFFFFF - i;
 73 | 
 74 |         unsigned int guess = mul(multiplicand, multiplier).carry;
 75 |         unsigned int actual = guess_mul_zero(multiplier);
 76 | 
 77 |         if (guess != actual) {
 78 |             printf("[MUL CARRY REGRESSION] Failed: %x * %x = %x, got %x\n", multiplicand, multiplier, actual, guess);
 79 |             return 1;
 80 |         } else {
 81 |             printf("[MUL CARRY REGRESSION] Passed: %x * %x = %x, got %x\n", multiplicand, multiplier, actual, guess);
 82 |         }
 83 |     }
 84 | 
 85 |     printf("Running mul regression tests...\n");
 86 |     for (int i = 0; i < 10000; i++) {
 87 |         unsigned int multiplicand = get_rand_32();
 88 |         unsigned int multiplier = get_rand_32();
 89 | 
 90 |         unsigned int guess = mul(multiplicand, multiplier).output;
 91 |         unsigned int actual = multiplicand * multiplier;
 92 | 
 93 |         if (guess != actual) {
 94 |             printf("[MUL REGRESSION] Failed: %x * %x = %x, got %x\n", multiplicand, multiplier, actual, guess);
 95 |             return 1;
 96 |         } else {
 97 |             printf("[MUL REGRESSION] Passed: %x * %x = %x, got %x\n", multiplicand, multiplier, actual, guess);
 98 |         }
 99 |     }
100 | 
101 |     // Note: this suite only reports failures.
102 |     printf("Running mla regression tests...\n");
103 |     for (int i = 0; i < 10000; i++) {
104 |         unsigned int multiplicand = get_rand_32();
105 |         unsigned int multiplier = get_rand_32();
106 |         unsigned int accumulate = get_rand_32();
107 | 
108 |         unsigned int guess = mla(multiplicand, multiplier, accumulate).output;
109 |         unsigned int actual = multiplicand * multiplier + accumulate;
110 | 
111 |         if (guess != actual) {
112 |             printf("[MLA REGRESSION] Failed: %x * %x + %x = %x, got %x\n", multiplicand, multiplier, accumulate, actual, guess);
113 |             return 1;
114 |         }
115 |     }
116 | 
117 |     printf("Running umull regression tests...\n");
118 |     for (int i = 0; i < 10000; i++) {
119 |         unsigned int multiplicand = get_rand_32();
120 |         unsigned int multiplier = get_rand_32();
121 | 
122 |         unsigned long long guess = umull(multiplicand, multiplier).output;
123 |         unsigned long long actual = (unsigned long long) multiplicand * (unsigned long long) multiplier;
124 | 
125 |         if (guess != actual) {
126 |             printf("[UMULL REGRESSION] Failed #%d: %x * %x = %llx, got %llx\n", i, multiplicand, multiplier, actual, guess);
127 |             return 1;
128 |         } else {
129 |             printf("[UMULL REGRESSION] Passed #%d: %x * %x = %llx, got %llx\n", i, multiplicand, multiplier, actual, guess);
130 |         }
131 |     }
132 | 
133 |     printf("Running umlal regression tests...\n");
134 |     for (int i = 0; i < 10000; i++) {
135 |         unsigned int multiplicand = get_rand_32();
136 |         unsigned int multiplier = get_rand_32();
137 |         unsigned int accumulate = get_rand_32();
138 |         unsigned int accumulate2 = get_rand_32();
139 | 
140 |         unsigned long long guess = umlal(accumulate, accumulate2, multiplicand, multiplier).output;
141 |         unsigned long long actual_acc = (unsigned long long) accumulate + ((unsigned long long) accumulate2 << 32);
142 |         unsigned long long actual =
143 |             (unsigned long long) multiplicand * (unsigned long long) multiplier + actual_acc;
144 | 
145 |         if (guess != actual) {
146 |             printf("[UMLAL REGRESSION] Failed: %x * %x + %llx = %llx, got %llx\n", multiplicand, multiplier, actual_acc, actual, guess);
147 |             return 1;
148 |         } else {
149 |             printf("[UMLAL REGRESSION] Passed: %x * %x + %llx = %llx, got %llx\n", multiplicand, multiplier, actual_acc, actual, guess);
150 |         }
151 |     }
152 | 
153 |     printf("Running smull regression tests...\n");
154 |     for (int i = 0; i < 10000; i++) {
155 |         unsigned int multiplicand = get_rand_32();
156 |         unsigned int multiplier = get_rand_32();
157 | 
158 |         // The signed product always fits in a long long. It is compared and
159 |         // printed as a raw 64-bit pattern, which is what %llx expects.
160 |         long long product = (long long) (int) multiplicand * (long long) (int) multiplier;
161 |         unsigned long long guess = smull(multiplicand, multiplier).output;
162 |         unsigned long long actual = (unsigned long long) product;
163 | 
164 |         if (guess != actual) {
165 |             printf("[SMULL REGRESSION] Failed: %x * %x = %llx, got %llx\n", multiplicand, multiplier, actual, guess);
166 |             return 1;
167 |         } else {
168 |             printf("[SMULL REGRESSION] Passed: %x * %x = %llx, got %llx\n", multiplicand, multiplier, actual, guess);
169 |         }
170 |     }
171 | 
172 |     printf("Running smlal regression tests...\n");
173 |     for (int i = 0; i < 10000; i++) {
174 |         unsigned int multiplicand = get_rand_32();
175 |         unsigned int multiplier = get_rand_32();
176 |         unsigned int accumulate = get_rand_32();
177 |         unsigned int accumulate2 = get_rand_32();
178 | 
179 |         unsigned long long guess = smlal(accumulate, accumulate2, multiplicand, multiplier).output;
180 | 
181 |         // The accumulator is assembled and added in unsigned arithmetic:
182 |         // shifting accumulate2 into the top half of a signed long long, or
183 |         // adding the signed product to it, can overflow (undefined behavior).
184 |         // Unsigned wrap-around yields the same 64-bit pattern.
185 |         unsigned long long actual_acc = ((unsigned long long) accumulate2 << 32) | accumulate;
186 |         long long product = (long long) (int) multiplicand * (long long) (int) multiplier;
187 |         unsigned long long actual = (unsigned long long) product + actual_acc;
188 | 
189 |         if (guess != actual) {
190 |             printf("[SMLAL REGRESSION] Failed: %x * %x + %llx = %llx, got %llx\n", multiplicand, multiplier, actual_acc, actual, guess);
191 |             return 1;
192 |         } else {
193 |             printf("[SMLAL REGRESSION] Passed: %x * %x + %llx = %llx, got %llx\n", multiplicand, multiplier, actual_acc, actual, guess);
194 |         }
195 |     }
196 | 
197 |     printf("All tests passed!\n");
198 |     return 0;
199 | }


--------------------------------------------------------------------------------
/impl.h:
--------------------------------------------------------------------------------
  1 | #include "bitwise.h"
  2 | #include <stdbool.h>
  3 | #include <stdint.h>
  4 | 
  5 | // Don't define PC_BUILD if you want to use this file in a GBA ROM.
  6 | #ifdef PC_BUILD
  7 | #include <stdio.h>
  8 | #define printf(...) printf(__VA_ARGS__)
  9 | #else
 10 | #define printf(...)
 11 | #endif
 12 | 
 13 | // A window of the multiplier. Realistically this only ever holds a 3-bit value (0-7): get_recoded_multiplicands() masks it to three bits.
 14 | typedef u8 BoothChunk;
 15 | // One Booth-recoded addend, as returned by booth_recode().
 16 | struct BoothRecodingOutput {
 17 |     u64  recoded_output; // selected multiple of the multiplicand, truncated to 34 bits by booth_recode()
 18 |     bool carry;          // 1 when booth_recode() bitwise-inverted the addend; perform_csa_array() injects it as a carry-in
 19 | };
 20 | // The four Booth-recoded addends consumed by one call to perform_csa_array().
 21 | struct RecodedMultiplicands {
 22 |     struct BoothRecodingOutput m[4]; // m[i] is recoded from multiplier bits 2i .. 2i + 2 (see get_recoded_multiplicands())
 23 | };
 24 | 
 25 | // Booth-recodes `input` (the multiplicand) according to a 3-bit window of the
 26 | // multiplier.
 27 | //
 28 | //   window 0, 7 : 0
 29 | //   window 1, 2 : input
 30 | //   window 3    : 2 * input
 31 | //   window 4    : ~(2 * input), carry set
 32 | //   window 5, 6 : ~input,       carry set
 33 | //
 34 | // The recoded value is truncated to 34 bits. Only the low three bits of
 35 | // booth_chunk are examined, so every call returns a fully initialized result
 36 | // even if a caller passes a value above 7.
 37 | struct BoothRecodingOutput booth_recode(u64 input, BoothChunk booth_chunk) {
 38 |     u64  recoded = 0;
 39 |     bool carry   = false;
 40 | 
 41 |     switch (booth_chunk & 7) {
 42 |         case 1:
 43 |         case 2:
 44 |             recoded = input;
 45 |             break;
 46 |         case 3:
 47 |             recoded = 2 * input;
 48 |             break;
 49 |         case 4:
 50 |             recoded = ~(2 * input);
 51 |             carry = true;
 52 |             break;
 53 |         case 5:
 54 |         case 6:
 55 |             recoded = ~input;
 56 |             carry = true;
 57 |             break;
 58 |         default:
 59 |             // windows 0 and 7 contribute nothing
 60 |             break;
 61 |     }
 62 | 
 63 |     return (struct BoothRecodingOutput) { recoded & 0x3FFFFFFFFULL, carry };
 64 | }
 41 | // A value kept in carry-save form: a sum word plus a separate carry word.
 42 | struct CSAOutput {
 43 |     u64 output; // sum word; perform_csa() fills it with the bitwise XOR of its three inputs
 44 |     u64 carry;  // carry word; perform_csa() fills it with the per-bit majority of its three inputs
 45 | };
 46 | 
 47 | // Carry-save addition of three words: the sum word is the bitwise XOR of the
 48 | // inputs, and the carry word has a bit set wherever at least two inputs do.
 49 | // The carry word is returned unshifted.
 50 | struct CSAOutput perform_csa(u64 a, u64 b, u64 c) {
 51 |     struct CSAOutput sum;
 52 |     sum.output = (a ^ b) ^ c;
 53 |     sum.carry  = (a & b) | (c & (a | b));
 54 |     return sum;
 55 | }
 52 | // Holds the upper accumulator bits. booths_multiplication() loads it with (accumulator >> 34);
 53 | // perform_csa_array() reads its two low bits and shifts it right by 2 after each CSA.
 54 | u64 acc_shift_register = 0;
 55 | 
 56 | // Feeds the four recoded addends through four chained carry-save additions,
 57 | // starting from the given partial sum/carry.
 58 | //
 59 | // Each stage settles two low bits of the result. Those eight bits end up in
 60 | // bits 0-7 of the returned sum/carry words, with the remaining carry-save
 61 | // state placed above them (shifted left by 8).
 62 | //
 63 | // Side effect: consumes two bits of the global acc_shift_register per stage.
 64 | struct CSAOutput perform_csa_array(u64 partial_sum, u64 partial_carry, struct RecodedMultiplicands addends) {
 65 |     const u64 low_33_bits = 0x1FFFFFFFFULL;
 66 | 
 67 |     struct CSAOutput stage = { partial_sum, partial_carry };
 68 |     u64 settled_sum   = 0;
 69 |     u64 settled_carry = 0;
 70 | 
 71 |     for (int i = 0; i < 4; i++) {
 72 |         const struct BoothRecodingOutput *addend = &addends.m[i];
 73 |         const int settled_shift = 2 * i;
 74 | 
 75 |         stage.output &= low_33_bits;
 76 |         stage.carry  &= low_33_bits;
 77 | 
 78 |         struct CSAOutput next = perform_csa(stage.output, addend->recoded_output & low_33_bits, stage.carry);
 79 | 
 80 |         // Move the carries into position and inject the carry caused by
 81 |         // booth recoding into the freed low bit.
 82 |         next.carry = (next.carry << 1) | addend->carry;
 83 | 
 84 |         // The bottom two bits are final: later addends are at least four
 85 |         // times as large, so they cannot change them. Injecting them
 86 |         // directly into the result is how the hardware saves chip space.
 87 |         settled_sum   |= (next.output & 3) << settled_shift;
 88 |         settled_carry |= (next.carry  & 3) << settled_shift;
 89 | 
 90 |         // The following CSA therefore only sees the upper bits.
 91 |         next.output >>= 2;
 92 |         next.carry  >>= 2;
 93 | 
 94 |         // The "magic" from the tables describing the handling of TransH and
 95 |         // High: the top bits of the next stage are derived from the low two
 96 |         // bits of acc_shift_register, bit 32 of the incoming carry and
 97 |         // bit 33 of the recoded addend.
 98 |         u64 magic = bit(acc_shift_register, 0) + !bit(stage.carry, 32) + !bit(addend->recoded_output, 33);
 99 |         next.output |= magic << 31;
100 |         next.carry  |= (u64) !bit(acc_shift_register, 1) << 32;
101 |         acc_shift_register >>= 2;
102 | 
103 |         stage = next;
104 |     }
105 | 
106 |     return (struct CSAOutput) {
107 |         settled_sum   | (stage.output << 8),
108 |         settled_carry | (stage.carry  << 8),
109 |     };
110 | }
 99 | 
100 | // Booth-recodes the multiplicand against the low bits of the multiplier.
101 | // Addend i uses the 3-bit window starting at multiplier bit 2 * i, so
102 | // neighbouring windows overlap by one bit.
103 | struct RecodedMultiplicands get_recoded_multiplicands(u64 multiplicand, u64 multiplier) {
104 |     struct RecodedMultiplicands recoded;
105 |     u64 remaining = multiplier;
106 | 
107 |     for (int slot = 0; slot < 4; slot++) {
108 |         recoded.m[slot] = booth_recode(multiplicand, (BoothChunk) (remaining & 7));
109 |         remaining >>= 2;
110 |     }
111 | 
112 |     return recoded;
113 | }
109 | 
110 | // One multiplier cycle: recodes the multiplicand against the low multiplier
111 | // bits and adds the four resulting addends to the previous carry-save state.
112 | struct CSAOutput perform_one_cycle_of_booths_mutliplication(struct CSAOutput previous_output, u64 multiplicand, u64 multiplier) {
113 |     return perform_csa_array(
114 |         previous_output.output,
115 |         previous_output.carry,
116 |         get_recoded_multiplicands(multiplicand, multiplier));
117 | }
114 | // Selects which kind of multiply booths_multiplication() models.
115 | enum MultiplicationFlavor {
116 |     SHORT,         // 32-bit result; passed by mul() and mla(); is_signed() treats it as signed
117 |     LONG_SIGNED,   // 64-bit result with signed operands; passed by smull() and smlal()
118 |     LONG_UNSIGNED, // 64-bit result with unsigned operands; passed by umull() and umlal()
119 | };
120 | 
121 | // True for the two flavors that produce a 64-bit result.
122 | bool is_long(enum MultiplicationFlavor flavor) {
123 |     switch (flavor) {
124 |         case LONG_SIGNED:
125 |         case LONG_UNSIGNED:
126 |             return true;
127 |         default:
128 |             return false;
129 |     }
130 | }
124 | 
125 | // True for the flavors whose operands are sign-extended. Short multiplies
126 | // count as signed here.
127 | bool is_signed(enum MultiplicationFlavor flavor) {
128 |     switch (flavor) {
129 |         case SHORT:
130 |         case LONG_SIGNED:
131 |             return true;
132 |         default:
133 |             return false;
134 |     }
135 | }
128 | 
129 | // Early-termination test for the multiply loop. A remaining multiplier of 0
130 | // always terminates; for signed flavors a remaining multiplier of all ones
131 | // (33 bits) terminates as well.
132 | bool should_terminate(u64 multiplier, enum MultiplicationFlavor flavor) {
133 |     if (multiplier == 0) {
134 |         return true;
135 |     }
136 |     return is_signed(flavor) && multiplier == 0x1FFFFFFFF;
137 | }
136 | // Result of adder().
137 | struct AdderOutput {
138 |     u32 output; // low 32 bits of a + b + carry-in
139 |     bool carry; // true when the full sum did not fit in 32 bits
140 | };
141 | 
142 | // 32-bit adder with carry-in. Returns the truncated sum together with a
143 | // carry-out flag that is set when a + b + carry needs more than 32 bits.
144 | struct AdderOutput adder(u32 a, u32 b, bool carry) {
145 |     const u64 wide_sum = (u64) a + b + carry;
146 |     struct AdderOutput sum = { (u32) wide_sum, (wide_sum >> 32) != 0 };
147 |     return sum;
148 | }
147 | // Result of booths_multiplication() and of the mul/mla/umull/umlal/smull/smlal wrappers.
148 | struct MultiplicationOutput {
149 |     u64 output; // multiplication result; SHORT multiplies only fill the low 32 bits
150 |     bool carry; // modelled carry flag: a single bit taken from the final partial carry word
151 | };
152 | // 128-bit value stored as two 64-bit halves (see u128_ror()).
153 | struct u128 {
154 |     u64 lo; // bits 0-63
155 |     u64 hi; // bits 64-127
156 | };
157 | 
158 | // Rotates a 128-bit value right by `shift` bits.
159 | //
160 | // The amount is reduced modulo 128, and rotations by 64 or more are done as
161 | // a swap of the halves followed by a rotation of less than 64. No 64-bit
162 | // shift is ever performed with a count of 64 or more (including the
163 | // `64 - shift` term when shift is 0), which would be undefined behavior.
164 | struct u128 u128_ror(struct u128 input, int shift) {
165 |     unsigned int amount = (unsigned int) shift & 127u;
166 | 
167 |     if (amount >= 64) {
168 |         u64 swapped = input.lo;
169 |         input.lo = input.hi;
170 |         input.hi = swapped;
171 |         amount -= 64;
172 |     }
173 | 
174 |     if (amount == 0) {
175 |         return input;
176 |     }
177 | 
178 |     return (struct u128) {
179 |         (input.lo >> amount) | (input.hi << (64 - amount)),
180 |         (input.hi >> amount) | (input.lo << (64 - amount)),
181 |     };
182 | }
164 | 
165 | // Models one multiply instruction. The multiplier is consumed 8 bits per
166 | // iteration through the Booth recoder and CSA array; the loop stops early
167 | // once should_terminate() accepts the remaining multiplier. The carry-save
168 | // result is then resolved with the 32-bit adder().
169 | //
170 | //   flavor       - selects sign handling and whether the result is 64-bit
171 | //   multiplicand - operand; sign-extended from 32 to 34 bits for signed
172 | //                  flavors, otherwise masked to 33 bits
173 | //   multiplier   - operand, extended the same way; its bit 0 is also used
174 | //                  as the carry-in of the final addition
175 | //   accumulator  - value added to the product (0 for plain multiplies)
176 | //
177 | // Returns the result and the modelled carry flag.
178 | // Side effect: overwrites the global acc_shift_register.
179 | struct MultiplicationOutput booths_multiplication(enum MultiplicationFlavor flavor, u64 multiplicand, u64 multiplier, u64 accumulator) {
180 |     struct CSAOutput csa_output = { 0, 0 };
181 | 
182 |     // Sampled before the multiplier is extended or masked below.
183 |     bool alu_carry_in = multiplier & 1;
184 | 
185 |     if (is_signed(flavor)) {
186 |         multiplier   = sign_extend(multiplier, 32, 34);
187 |         multiplicand = sign_extend(multiplicand, 32, 34);
188 |     } else {
189 |         multiplier   = multiplier & 0x1FFFFFFFFull;
190 |         multiplicand = multiplicand & 0x1FFFFFFFFull;
191 |     }
192 | 
193 |     csa_output.carry = (multiplier & 1) ? ~(multiplicand) : 0;
194 |     csa_output.output = accumulator;
195 |     acc_shift_register = accumulator >> 34;
196 | 
197 |     // Bit 0 of the initial sum/carry is already final: move it into the
198 |     // 128-bit collectors and rotate it out of the way.
199 |     struct u128 partial_sum   = { 0, 0 };
200 |     struct u128 partial_carry = { 0, 0 };
201 |     partial_sum.lo   = csa_output.output & 1;
202 |     partial_carry.lo = csa_output.carry  & 1;
203 | 
204 |     csa_output.output >>= 1;
205 |     csa_output.carry >>= 1;
206 |     partial_sum   = u128_ror(partial_sum, 1);
207 |     partial_carry = u128_ror(partial_carry, 1);
208 | 
209 |     int num_iterations = 0;
210 |     do {
211 |         csa_output = perform_one_cycle_of_booths_mutliplication(csa_output, multiplicand, multiplier);
212 | 
213 |         // Each cycle settles 8 more result bits; collect and rotate them out.
214 |         partial_sum.lo   |= csa_output.output & 0xFF;
215 |         partial_carry.lo |= csa_output.carry  & 0xFF;
216 | 
217 |         csa_output.output >>= 8;
218 |         csa_output.carry >>= 8;
219 | 
220 |         partial_sum = u128_ror(partial_sum, 8);
221 |         partial_carry = u128_ror(partial_carry, 8);
222 | 
223 |         multiplier = asr(multiplier, 8, 33);
224 |         num_iterations++;
225 |     } while (!should_terminate(multiplier, flavor));
226 |     partial_sum.lo |= csa_output.output;
227 |     partial_carry.lo |= csa_output.carry;
228 | 
229 |     // we have ror'd partial_sum and partial_carry by 8 * num_iterations + 1
230 |     // we now need to ror backwards, i tried my best to mimic the table, but
231 |     // i'm off by one for whatever reason.
232 |     //
233 |     // With the 32-bit operands passed by the wrappers the loop runs 1 to 4
234 |     // times. The default label covers 4 and keeps correction_ror initialized
235 |     // on every path.
236 |     int correction_ror;
237 |     switch (num_iterations) {
238 |         case 1:  correction_ror = 23; break;
239 |         case 2:  correction_ror = 15; break;
240 |         case 3:  correction_ror = 7;  break;
241 |         default: correction_ror = 31; break;
242 |     }
243 | 
244 |     partial_sum   = u128_ror(partial_sum, correction_ror);
245 |     partial_carry = u128_ror(partial_carry, correction_ror);
246 | 
247 |     if (is_long(flavor)) {
248 |         if (num_iterations == 4) {
249 |             struct AdderOutput adder_output_lo = 
250 |                 adder(partial_sum.hi, partial_carry.hi, alu_carry_in);
251 |             struct AdderOutput adder_output_hi = 
252 |                 adder(partial_sum.hi >> 32, partial_carry.hi >> 32, 
253 |                     adder_output_lo.carry);
254 | 
255 |             return (struct MultiplicationOutput) {
256 |                 ((u64) adder_output_hi.output << 32) | adder_output_lo.output,
257 |                 (partial_carry.hi >> 63) & 1
258 |             };
259 |         } else {
260 |             struct AdderOutput adder_output_lo = 
261 |                 adder(partial_sum.hi >> 32, partial_carry.hi >> 32, alu_carry_in);
262 | 
263 |             int shift_amount = 1 + 8 * num_iterations;
264 | 
265 |             // why this is needed is unknown, but the multiplication doesn't work
266 |             // without it
267 |             shift_amount++;
268 | 
269 |             // Early termination: extend the carry word and fill the sum
270 |             // word's upper bits from what is left of the accumulator.
271 |             partial_carry.lo = sign_extend(partial_carry.lo, shift_amount, 64);
272 |             partial_sum.lo |= acc_shift_register << (shift_amount);
273 | 
274 |             struct AdderOutput adder_output_hi = 
275 |                 adder(partial_sum.lo, partial_carry.lo, adder_output_lo.carry);
276 |             return (struct MultiplicationOutput) { 
277 |                 ((u64) adder_output_hi.output << 32) | adder_output_lo.output,
278 |                 (partial_carry.hi >> 63) & 1
279 |             };
280 |         }
281 |     } else {
282 |         if (num_iterations == 4) {
283 |             struct AdderOutput adder_output = 
284 |                 adder(partial_sum.hi, partial_carry.hi, alu_carry_in);
285 |             return (struct MultiplicationOutput) { 
286 |                 adder_output.output,
287 |                 (partial_carry.hi >> 31) & 1
288 |             };
289 |         } else {
290 |             struct AdderOutput adder_output = 
291 |                 adder(partial_sum.hi >> 32, partial_carry.hi >> 32, alu_carry_in);
292 |             return (struct MultiplicationOutput) { 
293 |                 adder_output.output,
294 |                 (partial_carry.hi >> 63) & 1
295 |             };
296 |         }
297 |     }
298 | }
277 | 
278 | // Short multiply of rm by rs with a zero accumulator.
279 | struct MultiplicationOutput mul(u32 rm, u32 rs) {
280 |     const u64 accumulator = 0;
281 |     return booths_multiplication(SHORT, rm, rs, accumulator);
282 | }
281 | 
282 | // Short multiply-accumulate: rm times rs with rn as the accumulator.
283 | struct MultiplicationOutput mla(u32 rm, u32 rs, u32 rn) {
284 |     const u64 accumulator = rn;
285 |     return booths_multiplication(SHORT, rm, rs, accumulator);
286 | }
285 | 
286 | // Unsigned long multiply of rm by rs with a zero accumulator.
287 | struct MultiplicationOutput umull(u32 rm, u32 rs) {
288 |     const u64 accumulator = 0;
289 |     return booths_multiplication(LONG_UNSIGNED, rm, rs, accumulator);
290 | }
289 | 
290 | // Unsigned long multiply-accumulate; the accumulator is rdhi:rdlo (rdhi in
291 | // the upper 32 bits).
292 | struct MultiplicationOutput umlal(u32 rdlo, u32 rdhi, u32 rm, u32 rs) {
293 |     const u64 accumulator = ((u64) rdhi << 32) | rdlo;
294 |     return booths_multiplication(LONG_UNSIGNED, rm, rs, accumulator);
295 | }
293 | 
294 | // Signed long multiply of rm by rs with a zero accumulator.
295 | struct MultiplicationOutput smull(u32 rm, u32 rs) {
296 |     const u64 accumulator = 0;
297 |     return booths_multiplication(LONG_SIGNED, rm, rs, accumulator);
298 | }
297 | 
298 | // Signed long multiply-accumulate; the accumulator is rdhi:rdlo (rdhi in
299 | // the upper 32 bits).
300 | struct MultiplicationOutput smlal(u32 rdlo, u32 rdhi, u32 rm, u32 rs) {
301 |     const u64 accumulator = ((u64) rdhi << 32) | rdlo;
302 |     return booths_multiplication(LONG_SIGNED, rm, rs, accumulator);
303 | }


--------------------------------------------------------------------------------