=Norig) twidx-=Norig;
239 | C_MUL(t,scratch[q] , twiddles[twidx] );
240 | C_ADDTO( Fout[ k ] ,t);
241 | }
242 | k += m;
243 | }
244 | }
245 | KISS_FFT_TMP_FREE(scratch);
246 | }
247 |
248 | static
249 | void kf_work(
250 | kiss_fft_cpx * Fout,
251 | const kiss_fft_cpx * f,
252 | const size_t fstride,
253 | int in_stride,
254 | int * factors,
255 | const kiss_fft_cfg st
256 | )
257 | {
258 | kiss_fft_cpx * Fout_beg=Fout;
259 | const int p=*factors++; /* the radix */
260 | const int m=*factors++; /* stage's fft length/p */
261 | const kiss_fft_cpx * Fout_end = Fout + p*m;
262 |
263 | #ifdef _OPENMP
264 | // use openmp extensions at the
265 | // top-level (not recursive)
266 | if (fstride==1 && p<=5)
267 | {
268 | int k;
269 |
270 | // execute the p different work units in different threads
271 | # pragma omp parallel for
272 | for (k=0;k floor_sqrt)
335 | p = n; /* no more factors, skip to end */
336 | }
337 | n /= p;
338 | *facbuf++ = p;
339 | *facbuf++ = n;
340 | } while (n > 1);
341 | }
342 |
343 | /*
344 | *
345 | * User-callable function to allocate all necessary storage space for the fft.
346 | *
347 | * The return value is a contiguous block of memory, allocated with malloc. As such,
348 | * It can be freed with free(), rather than a kiss_fft-specific function.
349 | * */
350 | kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
351 | {
352 | kiss_fft_cfg st=NULL;
353 | size_t memneeded = sizeof(struct kiss_fft_state)
354 | + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
355 |
356 | if ( lenmem==NULL ) {
357 | st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
358 | }else{
359 | if (mem != NULL && *lenmem >= memneeded)
360 | st = (kiss_fft_cfg)mem;
361 | *lenmem = memneeded;
362 | }
363 | if (st) {
364 | int i;
365 | st->nfft=nfft;
366 | st->inverse = inverse_fft;
367 |
368 | for (i=0;iinverse)
372 | phase *= -1;
373 | kf_cexp(st->twiddles+i, phase );
374 | }
375 |
376 | kf_factor(nfft,st->factors);
377 | }
378 | return st;
379 | }
380 |
381 |
382 | void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
383 | {
384 | if (fin == fout) {
385 | //NOTE: this is not really an in-place FFT algorithm.
386 | //It just performs an out-of-place FFT into a temp buffer
387 | kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
388 | kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
389 | memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
390 | KISS_FFT_TMP_FREE(tmpbuf);
391 | }else{
392 | kf_work( fout, fin, 1,in_stride, st->factors,st );
393 | }
394 | }
395 |
396 | void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
397 | {
398 | kiss_fft_stride(cfg,fin,fout,1);
399 | }
400 |
401 |
/* Retained so existing callers keep linking; it has no work to do. */
void kiss_fft_cleanup(void)
{
    /* nothing needed any more */
    return;
}
406 |
/*
 * Return the smallest integer k >= n whose only prime factors are 2, 3 and 5.
 *
 * Inputs below 1 are treated as 1, so the result is always a positive size.
 * Without that clamp n == 0 never terminated, because 0 % 2 == 0 and
 * 0 / 2 == 0 kept the first reduction loop spinning.
 */
int kiss_fft_next_fast_size(int n)
{
    if (n < 1)
        n = 1;

    for (;;) {
        int m = n;

        /* Strip every factor of 2, 3 and 5; what is left is 1 exactly when
         * n has no other prime factors. */
        while ((m % 2) == 0) m /= 2;
        while ((m % 3) == 0) m /= 3;
        while ((m % 5) == 0) m /= 5;

        if (m <= 1)
            break; /* n is completely factorable by twos, threes, and fives */
        n++;
    }
    return n;
}
420 |
--------------------------------------------------------------------------------
/kiss_fft.h:
--------------------------------------------------------------------------------
1 | #ifndef KISS_FFT_H
2 | #define KISS_FFT_H
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 | #ifdef __cplusplus
10 | extern "C" {
11 | #endif
12 |
13 | /*
14 | ATTENTION!
15 | If you would like any of the following:
16 | -- a utility that will handle the caching of fft objects
17 | -- real-only (no imaginary time component ) FFT
18 | -- a multi-dimensional FFT
19 | -- a command-line utility to perform ffts
20 | -- a command-line utility to perform fast-convolution filtering
21 |
22 | Then see kfc.h kiss_fftr.h kiss_fftnd.h fftutil.c kiss_fastfir.c
23 | in the tools/ directory.
24 | */
25 |
26 | #ifdef USE_SIMD
27 | # include
28 | # define kiss_fft_scalar __m128
29 | #define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16)
30 | #define KISS_FFT_FREE _mm_free
31 | #else
32 | #define KISS_FFT_MALLOC malloc
33 | #define KISS_FFT_FREE free
34 | #endif
35 |
36 |
37 | #ifdef FIXED_POINT
38 | #include
39 | # if (FIXED_POINT == 32)
40 | # define kiss_fft_scalar int32_t
41 | # else
42 | # define kiss_fft_scalar int16_t
43 | # endif
44 | #else
45 | # ifndef kiss_fft_scalar
46 | /* default is float */
47 | # define kiss_fft_scalar float
48 | # endif
49 | #endif
50 |
51 | typedef struct {
52 | kiss_fft_scalar r;
53 | kiss_fft_scalar i;
54 | }kiss_fft_cpx;
55 |
56 | typedef struct kiss_fft_state* kiss_fft_cfg;
57 |
58 | /*
59 | * kiss_fft_alloc
60 | *
61 | * Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
62 | *
63 | * typical usage: kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL);
64 | *
65 | * The return value from fft_alloc is a cfg buffer used internally
66 | * by the fft routine or NULL.
67 | *
68 | * If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using malloc.
69 | * The returned value should be free()d when done to avoid memory leaks.
70 | *
71 | * The state can be placed in a user supplied buffer 'mem':
72 | * If lenmem is not NULL and mem is not NULL and *lenmem is large enough,
73 | * then the function places the cfg in mem and the size used in *lenmem
74 | * and returns mem.
75 | *
76 | * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
77 | * then the function returns NULL and places the minimum cfg
78 | * buffer size in *lenmem.
79 | * */
80 |
81 | kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem);
82 |
83 | /*
84 | * kiss_fft(cfg,fin,fout)
85 | *
86 | * Perform an FFT on a complex input buffer.
87 | * for a forward FFT,
88 | * fin should be f[0] , f[1] , ... ,f[nfft-1]
89 | * fout will be F[0] , F[1] , ... ,F[nfft-1]
90 | * Note that each element is complex and can be accessed like
91 | f[k].r and f[k].i
92 | * */
93 | void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
94 |
95 | /*
96 | A more generic version of the above function. It reads its input from every Nth sample.
97 | * */
98 | void kiss_fft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int fin_stride);
99 |
/* If kiss_fft_alloc allocated a buffer, it is one contiguous buffer and must
   be released with the deallocator that matches KISS_FFT_MALLOC: free() by
   default, _mm_free() when USE_SIMD is defined. */
#define kiss_fft_free KISS_FFT_FREE
103 |
104 | /*
105 | Cleans up some memory that gets managed internally. Not necessary to call, but it might clean up
106 | your compiler output to call this before you exit.
107 | */
108 | void kiss_fft_cleanup(void);
109 |
110 |
111 | /*
112 | * Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5)
113 | */
114 | int kiss_fft_next_fast_size(int n);
115 |
116 | /* for real ffts, we need an even size */
117 | #define kiss_fftr_next_fast_size_real(n) \
118 | (kiss_fft_next_fast_size( ((n)+1)>>1)<<1)
119 |
120 | #ifdef __cplusplus
121 | }
122 | #endif
123 |
124 | #endif
125 |
--------------------------------------------------------------------------------
/kiss_fft_bfly2_neon.S:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2012, Code Aurora Forum. All rights reserved.
3 | *
4 | * Redistribution and use in source and binary forms, with or without
5 | * modification, are permitted provided that the following conditions are
6 | * met:
7 | * * Redistributions of source code must retain the above copyright
8 | * notice, this list of conditions and the following disclaimer.
9 | * * Redistributions in binary form must reproduce the above
10 | * copyright notice, this list of conditions and the following
11 | * disclaimer in the documentation and/or other materials provided
12 | * with the distribution.
13 | * * Neither the name of Code Aurora Forum, Inc. nor the names of its
14 | * contributors may be used to endorse or promote products derived
15 | * from this software without specific prior written permission.
16 | *
17 | * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
18 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
21 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
27 | * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | */
29 |
30 | @ NEON optimized assembly routine of kf_bfly2(): radix-2 butterfly over m complex float samples (the C equivalents are quoted in the comments below)
31 | @ Layout: .bfly2_do_while4 handles 4 samples per iteration, .bfly2_do_while1 handles the leftover samples one at a time.
32 | .text
33 | .fpu neon
34 | .align 4
35 | .global kf_bfly2
36 | .func kf_bfly2
37 |
38 | kf_bfly2:
39 | stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp, lr}
40 | @ vstmdb sp!, {d8-d15} @ left disabled: d8-d15 are not written anywhere in this routine
41 | @ r0 - Fout| r1 - fstride | r2 - st | r3 - m
42 | pld [r0, #0]
43 | mov r8, r3, asl #3 @ convert m into bytes count (m*8)
44 | add r5, r0, r8 @ Fout2 = Fout + m;
45 | add r6, r2, #264 @ tw1 = st->twiddles (264 is a hard-coded byte offset into *st; presumably it must match the layout of struct kiss_fft_state - TODO confirm)
46 | pld [r6, #0]
47 | mov r1, r1, asl #3 @ convert fstride into bytes count (fstride*8)
48 | @ float32x4x2_t *Fout; q0, q1 (d0-d3)
49 | @ float32x4x2_t tmp; q2, q3 (d4-d7)
50 | @ float32x4x2_t *Fout2; q10, q11 (d20-d23)
51 | @ float32x4x2_t *tw1; q12, q13 (d24-d27)
52 | @ float32x4x2_t t; q8, q9 (d16-d19)
53 | asrs r4, r3, #2 @ size_t k=m/4;
54 | beq .bfly2_do_while1 @ fewer than 4 samples: go straight to the one-sample loop with r3 = m as its counter
55 | mov r7, r1, asl #2 @ convert fstride into bytes count (fstride*8*4 /*4 samples*/)
56 |
57 | .bfly2_do_while4: @ do { //process 4 samples per iteration
58 | vld2.32 {d20-d23}, [r5] @ load *Fout2;
59 | vld2.32 {d16-d19}, [r6], r7 @ load *tw1; tw1 += (fstride*4); NOTE(review): this fills q8/q9, yet the multiplies below read q12/q13 (the registers the map above assigns to tw1), which nothing in this routine loads - looks like the destination should be d24-d27; confirm. Also reads 4 consecutive twiddles, which presumably only matches the C loop when fstride == 1 - verify.
60 | pld [r6, #0] @ preload next tw1
61 | vmul.f32 q2, q10, q12 @ C_MUL (t, *Fout2 , *tw1);
62 | vmul.f32 q3, q11, q13
63 | vsub.f32 q8, q2, q3 @ t.r = Fout2.r*tw1.r - Fout2.i*tw1.i
64 | vmul.f32 q2, q10, q13
65 | vmul.f32 q3, q11, q12
66 | vadd.f32 q9, q2, q3 @ t.i = Fout2.r*tw1.i + Fout2.i*tw1.r
67 |
68 | vld2.32 {d0-d3}, [r0] @ load *Fout;
69 | vsub.f32 q10, q0, q8 @ C_SUB( *Fout2 , *Fout , t );
70 | vsub.f32 q11, q1, q9
71 | vst2.32 {d20-d23}, [r5]! @ store *Fout2; Fout2+=4
72 | pld [r5, #0] @ preload next Fout2
73 |
74 | vadd.f32 q0, q0, q8 @ C_ADDTO( *Fout , t );
75 | vadd.f32 q1, q1, q9
76 | vst2.32 {d0-d3}, [r0]! @ store *Fout; Fout+=4
77 | pld [r0, #0] @ preload next Fout
78 |
79 | subs r4, r4, #1 @ }while(--k);
80 | bne .bfly2_do_while4
81 |
82 | @.kf_bfly2_process_remaining:
83 | asr r8, r3, #31 @ r8 = sign mask of m (all ones when m is negative, else 0)
84 | lsr r7, r8, #30 @ r7 = 3 when m is negative, else 0
85 | add r4, r7, r3 @ r4 = m + r7
86 | ands r3, r4, #3 @ r3 = low two bits of r4, i.e. the number of leftover samples; if (r3 == 0) nothing remains
87 | beq .kf_bfly2_done
88 | @ float32x4x2_t *Fout; d0 {s0,s1}
89 | @ float32x4x2_t tmp; d1 {s2,s3}
90 | @ float32x4x2_t *Fout2; d2 {s4,s5}
91 | @ float32x4x2_t *tw1; d3 {s6,s7}
92 | @ float32x4x2_t t; d4 {s8,s9}
93 |
94 |
95 | .bfly2_do_while1: @ do { //process 1 sample per iteration
96 | vld1.32 {d2}, [r5] @ load *Fout2; {s4,s5}
97 | vld1.32 {d3}, [r6], r1 @ load *tw1; tw1 += (fstride); {s6,s7}
98 | pld [r6, #0] @ preload next tw1
99 | vmul.f32 d1, d2, d3 @ C_MUL (t, *Fout2 , *tw1); s2 = Fout2.r*tw1.r, s3 = Fout2.i*tw1.i
100 | vsub.f32 s8, s2, s3 @ t.r
101 | vmul.f32 s2, s4, s7
102 | vmul.f32 s3, s5, s6
103 | vadd.f32 s9, s2, s3 @ t.i
104 |
105 | vld1.32 {d0}, [r0] @ load *Fout;
106 | vsub.f32 d5, d0, d4 @ C_SUB( *Fout2 , *Fout , t );
107 | vst1.32 {d5}, [r5]! @ store *Fout2; ++Fout2
108 | pld [r5, #0] @ preload next Fout2
109 |
110 | vadd.f32 d0, d0, d4 @ C_ADDTO( *Fout , t );
111 | vst1.32 {d0}, [r0]! @ store *Fout; ++Fout
112 | pld [r0, #0] @ preload next Fout
113 |
114 | subs r3, r3, #1 @ }while(--k);
115 | bne .bfly2_do_while1
116 |
117 | .kf_bfly2_done:
118 | @ vldmia sp!, {d8-d15}
119 | ldmia sp!, {r4, r5, r6, r7, r8, r9, sl, fp, pc} @ restore the saved registers and return (pc takes the saved lr)
120 | nop
121 |
122 | .endfunc
123 | .end
124 |
--------------------------------------------------------------------------------
/kiss_fft_bfly4_neon.S:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2012, Code Aurora Forum. All rights reserved.
3 | *
4 | * Redistribution and use in source and binary forms, with or without
5 | * modification, are permitted provided that the following conditions are
6 | * met:
7 | * * Redistributions of source code must retain the above copyright
8 | * notice, this list of conditions and the following disclaimer.
9 | * * Redistributions in binary form must reproduce the above
10 | * copyright notice, this list of conditions and the following
11 | * disclaimer in the documentation and/or other materials provided
12 | * with the distribution.
13 | * * Neither the name of Code Aurora Forum, Inc. nor the names of its
14 | * contributors may be used to endorse or promote products derived
15 | * from this software without specific prior written permission.
16 | *
17 | * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
18 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
21 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
27 | * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | */
29 |
30 | @ NEON optimized assembly routine of kf_bfly4(): radix-4 butterfly over m complex float samples (the C/intrinsic equivalents are quoted in the comments below)
31 | @ Layout: .kf_bfly4_do_while4 handles 4 samples per iteration, .kf_bfly4_do_while1 handles the leftover samples one at a time.
32 | .text
33 | .fpu neon
34 | .align 4
35 | .global kf_bfly4
36 | .func kf_bfly4
37 |
38 | kf_bfly4:
39 | stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
40 | @ vstmdb sp!, {d8-d15} @ left disabled: d8-d15 are not written anywhere in this routine
41 | @ r0 - Fout| r1 - fstride | r2 - st | r3 - m
42 | pld [r0, #0]
43 | mov r5, r3 @ keep the sample count m in r5; r3 becomes a byte count below
44 | mov r3, r3, asl #3 @ convert m into bytes count (m*8)
45 | add r6, r2, #264 @ tw1 = st->twiddles (264 is a hard-coded byte offset into *st; presumably it must match the layout of struct kiss_fft_state - TODO confirm)
46 | pld [r6, #0]
47 | mov r7, r6 @ tw2 = st->twiddles
48 | mov r8, r7 @ tw3 = st->twiddles
49 | ldr r2, [r2, #4] @ st->inverse (hard-coded byte offset 4; from here on r2 holds the flag, not the st pointer)
50 | mov r1, r1, asl #3 @ convert fstride into bytes count (fstride*8)
51 | mov r9, r1, asl #1 @ fstride*2
52 | add r10, r1, r9 @ fstride*3
53 | @ float32x4x2_t rfout; q0, q1 (d0-d3)
54 | @ float32x4x2_t tmp; q2, q3 (d4-d7)
55 | @ float32x4x2_t scratch0; q12, q13 (d24-d27)
56 | @ float32x4x2_t scratch1; q14, q15 (d28-d31)
57 | @ float32x4x2_t scratch2; q8, q9 (d16-d19)
58 | @ float32x4x2_t scratch3; q10, q11 (d20-d23)
59 | asrs r4, r5, #2 @ size_t k=m/4;
60 | beq .kf_bfly4_do_while1 @ if(k==0)
61 |
62 | .kf_bfly4_do_while4: @ do { //process 4 samples per iteration
63 | add r11, r0, r3 @ fom = Fout+m;
64 | mov r12, r11
65 | pld [r7, #0]
66 | vld1.32 {d20}, [r6], r1 @ rtwd1 = vld1_f32((const float32_t*)tw1); tw1 += fstride;
67 | vld1.32 {d21}, [r6], r1 @ rtwd2 = vld1_f32((const float32_t*)tw1); tw1 += fstride;
68 | vld1.32 {d22}, [r6], r1 @ rtwd3 = vld1_f32((const float32_t*)tw1); tw1 += fstride;
69 | vld1.32 {d23}, [r6], r1 @ rtwd4 = vld1_f32((const float32_t*)tw1); tw1 += fstride;
70 | vuzp.32 q10, q11 @ scratch3 = vuzpq_f32(vcombine_f32(rtwd1, rtwd2), vcombine_f32(rtwd3, rtwd4));
71 | vld2.32 {d0-d3}, [r11], r3 @ rfout = vld2q_f32((const float32_t*)(fom1)); fom2 = Fout+m2;
72 | vmul.f32 q2, q0, q10 @ C_MUL_NEON(scratch0, rfout, scratch3);
73 | vmul.f32 q3, q1, q11
74 | vsub.f32 q12, q2, q3
75 | vmul.f32 q2, q0, q11
76 | vmul.f32 q3, q1, q10
77 | vadd.f32 q13, q2, q3
78 |
79 | pld [r8, #0]
80 | vld1.32 {d20}, [r7], r9 @ rtwd1 = vld1_f32((const float32_t*)tw2); tw2 += fstride*2;
81 | vld1.32 {d21}, [r7], r9 @ rtwd2 = vld1_f32((const float32_t*)tw2); tw2 += fstride*2;
82 | vld1.32 {d22}, [r7], r9 @ rtwd3 = vld1_f32((const float32_t*)tw2); tw2 += fstride*2;
83 | vld1.32 {d23}, [r7], r9 @ rtwd4 = vld1_f32((const float32_t*)tw2); tw2 += fstride*2;
84 | vuzp.32 q10, q11 @ scratch3 = vuzpq_f32(vcombine_f32(rtwd1, rtwd2), vcombine_f32(rtwd3, rtwd4));
85 | vld2.32 {d0-d3}, [r11], r3 @ rfout = vld2q_f32((const float32_t*)(fom2)); fom3 = Fout+m3;
86 | vmul.f32 q2, q0, q10 @ C_MUL_NEON(scratch1, rfout, scratch3);
87 | vmul.f32 q3, q1, q11
88 | vsub.f32 q14, q2, q3
89 | vmul.f32 q2, q0, q11
90 | vmul.f32 q3, q1, q10
91 | vadd.f32 q15, q2, q3
92 |
93 | pld [r0, #0]
94 | vld1.32 {d20}, [r8], r10 @ rtwd1 = vld1_f32((const float32_t*)tw3); tw3 += fstride*3;
95 | vld1.32 {d21}, [r8], r10 @ rtwd2 = vld1_f32((const float32_t*)tw3); tw3 += fstride*3;
96 | vld1.32 {d22}, [r8], r10 @ rtwd3 = vld1_f32((const float32_t*)tw3); tw3 += fstride*3;
97 | vld1.32 {d23}, [r8], r10 @ rtwd4 = vld1_f32((const float32_t*)tw3); tw3 += fstride*3;
98 | vuzp.32 q10, q11 @ scratch3 = vuzpq_f32(vcombine_f32(rtwd1, rtwd2), vcombine_f32(rtwd3, rtwd4));
99 | vld2.32 {d0-d3}, [r11] @ rfout = vld2q_f32((const float32_t*)(fom3));
100 | vmul.f32 q2, q0, q10 @ C_MUL_NEON(scratch2, rfout, scratch3);
101 | vmul.f32 q3, q1, q11
102 | vsub.f32 q8, q2, q3
103 | vmul.f32 q2, q0, q11
104 | vmul.f32 q3, q1, q10
105 | vadd.f32 q9, q2, q3
106 |
107 | vld2.32 {d0-d3}, [r0] @ rfout = vld2q_f32((const float32_t*)(Fout));
108 | vsub.f32 q2, q0, q14 @ C_SUB_NEON(tmp, rfout, scratch1 );
109 | vsub.f32 q3, q1, q15
110 |
111 | vadd.f32 q0, q0, q14 @ C_ADD_NEON(rfout, rfout, scratch1);
112 | vadd.f32 q1, q1, q15
113 |
114 | vadd.f32 q10, q12, q8 @ C_ADD_NEON(scratch3, scratch0, scratch2);
115 | vadd.f32 q11, q13, q9
116 |
117 | vsub.f32 q12, q12, q8 @ C_SUB_NEON(scratch0, scratch0, scratch2);
118 | vsub.f32 q13, q13, q9
119 |
120 | vsub.f32 q8, q0, q10 @ C_SUB_NEON(scratch2, rfout, scratch3);
121 | vsub.f32 q9, q1, q11
122 |
123 | vadd.f32 q0, q0, q10 @ C_ADD_NEON(rfout, rfout, scratch3);
124 | vadd.f32 q1, q1, q11
125 | vst2.32 {d0-d3}, [r0]! @ vst2q_f32((float32_t*)Fout, rfout); Fout+=4;;
126 |
127 | cmp r2, #0
128 | beq .not_inverse4 @ if(st->inverse) {
129 | vsub.f32 q10, q2, q13 @ scratch3.val[0] = vsubq_f32(tmp.val[0], scratch0.val[1]);
130 | vadd.f32 q11, q3, q12 @ scratch3.val[1] = vaddq_f32(tmp.val[1], scratch0.val[0]);
131 | vadd.f32 q14, q2, q13 @ scratch1.val[0] = vaddq_f32(tmp.val[0], scratch0.val[1]);
132 | vsub.f32 q15, q3, q12 @ scratch1.val[1] = vsubq_f32(tmp.val[1], scratch0.val[0]);
133 | b .c_end4
134 | .not_inverse4: @ } else {
135 | vadd.f32 q10, q2, q13 @ scratch3.val[0] = vaddq_f32(tmp.val[0], scratch0.val[1]);
136 | vsub.f32 q11, q3, q12 @ scratch3.val[1] = vsubq_f32(tmp.val[1], scratch0.val[0]);
137 | vsub.f32 q14, q2, q13 @ scratch1.val[0] = vsubq_f32(tmp.val[0], scratch0.val[1]);
138 | vadd.f32 q15, q3, q12 @ scratch1.val[1] = vaddq_f32(tmp.val[1], scratch0.val[0]);
139 | @ }
140 | .c_end4:
141 | vst2.32 {d20-d23}, [r12], r3 @ vst2q_f32((float32_t*)(fom), scratch3); fom2 = Fout+m2;
142 | vst2.32 {d16-d19}, [r12], r3 @ vst2q_f32((float32_t*)fom2, scratch2); fom3 = Fout+m3;
143 | vst2.32 {d28-d31}, [r12] @ vst2q_f32((float32_t*)(fom3), scratch1);
144 |
145 | pld [r6, #0]
146 |
147 | subs r4, r4, #1 @ }while(--k);
148 | bne .kf_bfly4_do_while4
149 |
150 | @.kf_bfly4_process_singles:
151 | asr r4, r5, #31 @ r4 = sign mask of m (all ones when m is negative, else 0)
152 | lsr r4, r4, #30 @ r4 = 3 when m is negative, else 0
153 | add r4, r4, r5 @ r4 = m + that bias
154 | ands r5, r4, #3 @ r5 = low two bits of r4, i.e. the number of leftover samples; if (r5 == 0) nothing remains
155 | beq .kf_bfly4_done
156 |
157 | .kf_bfly4_do_while1: @ do { //process 1 sample per iteration
158 | pld [r7, #0]
159 | vld1.32 {d18}, [r6], r1 @ rtwd1 = vld1_f32((const float32_t*)tw1); tw1 += fstride;
160 | vuzp.32 d18, d19 @ scratch3 = vuzp_f32(rtwd1, rtwd2); //d19 is not loaded here, so lane 1 of d18/d19 is don't-care
161 | add r12, r0, r3 @ fom = Fout+m;
162 | vld1.32 {d0}, [r12], r3 @ rfout = vld2_f32((const float32_t*)(fom1)); fom2 = Fout+m2;
163 | vuzp.32 d0, d1 @ d1 is empty
164 | vmul.f32 q1, q0, q9 @ C_MUL_NEON(scratch0, rfout, scratch3);
165 | vsub.f32 d4, d2, d3
166 | vmul.f32 d2, d0, d19
167 | vmul.f32 d3, d1, d18
168 | vadd.f32 d5, d2, d3
169 |
170 | pld [r8, #0]
171 | vld1.32 {d18}, [r7], r9 @ rtwd1 = vld1_f32((const float32_t*)tw2); tw2+= fstride*2;
172 | vuzp.32 d18, d19 @ scratch3 = vuzp_f32(rtwd1, rtwd2); //d19 is not loaded here, so lane 1 of d18/d19 is don't-care
173 | vld1.32 {d0}, [r12], r3 @ rfout = vld2_f32((const float32_t*)(fom2)); fom3 = Fout+m3;
174 | vuzp.32 d0, d1 @ d1 is empty
175 | vmul.f32 q1, q0, q9 @ C_MUL_NEON(scratch1, rfout, scratch3);
176 | vsub.f32 d6, d2, d3
177 | vmul.f32 d2, d0, d19
178 | vmul.f32 d3, d1, d18
179 | vadd.f32 d7, d2, d3
180 |
181 | pld [r0, #0]
182 | vld1.32 {d18}, [r8], r10 @ rtwd1 = vld1_f32((const float32_t*)tw3); tw3 += fstride*3;
183 | vuzp.32 d18, d19 @ scratch3 = vuzp_f32(rtwd1, rtwd2); //d19 is not loaded here, so lane 1 of d18/d19 is don't-care
184 | vld1.32 {d0}, [r12] @ rfout = vld2_f32((const float32_t*)(fom3));
185 | vuzp.32 d0, d1 @ d1 is empty
186 | vmul.f32 q1, q0, q9 @ C_MUL_NEON(scratch2, rfout, scratch3);
187 | vsub.f32 d16, d2, d3
188 | vmul.f32 d2, d0, d19
189 | vmul.f32 d3, d1, d18
190 | vadd.f32 d17, d2, d3
191 |
192 | vld1.32 {d0}, [r0] @ rfout = vld2_f32((const float32_t*)(Fout));
193 | vuzp.32 d0, d1 @ split into d0 = real part, d1 = imaginary part (lane 0 only)
194 | vsub.f32 q1, q0, q3 @ C_SUB_NEON(tmp, rfout, scratch1 );
195 |
196 | vadd.f32 q0, q0, q3 @ C_ADD_NEON(rfout, rfout, scratch1);
197 |
198 | vadd.f32 q9, q2, q8 @ C_ADD_NEON(scratch3, scratch0, scratch2);
199 |
200 | vsub.f32 q2, q2, q8 @ C_SUB_NEON(scratch0, scratch0, scratch2);
201 |
202 | vsub.f32 q8, q0, q9 @ C_SUB_NEON(scratch2, rfout, scratch3);
203 |
204 | vadd.f32 q0, q0, q9 @ C_ADD_NEON(rfout, rfout, scratch3);
205 |
206 | cmp r2, #0
207 | beq .not_inverse1 @ if(st->inverse) {
208 | vsub.f32 d18, d2, d5 @ scratch3.val[0] = vsub_f32(tmp.val[0], scratch0.val[1]);
209 | vadd.f32 d19, d3, d4 @ scratch3.val[1] = vadd_f32(tmp.val[1], scratch0.val[0]);
210 | vadd.f32 d6, d2, d5 @ scratch1.val[0] = vadd_f32(tmp.val[0], scratch0.val[1]);
211 | vsub.f32 d7, d3, d4 @ scratch1.val[1] = vsub_f32(tmp.val[1], scratch0.val[0]);
212 | b .c_end1
213 | .not_inverse1: @ } else {
214 | vadd.f32 d18, d2, d5 @ scratch3.val[0] = vadd_f32(tmp.val[0], scratch0.val[1]);
215 | vsub.f32 d19, d3, d4 @ scratch3.val[1] = vsub_f32(tmp.val[1], scratch0.val[0]);
216 | vsub.f32 d6, d2, d5 @ scratch1.val[0] = vsub_f32(tmp.val[0], scratch0.val[1]);
217 | vadd.f32 d7, d3, d4 @ scratch1.val[1] = vadd_f32(tmp.val[1], scratch0.val[0]);
218 | @ }
219 | .c_end1:
220 | mov r12, r0
221 | vzip.32 d0, d1 @ re-interleave so d0 = {real, imag} before storing
222 | vst1.32 {d0}, [r12], r3 @ vst2_f32((float32_t*)Fout, rfout); fom = Fout+m;
223 |
224 | vzip.32 d18, d19
225 | vst1.32 {d18}, [r12], r3 @ vst2_f32((float32_t*)(fom), scratch3); fom2 = Fout+m2;
226 |
227 | vzip.32 d16, d17
228 | vst1.32 {d16}, [r12], r3 @ vst2_f32((float32_t*)fom2, scratch2); fom3 = Fout+m3;
229 |
230 | vzip.32 d6, d7
231 | vst1.32 {d6}, [r12] @ vst2_f32((float32_t*)(fom3), scratch1);
232 |
233 | add r0, r0, #8 @ Fout+=1;
234 | pld [r6, #0]
235 |
236 | subs r5, r5, #1 @ }while(--k);
237 | bne .kf_bfly4_do_while1
238 |
239 | .kf_bfly4_done:
240 | @ vldmia sp!, {d8-d15}
241 | ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} @ restore the saved registers and return (pc takes the saved lr)
242 | nop
243 |
244 | .endfunc
245 | .end
246 |
247 |
--------------------------------------------------------------------------------
/main.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | ViewRF - RTL-SDR Spectrum Analyzer for the BeagleBone
3 | Copyright (C) 2013 Stephen Ong
4 |
5 | This program is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | This program is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with this program. If not, see .
17 |
18 | */
19 |
20 | #include "dialog.h"
21 | #include
22 | #include
23 |
24 | int main(int argc, char *argv[])
25 | {
26 | QApplication a(argc, argv);
27 | Dialog w;
28 | //QWSServer::setCursorVisible(false); //Hide cursor, esp for touchscreen
29 | w.setWindowState(Qt::WindowFullScreen);
30 | w.show();
31 |
32 | return a.exec();
33 | }
34 |
--------------------------------------------------------------------------------
/sdrcapture.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | ViewRF - RTL-SDR Spectrum Analyzer for the BeagleBone
3 | Copyright (C) 2013 Stephen Ong
4 |
5 | This program is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | This program is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with this program. If not, see .
17 |
18 | */
19 |
20 | #include "sdrcapture.h"
21 | #include
22 |
23 | /*****************************************************************************
24 | * Initializes hardware
25 | ****************************************************************************/
26 | SDRCapture::SDRCapture(QObject *parent) :
27 | QObject(parent)
28 | {
29 | qDebug() << Q_FUNC_INFO << QThread::currentThreadId() << QThread::currentThread();
30 |
31 | char vendor[256], product[256], serial[256];
32 | dev = NULL;
33 | out_block_size = 2*BLOCK_LENGTH;
34 | buffer = (uint8_t *) malloc(out_block_size * sizeof(uint8_t));
35 |
36 | int device_count = rtlsdr_get_device_count();
37 | qDebug() << device_count;
38 | for (int i = 0; i < device_count; i++) {
39 | rtlsdr_get_device_usb_strings(i, vendor, product, serial);
40 | qDebug() << i << ", " << vendor <<", "<< product << ", " << serial;
41 |
42 | }
43 | int r = rtlsdr_open(&dev, 0);
44 | if (r < 0) qDebug() << "Fail rtlsdr_open";
45 | r = rtlsdr_set_sample_rate(dev, DEFAULT_SAMPLE_RATE);
46 | if (r < 0) qDebug() << "Fail rtlsdr_set_sample_rate";
47 |
48 | set_frequency(DEFAULT_CENTER_FREQUENCY);
49 |
50 |
51 | //Set Manual gain
52 | r = rtlsdr_set_tuner_gain_mode(dev, 1);
53 | if (r < 0) qDebug() << "WARNING: Failed to enable manual gain.\n";
54 |
55 | //Use offset tuning
56 | set_offset_tuning(false);
57 |
58 |
59 | //rtlsdr_set_testmode(dev, 1); //Test mode for detecting lost samples
60 |
61 | /* Reset endpoint before we start reading from it (mandatory) */
62 | r = rtlsdr_reset_buffer(dev);
63 | if (r < 0) qDebug() << "WARNING: Failed to reset buffers.\n";
64 |
65 | is_locked = FALSE;
66 |
67 | num_gains = rtlsdr_get_tuner_gains(dev, gain_table);
68 | }
69 |
70 | /*****************************************************************************
71 | * Is the tuner Elonics E4000.
72 | ****************************************************************************/
73 | bool SDRCapture::isE4000()
74 | {
75 | if(rtlsdr_get_tuner_type(dev)==RTLSDR_TUNER_E4000) return TRUE;
76 | return FALSE;
77 |
78 | }
79 |
80 | /*****************************************************************************
81 | * How many gain settings are available for this tuner
82 | ****************************************************************************/
83 | unsigned SDRCapture::get_num_gains()
84 | {
85 | return (unsigned) num_gains;
86 | }
87 |
88 | /*****************************************************************************
89 | * Get IQ data buffer
90 | ****************************************************************************/
91 | uint8_t *SDRCapture::getBuffer()
92 | {
93 | return buffer;
94 | }
95 |
96 | /*****************************************************************************
97 | * Event handler for gathering IQ block
98 | ****************************************************************************/
99 | void SDRCapture::threadFunction()
100 | {
101 | int n_read;
102 | int r;
103 |
104 | r = rtlsdr_reset_buffer(dev);
105 | if (r < 0) qDebug() << "rtlsdr_reset_buffer1 fail\n";
106 |
107 | r = rtlsdr_read_sync(dev, buffer, out_block_size, &n_read);
108 | if (r < 0) qDebug() << "rtlsdr_read_sync fail\n";
109 |
110 | emit packetCaptured();
111 | }
112 |
113 | /*****************************************************************************
114 | * Sets frequency
115 | ****************************************************************************/
116 | void SDRCapture::set_frequency(unsigned freq)
117 | {
118 | int r = rtlsdr_set_center_freq(dev, freq);
119 | if (r < 0) {
120 | qDebug() << "Fail rtlsdr_set_center_freq";
121 | is_locked = FALSE;
122 | }
123 | else{
124 | locked_frequency = freq;
125 | is_locked = TRUE;
126 | }
127 | }
128 |
129 | /*****************************************************************************
130 | * Set LNA gain
131 | ****************************************************************************/
132 | void SDRCapture::set_gain(unsigned gain_index)
133 | {
134 | if(gain_index >= num_gains){
135 | gain_index = num_gains - 1;
136 | }
137 |
138 | int r = rtlsdr_set_tuner_gain(dev, gain_table[gain_index]);
139 | if (r < 0) qDebug() << "Fail rtlsdr_set_tuner_gain";
140 | }
141 |
142 | /*****************************************************************************
143 | * Enable offset tuning for E4000
144 | ****************************************************************************/
145 | void SDRCapture::set_offset_tuning(bool on)
146 | {
147 | int r;
148 | if(on){
149 | r = rtlsdr_set_offset_tuning(dev, 1);
150 | }
151 | else{
152 | r = rtlsdr_set_offset_tuning(dev, 0);
153 | }
154 | if (r < 0) qDebug() << "WARNING: Failed to use offset tuning.\n";
155 | }
156 |
157 | // End of sdrcapture.cpp
158 |
--------------------------------------------------------------------------------
/sdrcapture.h:
--------------------------------------------------------------------------------
1 | /*
2 | ViewRF - RTL-SDR Spectrum Analyzer for the BeagleBone
3 | Copyright (C) 2013 Stephen Ong
4 |
5 | This program is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | This program is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with this program. If not, see .
17 |
18 | */
19 |
20 | #ifndef SDRCAPTURE_H
21 | #define SDRCAPTURE_H
22 |
23 | #include
24 | #include "rtl-sdr.h"
25 |
26 | //#define USE_KISSFFT
27 | #define USE_AVFFT //AVFFT is a touch faster
28 |
29 | #define HAMMING_WINDOW
30 | //#define BLACKMANNUTALL_WINDOW
31 |
32 | #define DEFAULT_CENTER_FREQUENCY 952000000 // In Hz
33 | #define DEFAULT_SAMPLE_RATE 2400000 // 2.4 MS/s gives good result
34 |
35 | //----------------------------
36 | //BLOCK_LENGTH is divided into overlapping FFT_LENGTH blocks
37 | //number of edge-to-edge blocks n = BLOCK_LENGTH/FFT_LENGTH
38 | //number of 50% overlapping blocks is n + (n-1)
39 |
40 | #define FFT_LENGTH_2N (10) //2^10 = 1024
41 | #define FFT_LENGTH (1<
4 |
5 | This program is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | This program is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with this program. If not, see .
17 |
18 | */
19 |
20 | #include "spectrumplot.h"
21 |
22 | #include
23 |
24 | SpectrumPlot::SpectrumPlot(QWidget *parent) :
25 | QwtPlot(parent)
26 | {
27 | d_directPainter = new QwtPlotDirectPainter();
28 |
29 | setAutoReplot( false );
30 | setCanvas( new QwtPlotCanvas() );
31 |
32 | plotLayout()->setAlignCanvasToScales( true );
33 |
34 | setTitle("Spectrum Analyzer - Stephen Ong");
35 | setAxisTitle( QwtPlot::xBottom, "Frequency (MHz)" );
36 | setAxisTitle( QwtPlot::yLeft, "Amplitude (dB)" );
37 | setAxisScale( QwtPlot::xBottom, 0, RESULT_LENGTH );
38 | setAxisScale( QwtPlot::yLeft, -90, -10);
39 |
40 | QwtPlotGrid *grid = new QwtPlotGrid();
41 | grid->setPen( Qt::gray, 0.0, Qt::DotLine );
42 | grid->enableX( true );
43 | grid->enableXMin( true );
44 | grid->enableY( true );
45 | grid->enableYMin( false );
46 | grid->attach( this );
47 |
48 |
49 | d_curve = new QwtPlotCurve();
50 | d_curve->setStyle( QwtPlotCurve::Lines );
51 | d_curve->setPen(Qt::darkGray);
52 | //d_curve->setRenderHint( QwtPlotItem::RenderAntialiased, true );
53 | d_curve->setPaintAttribute( QwtPlotCurve::ClipPolygons, false );
54 | d_curve->attach( this );
55 |
56 | for(int i=0; isetSamples(d_x, data, RESULT_LENGTH);
78 | replot();
79 |
80 | }
81 |
82 | void SpectrumPlot::SetXRange(double xStart, double xStop)
83 | {
84 | setAxisScale( QwtPlot::xBottom, xStart, xStop );
85 | int num_points = RESULT_LENGTH;
86 | for(int i=0; i
4 |
5 | This program is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | This program is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with this program. If not, see .
17 |
18 | */
19 |
20 | #ifndef SPECTRUMPLOT_H
21 | #define SPECTRUMPLOT_H
22 |
23 | #include
24 |
25 | #include
26 | #include
27 | #include
28 | #include
29 | #include
30 | #include
31 | #include
32 | #include
33 | #include
34 | #include
35 |
36 | #include "sdrcapture.h"
37 |
38 | class SpectrumPlot : public QwtPlot
39 | {
40 | Q_OBJECT
41 | public:
42 | explicit SpectrumPlot(QWidget *parent = 0);
43 | void SetData(double data[]);
44 | void SetXRange(double xStart, double xStop);
45 | signals:
46 |
47 | public slots:
48 |
49 | private:
50 | QwtPlotDirectPainter *d_directPainter;
51 | QwtPlotCurve *d_curve;
52 | double d_x[RESULT_LENGTH];
53 | };
54 |
55 | #endif // SPECTRUMPLOT_H
56 |
--------------------------------------------------------------------------------