├── c_src ├── .gitignore └── dsp.c ├── .gitignore ├── src ├── dsp.app.src └── dsp.erl ├── rebar.config ├── Makefile └── README.md /c_src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | priv 2 | deps 3 | .rebar 4 | -------------------------------------------------------------------------------- /src/dsp.app.src: -------------------------------------------------------------------------------- 1 | {application, dsp, 2 | [{description, "Lumenosys Robotics DSP NIF library application"}, 3 | {vsn, git}, 4 | {registered, []}, 5 | {applications, [ 6 | kernel, 7 | stdlib 8 | ]}, 9 | {env, []} 10 | ]}. 11 | -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | %% -*- erlang -*- 2 | %% Config file for dsp nif application 3 | {erl_opts, [debug_info, fail_on_warning]}. 4 | {sub_dirs, ["src"]}. 5 | 6 | 7 | {deps, [ 8 | {libbfdsp, ".*", {git, "https://github.com/lumenosys/libbfdsp.git", {branch, "master"}},[raw]} 9 | ]}. 10 | 11 | {erl_first_files, ["libbfdsp"]}. 12 | 13 | {port_env, [ 14 | {"LIBDSP_DIR", "${PWD}/deps/libdsp_installdir"}, 15 | {"STRIP", "bfin-linux-uclibc-strip"}, 16 | {"CC", "bfin-linux-uclibc-gcc"}, 17 | {"CROSS_COMPILE", "bfin-linux-uclibc-"}, 18 | {"DESTDIR", "${PWD}/deps/libdsp_installdir"}, 19 | {"STAGING_DIR", "${HOME}/Projects/Lumenosys/blackfin/lumenosys-adi-buildroot/output/host/usr/bfin-buildroot-linux-uclibc/sysroot"}, 20 | {"CFLAGS", "-Wall -O3 -fpic -funroll-loops -ffast-math -fomit-frame-pointer -mcpu=bf537-0.3 -isystem ${LIBDSP_DIR}/include -I ${STAGING_DIR}/usr/lib/erlang/usr/include"}, 21 | {"(linux)", "LDFLAGS", "-L${LIBDSP_DIR}/lib -Wl,-Bstatic -lbfdsp -Wl,-Bdynamic"} 22 | ]}. 
23 | 24 | {port_specs, [ 25 | {"(linux|freebsd|darwin)","priv/libdsp.so", 26 | [ "c_src/dsp.c" 27 | ]} 28 | ]}. 29 | 30 | %% {edoc_opts, [{doclet, edown_doclet}]}. 31 | %%{ct_extra_params, " -spec test/tunnel_port.spec"}. 32 | 33 | {pre_hooks, 34 | [{"(linux|darwin|solaris)", compile, "make -C deps/libbfdsp install"}, 35 | {"(freebsd)", compile, "gmake -C deps/libbfdsp"}]}. 36 | 37 | {post_hooks, [{"linux", compile, "${STRIP} priv/*"}]}. 38 | -------------------------------------------------------------------------------- /src/dsp.erl: --------------------------------------------------------------------------------
%%% @author Lumenosys Robotics
%%% @copyright (C) 2015, Lumenosys Robotics
%%% @doc
%%% Erlang side of the `libdsp' NIF library.
%%%
%%% When the module is loaded, `init/0' runs as the `-on_load' hook and
%%% loads the shared object `libdsp' from the application's `priv'
%%% directory. Every exported function in this module is only a stub:
%%% once the library is loaded the runtime replaces each stub with the
%%% native function of the same name and arity.
%%% @end

-module(dsp).

-export([fir_fr16_init/1,
         fir_fr16/2,
         coeff_iirdf1_fr16/2,
         iirdf1_fr16_init/1,
         iirdf1_fr16/2,
         rfft_fr16_init/1,
         rfft_fr16/2,
         vecvmlt_fr16/2,
         vecdot_fr1x32/2,
         vecdot_fr16_sr/3,
         cabs_fr16/1,
         gen_hanning_fr16/2,
         autocoh_fr16/2,
         crosscoh_fr16/3,
         autocorr_fr16/2,
         crosscorr_fr16/3,
         histogram_fr16/4,
         mean_fr16/1,
         var_fr16/1,
         max_fr16/1,
         min_fr16/1,
         interleave/2,
         deinterleave/3]).

-on_load(init/0).

%% on_load hook: load the NIF shared object. The match on `ok' makes a
%% failed load abort the loading of this module.
init() ->
    Lib = filename:join(priv_dir(), "libdsp"),
    ok = erlang:load_nif(Lib, 0).

%% Locate the `priv' directory of the dsp application.
%%
%% code:priv_dir/1 returns {error, bad_name} when the application
%% directory on the code path is not called `dsp' (or `dsp-Vsn'), e.g.
%% when the repository was cloned under another name and started with
%% `-pa ebin'. In that case fall back to `../priv' relative to the
%% directory holding this module's beam file.
priv_dir() ->
    case code:priv_dir(dsp) of
        {error, bad_name} ->
            case code:which(?MODULE) of
                BeamFile when is_list(BeamFile) ->
                    filename:join([filename:dirname(BeamFile), "..", "priv"]);
                _ ->
                    "priv"
            end;
        Dir ->
            Dir
    end.

%%
%% NIF stubs.
%%
%% erlang:nif_error/1 raises like error/1 but tells Dialyzer that the
%% function may return any term, so callers of these stubs are not
%% reported as "will never return". The stubs cannot be reached through
%% a normally loaded module: if init/0 fails the module is not loaded.
%%

fir_fr16_init(_State) ->
    erlang:nif_error(nif_library_not_loaded).

fir_fr16(_Handle, _InputData) ->
    erlang:nif_error(nif_library_not_loaded).

coeff_iirdf1_fr16(_A, _B) ->
    erlang:nif_error(nif_library_not_loaded).

iirdf1_fr16_init(_State) ->
    erlang:nif_error(nif_library_not_loaded).

iirdf1_fr16(_Handle, _InputData) ->
    erlang:nif_error(nif_library_not_loaded).

rfft_fr16_init(_State) ->
    erlang:nif_error(nif_library_not_loaded).

rfft_fr16(_Handle, _InputData) ->
    erlang:nif_error(nif_library_not_loaded).

vecvmlt_fr16(_InputVecA, _InputVecB) ->
    erlang:nif_error(nif_library_not_loaded).

vecdot_fr1x32(_InputVecA, _InputVecB) ->
    erlang:nif_error(nif_library_not_loaded).

vecdot_fr16_sr(_InputVecA, _InputVecB, _SRand) ->
    erlang:nif_error(nif_library_not_loaded).

cabs_fr16(_InputVec) ->
    erlang:nif_error(nif_library_not_loaded).

gen_hanning_fr16(_Stride, _Size) ->
    erlang:nif_error(nif_library_not_loaded).

autocoh_fr16(_Samples, _Lags) ->
    erlang:nif_error(nif_library_not_loaded).

crosscoh_fr16(_SamplesA, _SamplesB, _Lags) ->
    erlang:nif_error(nif_library_not_loaded).

autocorr_fr16(_Samples, _Lags) ->
    erlang:nif_error(nif_library_not_loaded).

crosscorr_fr16(_SamplesA, _SamplesB, _Lags) ->
    erlang:nif_error(nif_library_not_loaded).

histogram_fr16(_Samples, _MaxSample, _MinSample, _BinCount) ->
    erlang:nif_error(nif_library_not_loaded).

mean_fr16(_Samples) ->
    erlang:nif_error(nif_library_not_loaded).

var_fr16(_Samples) ->
    erlang:nif_error(nif_library_not_loaded).

max_fr16(_Samples) ->
    erlang:nif_error(nif_library_not_loaded).

min_fr16(_Samples) ->
    erlang:nif_error(nif_library_not_loaded).

interleave(_BinList, _ChunkSize) ->
    erlang:nif_error(nif_library_not_loaded).

deinterleave(_Bin, _Count, _ChunkSize) ->
    erlang:nif_error(nif_library_not_loaded).

-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Lumenosys Robotics. All Rights Reserved. 2 | # 3 | # Most (maybe all) of this borrowed from: 4 | # 5 | # Copyright 2012 Erlware, LLC. All Rights Reserved. 6 | # 7 | # This file is provided to you under the Apache License, 8 | # Version 2.0 (the "License"); you may not use this file 9 | # except in compliance with the License. 
You may obtain 10 | # a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, 15 | # software distributed under the License is distributed on an 16 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 | # KIND, either express or implied. See the License for the 18 | # specific language governing permissions and limitations 19 | # under the License. 20 | # 21 |

# Code paths handed to the interactive shell started by `make shell`.
ERLFLAGS= -pa $(CURDIR)/.eunit -pa $(CURDIR)/ebin -pa $(CURDIR)/deps/*/ebin

# Dialyzer PLT built from the OTP applications listed in DEPS plus ./deps.
DEPS_PLT=$(CURDIR)/.deps_plt
DEPS=erts kernel stdlib

# =============================================================================
# Verify that the programs we need to run are installed on this system
# =============================================================================
ERL = $(shell which erl)

ifeq ($(ERL),)
$(error "Erlang not available on this system")
endif

REBAR=$(shell which rebar)

ifeq ($(REBAR),)
$(error "Rebar not available on this system")
endif

# `deps` is deliberately NOT phony: once the deps/ directory exists the
# dependencies are not fetched again (use `make update-deps` for that).
# `eunit` is a command target and is therefore listed here.
.PHONY: all compile doc clean test eunit dialyzer typer shell distclean pdf \
	update-deps clean-common-test-data rebuild

all: deps compile
# dialyzer test

# =============================================================================
# Rules to build the system
# =============================================================================

deps:
	$(REBAR) get-deps
	$(REBAR) compile

update-deps:
	$(REBAR) update-deps
	$(REBAR) compile

compile:
	$(REBAR) skip_deps=true compile

doc:
	$(REBAR) skip_deps=true doc

eunit: compile clean-common-test-data
	$(REBAR) skip_deps=true eunit

test: compile eunit

$(DEPS_PLT):
	@echo Building local plt at $(DEPS_PLT)
	@echo
	dialyzer --output_plt $(DEPS_PLT) --build_plt \
	   --apps $(DEPS) -r deps

dialyzer: $(DEPS_PLT)
	dialyzer --fullpath --plt $(DEPS_PLT) -Wrace_conditions -r ./ebin

typer:
	typer --plt $(DEPS_PLT) -r ./src

shell: deps compile
# You often want *rebuilt* rebar tests to be available to the
# shell you have to call eunit (to get the tests
# rebuilt). However, eunit runs the tests, which probably
# fails (thats probably why You want them in the shell). This
# runs eunit but tells make to ignore the result.
	- @$(REBAR) skip_deps=true eunit
	@$(ERL) $(ERLFLAGS)

pdf:
	pandoc README.md -o README.pdf

clean:
	- rm -rf $(CURDIR)/test/*.beam
	- rm -rf $(CURDIR)/log
	- rm -rf $(CURDIR)/ebin
	$(REBAR) skip_deps=true clean

distclean: clean
	- rm -rf $(DEPS_PLT)
	- rm -rvf $(CURDIR)/deps

# Full rebuild from scratch. The former `escript` prerequisite was dropped:
# this Makefile has no rule for it, so `make rebuild` always stopped with
# "No rule to make target 'escript'".
rebuild: distclean deps compile dialyzer test
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | dsp 2 | === 3 | 4 | **dsp** is an Erlang NIF library that provides a collection of optimized Digital Signal Processing functions including filters (IIR, FIR), transforms (FFT, IFFT), correlation functions, statistics, and more. This library allows Erlang applications to efficiently perform signal processing on the Analog Devices Blackfin processors. It can be used for things like sensor signal processing, feature extraction, robot control, and data visualization on the Lumenosys Robotics [BMOD][1] board. 5 | 6 | Dependencies 7 | ------------ 8 | 9 | To build you will need a working installation of Erlang 17 (or 10 | later).
11 | Please refer to [Erlang/OTP](http://www.erlang.org) for information on building and installing Erlang/OTP. 12 | 13 | This application is built using [rebar](https://github.com/rebar/rebar). Refer to [building rebar](https://github.com/rebar/rebar/wiki/Building-rebar) for information on building and using rebar. 14 | 15 | You will also need libbfdsp, a fork of the Analog Devices DSP library for the Blackfin processor. This should be pulled in by rebar automatically. 16 | 17 | You will also need the Analog Devices [Blackfin GNU toolchain][2] installed in your path. 18 | 19 | Downloading 20 | ----------- 21 | 22 | ```sh 23 | $ git clone git://github.com/lumenosys/dsp.git 24 | ``` 25 | Building 26 | -------- 27 | 28 | Compile: 29 | 30 | ```sh 31 | $ cd dsp 32 | $ make all 33 | ... 34 | ==> dsp (compile) 35 | ``` 36 | 37 | Usage example 38 | ------------- 39 | 40 | ```erlang 41 | %% 42 | %% Initialize Fourier Transform 43 | %% 44 | fft_fr16_init(FFTSize) -> 45 | {ok, Handle} = dsp:rfft_fr16_init({{fft_size, FFTSize}, 46 | {twiddle_stride, 1}, 47 | {scale_method, 1}}), 48 | %% generate hanning window 49 | Win = dsp:gen_hanning_fr16(1, FFTSize), 50 | %% Note: (FFTSize/2) * 2 since fract16 is 2 bytes 51 | {Handle, Win}. 52 | 53 | %%% 54 | %%% Compute the Fourier Transform on a binary vector of Q15 55 | %%% samples. The first argument is the tuple returned by first 56 | %%% calling fft_fr16_init(FFTSize). The second argument is 57 | %%% a binary containing the Q15 formatted samples. 58 | %%% 59 | fft_fr16({FFTHandle, Win}, NewData) -> 60 | %% window the signal 61 | WinSig = dsp:vecvmlt_fr16(Win, NewData), 62 | %% do the FFT 63 | {Cresult, _BlockExponent} = dsp:rfft_fr16(FFTHandle, WinSig), 64 | %% compute the magnitude (and keep only the real half of the FFT result) 65 | RealSize = ?FFT_SIZE, % Real-side size in bytes == (FFT_SIZE / 2) * 2 66 | <<NewMagFrame:RealSize/binary, _/binary>> = dsp:cabs_fr16(Cresult), 67 | {ok, {FFTHandle, Win}, NewMagFrame}. 
68 | 69 | ``` 70 | 71 | Copyright and License 72 | --------------------- 73 | 74 | > %CopyrightBegin% 75 | > 76 | > Copyright Lumenosys Robotics 2014-2015. All Rights Reserved. 77 | > 78 | > Licensed under the Apache License, Version 2.0 (the "License"); 79 | > you may not use this file except in compliance with the License. 80 | > You may obtain a copy of the License at 81 | > 82 | > http://www.apache.org/licenses/LICENSE-2.0 83 | > 84 | > Unless required by applicable law or agreed to in writing, software 85 | > distributed under the License is distributed on an "AS IS" BASIS, 86 | > WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 87 | > See the License for the specific language governing permissions and 88 | > limitations under the License. 89 | > 90 | > %CopyrightEnd% 91 | 92 | 93 | [1]: https://lumenosys.com/products 94 | [2]: http://sourceforge.net/projects/adi-toolchain/ -------------------------------------------------------------------------------- /c_src/dsp.c: -------------------------------------------------------------------------------- 1 | /* dsp.c --- 2 | * 3 | * Filename: dsp.c 4 | * Description: 5 | * Author: 6 | * Maintainer: 7 | * Created: Wed Jun 4 22:09:30 2014 (-0700) 8 | * Version: 9 | * URL: 10 | * Keywords: 11 | * Compatibility: 12 | * 13 | */ 14 | 15 | /* Commentary: 16 | * 17 | * 18 | * 19 | */ 20 | 21 | /* Change Log: 22 | * 23 | * 24 | */ 25 | 26 | /* *This program is free software; you can redistribute it and/or 27 | * modify it under the terms of the GNU General Public License as 28 | * published by the Free Software Foundation; either version 3, or 29 | * (at your option) any later version. 30 | * 31 | * This program is distributed in the hope that it will be useful, 32 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 33 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 34 | * General Public License for more details. 
35 | * 36 | * You should have received a copy of the GNU General Public License 37 | * along with this program; see the file COPYING. If not, write to 38 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth 39 | * Floor, Boston, MA 02110-1301, USA. 40 | */ 41 | 42 | /* Code: */ 43 | 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include 50 | 51 | #include 52 | 53 | /* libdsp */ 54 | #include 55 | #include 56 | #include 57 | #include 58 | #include 59 | #include 60 | 61 | /* XXX TODO 62 | * 63 | * 1. Use dirty nifs once they are supported on target. 64 | */ 65 | 66 | /* the Blackfin libdsp functions */ 67 | 68 | /* 69 | * Finite impulse response filter 70 | * 71 | * The fir_fr16 function implements a finite impulse response (FIR) 72 | * filter. The function generates the filtered response of the input 73 | * data input and stores the result in the output vector output. The 74 | * number of input samples and the length of the output vector are 75 | * specified by the argument length. The function maintains the 76 | * filter state in the structured variable filter_state, which must be 77 | * declared and initialized before calling the function. The macro 78 | * fir_init, defined in the filter.h header file, is available to 79 | * initialize the structure. 80 | */ 81 | extern void fir_fr16(const fract16 input[], 82 | fract16 output[], 83 | int length, 84 | fir_state_fr16 *filter_state); 85 | 86 | /* 87 | * Direct form I infinite impulse response filter 88 | * 89 | * The iirdf1_fr16 function implements a direct form I infinite 90 | * impulse response (IIR) filter. It generates the filtered response 91 | * of the input data input and stores the result in the output vector 92 | * output. The number of input samples and the length of the output 93 | * vector are specified by the argument length. 
94 | */ 95 | extern void iirdf1_fr16(const fract16 input[], 96 | fract16 output[], 97 | int length, 98 | iirdf1_state_fr16 *filter_state); 99 | 100 | /* 101 | * Convert coefficients for DF1 IIR filter 102 | * 103 | * This function transforms a set of A-coefficients and a set of 104 | * B-coefficients into a set of coefficients for the iirdf1_fr16 105 | * function (see page 4-173), which implements an optimized, direct 106 | * form 1 infinite impulse response (IIR) filter. 107 | */ 108 | extern void coeff_iirdf1_fr16 (const float acoeff[], 109 | const float bcoeff[], 110 | fract16 coeff[ ], int nstages); 111 | 112 | /* The autocoherence functions compute the autocoherence of the input 113 | * vector samples[], which contains sample_length values. The 114 | * autocoherence of an input signal is its autocorrelation minus its 115 | * mean squared. The functions return the result in the output array 116 | * coherence[] of length lags. 117 | */ 118 | extern void autocoh_fr16(const fract16 samples[], 119 | int sample_length, 120 | int lags, 121 | fract16 coherence[]); 122 | 123 | /* The cross-coherence functions compute the cross-coherence of two 124 | * input vectors samples_x[] and samples_y[]. The cross-coherence is 125 | * the cross-correlation minus the product of the mean of samples_x 126 | * and the mean of samples_y. The length of the input vectors is given 127 | * by sample_length. The functions return the result in the array 128 | * coherence with lags elements. 129 | */ 130 | extern void crosscoh_fr16 (const fract16 samples_x[ ], 131 | const fract16 samples_y[ ], 132 | int sample_length, 133 | int lags, 134 | fract16 coherence[ ]); 135 | 136 | /* The autocorrelation functions perform an autocorrelation of a 137 | * signal. Autocorrelation is the cross-correlation of a signal with 138 | * a copy of itself. It provides information about the time variation 139 | * of the signal. The signal to be autocorrelated is given by the 140 | * samples[] input array. 
The number of samples of the autocorrelation 141 | * sequence to be produced is given by lags . The length of the input 142 | * sequence is given by sample_length . Autocorrelation is used in 143 | * digital signal processing applications such as speech analysis. 144 | */ 145 | extern void autocorr_fr16 (const fract16 samples[], 146 | int sample_length, 147 | int lags, 148 | fract16 correlation[]); 149 | 150 | /* The cross-correlation functions perform a cross-correlation between 151 | * two signals. The cross-correlation is the sum of the scalar 152 | * products of the sig- nals in which the signals are displaced in 153 | * time with respect to one another. The signals to be correlated are 154 | * given by the input vectors samples_x[] and samples_y[]. The length 155 | * of the input vectors is given by sample_length. The functions 156 | * return the result in the array correlation with lags elements. 157 | * Cross-correlation is used in signal processing applications such as 158 | * speech analysis. 159 | */ 160 | extern void crosscorr_fr16 (const fract16 samples_x[], 161 | const fract16 samples_y[], 162 | int sample_length, 163 | int lags, 164 | fract16 correlation[]); 165 | 166 | /* The histogram functions compute a histogram of the input vector 167 | * samples[ ] that contains nsamples samples, and store the result in 168 | * the output vector histogram . The minimum and maximum value of any 169 | * input sample is specified by min_sample and max_sample , 170 | * respectively. These values are used by the function to calculate 171 | * the size of each bin as (max_sample – min_sample) / bin_count, 172 | * where bin_count is the size of the output vector histogram. Any 173 | * input value that is outside the range [ min_sample, max_sample ) 174 | * exceeds the boundaries of the output vector and is discarded. 
175 | * To preserve maximum performance while performing out-of-bounds 176 | * checking, the histogram_fr16 function allocates a temporary work 177 | * area on the stack. The work area is allocated with (bin_count + 2) 178 | * elements and the stack may therefore overflow if the number of bins 179 | * is sufficiently large. The size of the stack may be adjusted by 180 | * making appropriate changes to the .ldf file. 181 | */ 182 | extern void histogram_fr16 (const fract16 samples[], 183 | int histogram[], 184 | fract16 max_sample, 185 | fract16 min_sample, 186 | int sample_length, 187 | int bin_count); 188 | 189 | /* mean 190 | * 191 | * The mean functions return the mean of the input array samples[ ]. 192 | * The number of elements in the array is sample_length. */ 193 | extern fract16 mean_fr16(const fract16 samples[], 194 | int sample_length); 195 | 196 | /* var 197 | * 198 | * The variance functions return the variance of the elements within 199 | * the input vector samples[ ]. The number of elements in the vector 200 | * is sample_length. 201 | * 202 | * The var_fr16 function can be used to compute the variance of up to 203 | * 65535 input data with a value of 0x8000 before the sum of the a_i 204 | * saturates. 205 | */ 206 | extern fract16 var_fr16(const fract16 samples[], 207 | int sample_length); 208 | 209 | /* 210 | * Minimum value of two 211 | */ 212 | extern fract16 min_fr16(fract16 a, fract16 b); 213 | 214 | /* 215 | * Maximum value of two 216 | */ 217 | extern fract16 max_fr16(fract16 a, fract16 b); 218 | 219 | /* rms */ 220 | /* zero_cross*/ 221 | 222 | /* 223 | * Real Vector x Vector multiplication 224 | * 225 | */ 226 | extern void vecvmlt_fr16(const fract16 vec_a[], 227 | const fract16 vec_b[], 228 | fract16 out[], int length); 229 | 230 | /* 231 | * Complex absolute value 232 | * 233 | * The cabs functions compute the complex absolute value of a complex 234 | * input and return the result. 
235 | */ 236 | extern fract16 cabs_fr16(complex_fract16 a); 237 | 238 | /* 239 | * Generate Hanning window 240 | * 241 | * The gen_hanning function generates a vector 242 | * containing the Hanning window. The length of the window required is 243 | * specified by the parameter window_size, and the parameter 244 | * window_stride is used to space the win- dow values within the 245 | * output vector hanning_window. The length of the output vector 246 | * should therefore be window_size*window_stride . This win- dow is 247 | * also known as the cosine window. 248 | */ 249 | extern void gen_hanning_fr16(fract16 hanning_window[], 250 | int window_stride, 251 | int window_size); 252 | 253 | 254 | /* N-point radix-2 real input FFT 255 | * 256 | * This function transforms the time domain real input signal sequence 257 | * to the frequency domain by using the radix-2 FFT. The function 258 | * takes advantage of the fact that the imaginary part of the input 259 | * equals zero, which in turn eliminates half of the multiplications 260 | * in the butterfly. The size of the input array input and the output 261 | * array output is fft_size , where fft_size represents the number of 262 | * points in the FFT. If the input data can be overwritten, the 263 | * optimum memory usage can be achieved by also specifying the input 264 | * array as the output array, provided that the mem- ory size of the 265 | * input array is at least 2*fft_size. The twiddle table is passed in 266 | * the argument twiddle_table, which must contain at least fft_size/2 267 | * twiddle factors. The table is composed of +cosine and -sine 268 | * coefficients and may be initialized by using the func- tion 269 | * twidfftrad2_fr16 . For optimum performance, the twiddle table 270 | * should be allocated in a different memory section than the output 271 | * array. 272 | * 273 | * The argument twiddle_stride should be set to 1 if the twiddle table 274 | * was originally created for an FFT of size fft_size . 
If the twiddle 275 | * table was cre- ated for a larger FFT of size N*fft_size (where N is 276 | * a power of 2), then twiddle_stride should be set to N . This 277 | * argument therefore provides a way of using a single twiddle table 278 | * to calculate FFTs of different sizes. The argument scale_method 279 | * controls how the function will apply scaling while computing a 280 | * Fourier Transform. The available options are static scaling 281 | * (dividing the input at any stage by 2), dynamic scaling (dividing 282 | * the input at any stage by 2 if the largest absolute input value is 283 | * greater or equal than 0.5), or no scaling. 284 | */ 285 | extern void rfft_fr16(const fract16 input[], 286 | complex_fract16 output[], 287 | const complex_fract16 twiddle_table[], 288 | int twiddle_stride, 289 | int fft_size, 290 | int *block_exponent, 291 | int scale_method); 292 | 293 | /* 294 | * Generate FFT twiddle factors for radix-2 FFT 295 | * 296 | * The twidfftrad2_fr16 function calculates complex twiddle 297 | * coefficients for an FFT of size fft_size and returns the 298 | * coefficients in the vector twiddle_table . The size of the vector, 299 | * which is known as a twiddle table, must be at least fft_size/2. It 300 | * contains pairs of sine and cosine values that are used by an FFT 301 | * function to calculate a Fast Fourier Transform. The table generated 302 | * by this function may be used by any of the FFT functions cfft_fr16, 303 | * ifft_fr16, and rfft_fr16. 304 | */ 305 | extern void twidfftrad2_fr16(complex_fract16 twiddle_table[], 306 | int fft_size); 307 | 308 | 309 | /* 310 | * Custom DSP functions added to libbfdsp 311 | */ 312 | 313 | /* 314 | * fract16 vector dot product (full 32-bit result) 315 | * 316 | * This function performs the vector dot product of the two fract16 input 317 | * arrays. When the two arrays are 32-bit aligned, this function can utilize 318 | * both Blackfin MAC units in parallel to reduce the cycle time by half. 
This 319 | * function is a modified version of vecdot_fr16(), which returns a full 32-bit 320 | * result to allow custom rounding methods. 321 | */ 322 | // extern fract32 vecdot_fr1x32(const fract16 a[], const fract16 b[], int length); 323 | 324 | /* added by Lumenosys Robotics, April 2015. */ 325 | //extern fract16 vecdot_fr16_sr(const fract16 a[], const fract16 b[], int _length, fract16 srnd); 326 | 327 | /* NIF library resource structures */ 328 | struct fir_fr16_nif_res { 329 | /* cast pointer to this struct onto a resource allocated when 330 | * the filter initialized. */ 331 | /* must setup the data pointers to the appropriate blocks of 332 | * data following this struct */ 333 | fir_state_fr16 state; 334 | unsigned char mem[0]; 335 | }; 336 | 337 | struct rfft_fr16_nif_res { 338 | /* cast pointer to this struct onto a resource allocated when 339 | * the filter initialized. */ 340 | /* must setup the data pointers to the appropriate blocks of 341 | * data following this struct */ 342 | int twiddle_stride; 343 | int fft_size; 344 | int scale_method; 345 | complex_fract16 *twiddle_table; 346 | unsigned char mem[0]; 347 | }; 348 | 349 | struct iirdf1_fr16_nif_res { 350 | /* cast pointer to this struct onto a resource allocated when 351 | * the filter initialized. 
*/ 352 | /* must setup the data pointers to the appropriate blocks of 353 | * data following this struct */ 354 | iirdf1_state_fr16 state; 355 | unsigned char mem[0]; 356 | }; 357 | 358 | /* resource types owned by this NIF library */ 359 | struct libdsp_priv_data { 360 | ErlNifResourceType *fir_fr16_res_type; 361 | ErlNifResourceType *iirdf1_fr16_res_type; 362 | ErlNifResourceType *rfft_fr16_res_type; 363 | }; 364 | 365 | ERL_NIF_TERM 366 | mk_atom(ErlNifEnv* env, const char* atom) 367 | { 368 | ERL_NIF_TERM ret; 369 | 370 | if(!enif_make_existing_atom(env, atom, &ret, ERL_NIF_LATIN1)) 371 | { 372 | return enif_make_atom(env, atom); 373 | } 374 | 375 | return ret; 376 | } 377 | 378 | /* the fir_fr16_init_nif interface function */ 379 | static ERL_NIF_TERM fir_fr16_init_nif(ErlNifEnv *env, 380 | int argc, 381 | const ERL_NIF_TERM argv[]) 382 | { 383 | const ERL_NIF_TERM *state_term; 384 | int state_tuple_len; 385 | char atom[80]; 386 | const ERL_NIF_TERM *substate_term; 387 | int substate_tuple_len; 388 | ERL_NIF_TERM res_term; 389 | 390 | /* filter state terms */ 391 | ErlNifBinary h_term; 392 | int l; 393 | 394 | struct fir_fr16_nif_res *res; 395 | 396 | struct libdsp_priv_data *priv = 397 | (struct libdsp_priv_data *)enif_priv_data(env); 398 | 399 | /* 400 | * State tuple is of the form: 401 | * 402 | * {{ h, binary }, ( binary of coeffs) 403 | * { l, int } (interp/decim index ) 404 | * } 405 | * 406 | */ 407 | if (!enif_get_tuple(env, argv[0], &state_tuple_len, &state_term)) { 408 | printf("%s: expected state tuple\n", __PRETTY_FUNCTION__); 409 | return enif_make_badarg(env); 410 | } 411 | if (state_tuple_len > 2) { 412 | printf("%s: invalid fir_fr16 state tuple\n", __PRETTY_FUNCTION__); 413 | return enif_make_badarg(env); 414 | } 415 | 416 | /* now go through the state tuple, and validate and extract 417 | * the state */ 418 | 419 | /* 420 | * now try to get the coeffs tuple 421 | */ 422 | if (!enif_get_tuple(env, state_term[0], 423 | &substate_tuple_len, 
&substate_term)) { 424 | printf("%s: expected substate tuple\n", __PRETTY_FUNCTION__); 425 | return enif_make_badarg(env); 426 | } 427 | 428 | /* grab and validate the substate atom */ 429 | if (!enif_get_atom(env,substate_term[0],atom,80,ERL_NIF_LATIN1)) { 430 | printf("%s: failed to get state atom\n",__PRETTY_FUNCTION__); 431 | return enif_make_badarg(env); 432 | } 433 | if (strcmp(atom, "h") != 0) { 434 | printf("%s: invalid substate atom %s\n", __PRETTY_FUNCTION__,atom); 435 | return enif_make_badarg(env); 436 | } 437 | 438 | /* so far so good, grab the state value */ 439 | if (!enif_inspect_binary(env, substate_term[1], &h_term)) { 440 | printf("%s: expected coeffs binary\n",__PRETTY_FUNCTION__); 441 | return enif_make_badarg(env); 442 | } 443 | 444 | /* 445 | * now try to get the interp index tuple 446 | */ 447 | if (!enif_get_tuple(env, state_term[1], 448 | &substate_tuple_len, &substate_term)) { 449 | printf("%s: expected substate tuple\n",__PRETTY_FUNCTION__); 450 | return enif_make_badarg(env); 451 | } 452 | 453 | /* grab and validate the substate atom */ 454 | if (!enif_get_atom(env,substate_term[0],atom,80,ERL_NIF_LATIN1)) { 455 | printf("%s: failed to get state atom\n",__PRETTY_FUNCTION__); 456 | return enif_make_badarg(env); 457 | } 458 | if (strcmp(atom, "l") != 0) { 459 | printf("%s: invalid substate atom %s\n", __PRETTY_FUNCTION__,atom); 460 | return enif_make_badarg(env); 461 | } 462 | 463 | if (!enif_get_int(env, substate_term[1], &l)) { 464 | printf("%s: expected int l term\n",__PRETTY_FUNCTION__); 465 | return enif_make_badarg(env); 466 | } 467 | 468 | /* set up our resource */ 469 | /* note: we allocate a big enough block of memory to hold the 470 | * state structure, coefficients, and delay line */ 471 | res = enif_alloc_resource(priv->fir_fr16_res_type, 472 | sizeof(struct fir_fr16_nif_res) + 473 | h_term.size + 474 | h_term.size/*dly line same length as coeffs*/); 475 | 476 | /* initialize the state struct ... 
*/ 477 | fir_init(res->state, 478 | (fract16 *)&res->mem[0], 479 | (fract16 *)&res->mem[h_term.size], 480 | h_term.size >> 1, 481 | l); 482 | 483 | /* copy in the coefficients and init the delay line */ 484 | memcpy((unsigned char *)res->state.h, h_term.data, h_term.size); 485 | memset((unsigned char *)res->state.d, 0, h_term.size/*same length as coeffs*/); 486 | 487 | /* create the resource for this NIF function */ 488 | res_term = enif_make_resource(env, res); 489 | 490 | /* this tells Erlang to garbage collect the resource when it 491 | * is no longer referenced */ 492 | enif_release_resource(res); 493 | 494 | /* make tuple { ok, Handle } and return it */ 495 | return enif_make_tuple2(env, 496 | enif_make_atom(env, "ok"), 497 | res_term); 498 | } 499 | 500 | /* the fir_fr16_nif interface function */ 501 | static ERL_NIF_TERM fir_fr16_nif(ErlNifEnv *env, 502 | int argc, 503 | const ERL_NIF_TERM argv[]) 504 | { 505 | ErlNifBinary x_i_term; 506 | ErlNifBinary y_o_term; 507 | struct libdsp_priv_data *priv; 508 | struct fir_fr16_nif_res *res; 509 | 510 | /* grab the private data */ 511 | priv = (struct libdsp_priv_data *)enif_priv_data(env); 512 | 513 | /* first arg is the resource handle */ 514 | if (!enif_get_resource(env, 515 | argv[0], 516 | priv->fir_fr16_res_type, (void **)&res)) { 517 | printf("dsp: expected fir_fr16 resource handle as first arg\n"); 518 | return enif_make_badarg(env); 519 | } 520 | 521 | /* second arg is the input data */ 522 | if (!enif_inspect_binary(env, argv[1], &x_i_term)) { 523 | printf("dsp: expected input binary\n"); 524 | return enif_make_badarg(env); 525 | } 526 | 527 | /* next, allocate an output binary for the result data */ 528 | /* for non-interp/non-decimating FIR filter, output count == input count */ 529 | if (!enif_alloc_binary(x_i_term.size, &y_o_term)) { 530 | printf("dsp: failed to allocate output binary\n"); 531 | return enif_make_atom(env, "alloc_failure"); 532 | } 533 | 534 | /* do the computation */ 535 | 
fir_fr16((fract16 *)x_i_term.data, /* input data */ 536 | (fract16 *)y_o_term.data, /* output data */ 537 | x_i_term.size >> 1, /* # of samples */ 538 | &res->state); /* the filter state */ 539 | 540 | /* all done! */ 541 | return enif_make_binary(env,&y_o_term); 542 | } 543 | 544 | /* convert floating point coefficients to fr16 into vector for use 545 | * with Direct-form I IIR filter function */ 546 | static ERL_NIF_TERM coeff_iirdf1_fr16_nif(ErlNifEnv *env, 547 | int argc, 548 | const ERL_NIF_TERM argv[]) 549 | { 550 | ErlNifBinary bin; 551 | unsigned int a_count; 552 | unsigned int b_count; 553 | float *a_coeffs; 554 | float *b_coeffs; 555 | double val; 556 | ERL_NIF_TERM head, tail; 557 | int cidx; 558 | int nstages; 559 | int i; 560 | 561 | /* grab the A coefficients */ 562 | 563 | if (!enif_get_list_length(env, argv[0], &a_count)) { 564 | fprintf(stderr, "dsp: expected list\n"); 565 | return enif_make_badarg(env); 566 | } 567 | if (a_count == 0) { 568 | fprintf(stderr, "dsp: expected non-empty list\n"); 569 | return enif_make_badarg(env); 570 | } 571 | 572 | /* allocate memory for the A coefficients */ 573 | a_coeffs = (float *)enif_alloc(sizeof(float) * a_count); 574 | if (a_coeffs == NULL) { 575 | fprintf(stderr, "dsp: failed to allocate memory\n"); 576 | return mk_atom(env, "alloc_failed"); 577 | } 578 | 579 | /* grab all A coefficients */ 580 | 581 | /* get first element */ 582 | if (!enif_get_list_cell(env, argv[0], &head, &tail)) { 583 | fprintf(stderr, "dsp: expected valid list\n"); 584 | enif_free(a_coeffs); 585 | return enif_make_badarg(env); 586 | } 587 | cidx = 0; 588 | do { 589 | /* grab the coefficient */ 590 | if (!enif_get_double(env, head, &val)) { 591 | fprintf(stderr, "dsp: expected double\n"); 592 | enif_free(a_coeffs); 593 | return enif_make_badarg(env); 594 | } 595 | 596 | /* copy in the value */ 597 | a_coeffs[cidx] = (float)val; 598 | 599 | /* increment our index */ 600 | cidx++; 601 | 602 | /* get next element */ 603 | } 
while(enif_get_list_cell(env, tail, &head, &tail)); 604 | 605 | /* grab the B coefficients */ 606 | 607 | if (!enif_get_list_length(env, argv[1], &b_count)) { 608 | fprintf(stderr, "dsp: expected list\n"); 609 | enif_free(a_coeffs); 610 | return enif_make_badarg(env); 611 | } 612 | if (b_count == 0) { 613 | fprintf(stderr, "dsp: expected non-empty list\n"); 614 | enif_free(a_coeffs); 615 | return enif_make_badarg(env); 616 | } 617 | 618 | /* allocate memory for the B coefficients */ 619 | b_coeffs = (float *)enif_alloc(sizeof(float) * b_count); 620 | if (b_coeffs == NULL) { 621 | fprintf(stderr, "dsp: failed to allocate memory\n"); 622 | enif_free(a_coeffs); 623 | return mk_atom(env, "alloc_failed"); 624 | } 625 | 626 | /* grab all B coefficients */ 627 | 628 | /* get first element */ 629 | if (!enif_get_list_cell(env, argv[1], &head, &tail)) { 630 | fprintf(stderr, "dsp: expected valid list\n"); 631 | enif_free(a_coeffs); 632 | enif_free(b_coeffs); 633 | return enif_make_badarg(env); 634 | } 635 | cidx = 0; 636 | do { 637 | /* grab the coefficient */ 638 | if (!enif_get_double(env, head, &val)) { 639 | fprintf(stderr, "dsp: expected double\n"); 640 | enif_free(a_coeffs); 641 | enif_free(b_coeffs); 642 | return enif_make_badarg(env); 643 | } 644 | 645 | /* copy in the value */ 646 | b_coeffs[cidx] = (float)val; 647 | 648 | /* increment our index */ 649 | cidx++; 650 | 651 | /* get next element */ 652 | } while(enif_get_list_cell(env, tail, &head, &tail)); 653 | 654 | /* determine the number of stages */ 655 | nstages = a_count >> 1; 656 | 657 | /* do some sanity checking */ 658 | if (b_count != ((2*nstages)+1)) { 659 | fprintf(stderr, "dsp: B coefficients length must be 2*NSTAGES + 1\n"); 660 | enif_free(a_coeffs); 661 | enif_free(b_coeffs); 662 | return enif_make_badarg(env); 663 | } 664 | 665 | for (i = 0; i < a_count; i++) { 666 | if ((long)a_coeffs[i] > LONG_MAX) { 667 | printf("dsp: error, A coefficients cannot be larger than %ld\n", LONG_MAX); 668 | 
enif_free(a_coeffs); 669 | enif_free(b_coeffs); 670 | return enif_make_badarg(env); 671 | } 672 | 673 | if ((long)a_coeffs[i] < LONG_MIN) { 674 | printf("dsp: error, A coefficients cannot be less than %ld\n", LONG_MIN); 675 | enif_free(a_coeffs); 676 | enif_free(b_coeffs); 677 | return enif_make_badarg(env); 678 | } 679 | } 680 | 681 | for (i = 0; i < b_count; i++) { 682 | if ((long)b_coeffs[i] > LONG_MAX) { 683 | printf("dsp: error, B coefficients cannot be larger than %ld\n", LONG_MAX); 684 | enif_free(a_coeffs); 685 | enif_free(b_coeffs); 686 | return enif_make_badarg(env); 687 | } 688 | 689 | if (b_coeffs[i] < LONG_MIN) { 690 | printf("dsp: error, B coefficients cannot be less than %ld\n", LONG_MIN); 691 | enif_free(a_coeffs); 692 | enif_free(b_coeffs); 693 | return enif_make_badarg(env); 694 | } 695 | } 696 | 697 | /* allocate a binary for the converted coefficients */ 698 | if (!enif_alloc_binary((4*nstages + 2) * sizeof(fract16), &bin)) { 699 | fprintf(stderr, "dsp: failed to allocate binary\n"); 700 | enif_free(a_coeffs); 701 | enif_free(b_coeffs); 702 | return mk_atom(env, "alloc_failed"); 703 | } 704 | 705 | /* init the coeffs vector */ 706 | coeff_iirdf1_fr16(a_coeffs, b_coeffs, (fract16 *)bin.data, nstages); 707 | 708 | /* free all alloc'ed memory */ 709 | enif_free(a_coeffs); 710 | enif_free(b_coeffs); 711 | 712 | return enif_make_tuple2(env, mk_atom(env, "ok"), enif_make_binary(env, &bin)); 713 | } 714 | 715 | /* the iirdf1_fr16_init_nif interface function */ 716 | static ERL_NIF_TERM iirdf1_fr16_init_nif(ErlNifEnv *env, 717 | int argc, 718 | const ERL_NIF_TERM argv[]) 719 | { 720 | const ERL_NIF_TERM *state_term; 721 | int state_tuple_len; 722 | char atom[80]; 723 | const ERL_NIF_TERM *substate_term; 724 | int substate_tuple_len; 725 | ERL_NIF_TERM res_term; 726 | 727 | /* filter state terms */ 728 | ErlNifBinary c_term; 729 | int n; 730 | 731 | struct iirdf1_fr16_nif_res *res; 732 | 733 | struct libdsp_priv_data *priv = 734 | (struct 
libdsp_priv_data *)enif_priv_data(env); 735 | 736 | /* 737 | * State tuple is of the form: 738 | * 739 | * {{ c, binary }, (binary of coeffs) 740 | * { n, int } (number of stages) 741 | * } 742 | * 743 | */ 744 | if (!enif_get_tuple(env, argv[0], &state_tuple_len, &state_term)) { 745 | printf("dsp: expected state tuple\n"); 746 | return enif_make_badarg(env); 747 | } 748 | if (state_tuple_len > 2) { 749 | printf("dsp: invalid fir_fr16 state tuple\n"); 750 | return enif_make_badarg(env); 751 | } 752 | 753 | /* now go through the state tuple, and validate and extract 754 | * the state */ 755 | 756 | /* 757 | * now try to get the coeffs tuple 758 | */ 759 | if (!enif_get_tuple(env, state_term[0], 760 | &substate_tuple_len, &substate_term)) { 761 | printf("dsp: expected substate tuple\n"); 762 | return enif_make_badarg(env); 763 | } 764 | 765 | /* grab and validate the substate atom */ 766 | if (!enif_get_atom(env,substate_term[0],atom,80,ERL_NIF_LATIN1)) { 767 | printf("dsp: failed to get state atom\n"); 768 | return enif_make_badarg(env); 769 | } 770 | if (strcmp(atom, "c") != 0) { 771 | printf("dsp: invalid substate atom %s\n", atom); 772 | return enif_make_badarg(env); 773 | } 774 | 775 | /* so far so good, grab the coeffs binary */ 776 | if (!enif_inspect_binary(env, substate_term[1], &c_term)) { 777 | printf("dsp: expected coeffs binary\n"); 778 | return enif_make_badarg(env); 779 | } 780 | 781 | /* 782 | * now try to get the number of stages 783 | */ 784 | if (!enif_get_tuple(env, state_term[1], 785 | &substate_tuple_len, &substate_term)) { 786 | printf("dsp: expected substate tuple\n"); 787 | return enif_make_badarg(env); 788 | } 789 | 790 | /* grab and validate the substate atom */ 791 | if (!enif_get_atom(env,substate_term[0],atom,80,ERL_NIF_LATIN1)) { 792 | printf("dsp: failed to get state atom\n"); 793 | return enif_make_badarg(env); 794 | } 795 | if (strcmp(atom, "n") != 0) { 796 | printf("dsp: invalid substate atom %s\n", atom); 797 | return 
enif_make_badarg(env); 798 | } 799 | 800 | if (!enif_get_int(env, substate_term[1], &n)) { 801 | printf("dsp: expected int n term\n"); 802 | return enif_make_badarg(env); 803 | } 804 | 805 | /* set up our resource */ 806 | /* note: we allocate a big enough block of memory to hold the 807 | * state structure, coefficients, and delay line */ 808 | res = enif_alloc_resource(priv->iirdf1_fr16_res_type, 809 | sizeof(struct iirdf1_fr16_nif_res) + 810 | c_term.size + /* length of coeffs vector */ 811 | c_term.size/*dly line same length as coeffs*/); 812 | 813 | /* initialize the state struct ... */ 814 | iirdf1_init(res->state, 815 | (fract16 *)&res->mem[0], /* memory for coeffs vector */ 816 | (fract16 *)&res->mem[c_term.size], /* memory for delay line */ 817 | n); /* number of stages */ 818 | 819 | /* copy in the coefficients and init the delay line */ 820 | memcpy((unsigned char *)res->state.c, c_term.data, c_term.size); 821 | memset((unsigned char *)res->state.d, 0, c_term.size/*same length as coeffs*/); 822 | 823 | /* create the resource for this NIF function */ 824 | res_term = enif_make_resource(env, res); 825 | 826 | /* this tells Erlang to garbage collect the resource when it 827 | * is no longer referenced */ 828 | enif_release_resource(res); 829 | 830 | /* make tuple { ok, Handle } and return it */ 831 | return enif_make_tuple2(env, 832 | enif_make_atom(env, "ok"), 833 | res_term); 834 | } 835 | 836 | /* the iirdf1_fr16_nif interface function */ 837 | static ERL_NIF_TERM iirdf1_fr16_nif(ErlNifEnv *env, 838 | int argc, 839 | const ERL_NIF_TERM argv[]) 840 | { 841 | ErlNifBinary x_i_term; 842 | ErlNifBinary y_o_term; 843 | struct libdsp_priv_data *priv; 844 | struct iirdf1_fr16_nif_res *res; 845 | 846 | /* grab the private data */ 847 | priv = (struct libdsp_priv_data *)enif_priv_data(env); 848 | 849 | /* first arg is the resource handle */ 850 | if (!enif_get_resource(env, 851 | argv[0], 852 | priv->iirdf1_fr16_res_type, (void **)&res)) { 853 | printf("dsp: 
expected iirdf1_fr16 resource handle as first arg\n"); 854 | return enif_make_badarg(env); 855 | } 856 | 857 | /* second arg is the input data */ 858 | if (!enif_inspect_binary(env, argv[1], &x_i_term)) { 859 | printf("dsp: expected input binary\n"); 860 | return enif_make_badarg(env); 861 | } 862 | 863 | /* next, allocate an output binary for the result data */ 864 | if (!enif_alloc_binary(x_i_term.size, &y_o_term)) { 865 | printf("dsp: failed to allocate output binary\n"); 866 | return enif_make_atom(env, "alloc_failure"); 867 | } 868 | 869 | /* do the computation */ 870 | iirdf1_fr16((fract16 *)x_i_term.data, /* input data */ 871 | (fract16 *)y_o_term.data, /* output data */ 872 | x_i_term.size >> 1, /* # of samples */ 873 | &res->state); /* the filter state */ 874 | 875 | /* all done! */ 876 | return enif_make_binary(env,&y_o_term); 877 | } 878 | 879 | /* little helper function to validate power of two fft_size */ 880 | int is_power_of_two (unsigned int x) 881 | { 882 | return ((x != 0) && !(x & (x - 1))); 883 | } 884 | 885 | /* the rfft_fr16_init_nif interface function */ 886 | static ERL_NIF_TERM rfft_fr16_init_nif(ErlNifEnv *env, 887 | int argc, 888 | const ERL_NIF_TERM argv[]) 889 | { 890 | const ERL_NIF_TERM *state_term; 891 | int state_tuple_len; 892 | char atom[80]; 893 | const ERL_NIF_TERM *substate_term; 894 | int substate_tuple_len; 895 | ERL_NIF_TERM res_term; 896 | ErlNifBinary twiddle_table_term; 897 | int fft_size, twiddle_stride, scale_method; 898 | struct rfft_fr16_nif_res *res; 899 | struct libdsp_priv_data *priv = 900 | (struct libdsp_priv_data *)enif_priv_data(env); 901 | 902 | /* 903 | * State tuple is of the form: 904 | * 905 | * {{ fft_size, int }, 906 | * { twiddle_stride, int }, 907 | * { scale_method, int } 908 | * } 909 | * 910 | */ 911 | if (!enif_get_tuple(env, argv[0], &state_tuple_len, &state_term)) { 912 | printf("dsp: expected state tuple\n"); 913 | return enif_make_badarg(env); 914 | } 915 | if (state_tuple_len > 4) { 916 | 
printf("dsp: invalid rfft_fr16 state tuple\n"); 917 | return enif_make_badarg(env); 918 | } 919 | 920 | /* now go through the state tuple, and validate and extract 921 | * the state */ 922 | 923 | /* 924 | * now try to get the fft_size tuple 925 | */ 926 | if (!enif_get_tuple(env, state_term[0], 927 | &substate_tuple_len, &substate_term)) { 928 | printf("dsp: expected fft_size substate tuple\n"); 929 | return enif_make_badarg(env); 930 | } 931 | /* grab and validate the substate atom */ 932 | if (!enif_get_atom(env,substate_term[0],atom,80,ERL_NIF_LATIN1)) { 933 | printf("dsp: failed to get fft_size atom\n"); 934 | return enif_make_badarg(env); 935 | } 936 | if (strcmp(atom, "fft_size") != 0) { 937 | printf("dsp: invalid substate atom %s\n", atom); 938 | return enif_make_badarg(env); 939 | } 940 | /* so far so good, grab the value */ 941 | if (!enif_get_int(env, substate_term[1], &fft_size)) { 942 | printf("dsp: expected fft_size value (int)\n"); 943 | return enif_make_badarg(env); 944 | } 945 | /* check that the fft_size is a sane power of two */ 946 | if (!is_power_of_two(fft_size)) { 947 | printf("dsp: invalid fft_size (%d), must be a power of two!\n", fft_size); 948 | return enif_make_badarg(env); 949 | } 950 | 951 | /* 952 | * now try to get the twiddle_stride tuple 953 | */ 954 | if (!enif_get_tuple(env, state_term[1], 955 | &substate_tuple_len, &substate_term)) { 956 | printf("dsp: expected twiddle_stride substate tuple\n"); 957 | return enif_make_badarg(env); 958 | } 959 | /* grab and validate the substate atom */ 960 | if (!enif_get_atom(env,substate_term[0],atom,80,ERL_NIF_LATIN1)) { 961 | printf("dsp: failed to get twiddle_stride atom\n"); 962 | return enif_make_badarg(env); 963 | } 964 | if (strcmp(atom, "twiddle_stride") != 0) { 965 | printf("dsp: invalid substate atom %s\n", atom); 966 | return enif_make_badarg(env); 967 | } 968 | if (!enif_get_int(env, substate_term[1], &twiddle_stride)) { 969 | printf("dsp: expected twiddle_stride value (int)\n"); 
970 | return enif_make_badarg(env); 971 | } 972 | 973 | /* 974 | * now try to get the scale_method tuple 975 | */ 976 | if (!enif_get_tuple(env, state_term[2], 977 | &substate_tuple_len, &substate_term)) { 978 | printf("dsp: expected scale_method substate tuple\n"); 979 | return enif_make_badarg(env); 980 | } 981 | /* grab and validate the substate atom */ 982 | if (!enif_get_atom(env,substate_term[0],atom,80,ERL_NIF_LATIN1)) { 983 | printf("dsp: failed to get scale_method atom\n"); 984 | return enif_make_badarg(env); 985 | } 986 | if (strcmp(atom, "scale_method") != 0) { 987 | printf("dsp: invalid substate atom %s\n", atom); 988 | return enif_make_badarg(env); 989 | } 990 | if (!enif_get_int(env, substate_term[1], &scale_method)) { 991 | printf("dsp: expected scale_method value (int)\n"); 992 | return enif_make_badarg(env); 993 | } 994 | 995 | /* set up our resource */ 996 | 997 | /* allocate a binary for the twiddle table */ 998 | if (!enif_alloc_binary(sizeof(complex_fract16) * (fft_size/2), &twiddle_table_term)) { 999 | printf("dsp: failed to allocate twiddle table binary\n"); 1000 | return enif_make_atom(env, "alloc_failure"); 1001 | } 1002 | 1003 | /* note: we allocate a big enough block of memory to hold the 1004 | * state structure and twiddle table */ 1005 | res = enif_alloc_resource(priv->rfft_fr16_res_type, 1006 | sizeof(struct rfft_fr16_nif_res) + 1007 | twiddle_table_term.size); 1008 | 1009 | /* initialize the state struct ... 
*/ 1010 | res->fft_size = fft_size; 1011 | res->twiddle_stride = twiddle_stride; 1012 | res->scale_method = scale_method; 1013 | res->twiddle_table = (complex_fract16 *)&res->mem[0]; 1014 | twidfftrad2_fr16(res->twiddle_table, fft_size); 1015 | 1016 | /* create the resource for this NIF function */ 1017 | res_term = enif_make_resource(env, res); 1018 | 1019 | /* this tells Erlang to garbage collect the resource when it 1020 | * is no longer referenced */ 1021 | enif_release_resource(res); 1022 | 1023 | /* make tuple { ok, Handle } and return it */ 1024 | return enif_make_tuple2(env, 1025 | enif_make_atom(env, "ok"), 1026 | res_term); 1027 | } 1028 | 1029 | /* the rfft_fr16_nif interface function */ 1030 | static ERL_NIF_TERM rfft_fr16_nif(ErlNifEnv *env, 1031 | int argc, 1032 | const ERL_NIF_TERM argv[]) 1033 | { 1034 | ErlNifBinary x_i_term; 1035 | ErlNifBinary y_o_term; 1036 | struct libdsp_priv_data *priv; 1037 | struct rfft_fr16_nif_res *res; 1038 | int block_exponent; 1039 | 1040 | /* grab the private data */ 1041 | priv = (struct libdsp_priv_data *)enif_priv_data(env); 1042 | 1043 | /* first arg is the resource handle */ 1044 | if (!enif_get_resource(env, 1045 | argv[0], 1046 | priv->rfft_fr16_res_type, (void **)&res)) { 1047 | printf("dsp: expected rfft_fr16 resource handle as first arg\n"); 1048 | return enif_make_badarg(env); 1049 | } 1050 | 1051 | /* second arg is the input data */ 1052 | if (!enif_inspect_binary(env, argv[1], &x_i_term)) { 1053 | printf("dsp: expected input binary\n"); 1054 | return enif_make_badarg(env); 1055 | } 1056 | /* check that the input is fft_size */ 1057 | if (x_i_term.size != (res->fft_size * sizeof(fract16))) { 1058 | printf("dsp: input length must equal fft_size (%d)\n", res->fft_size); 1059 | return enif_make_badarg(env); 1060 | } 1061 | 1062 | /* next, allocate an output binary for the result data */ 1063 | if (!enif_alloc_binary(res->fft_size * sizeof(complex_fract16), &y_o_term)) { 1064 | printf("dsp: failed to allocate 
output binary\n"); 1065 | return enif_make_atom(env, "alloc_failure"); 1066 | } 1067 | 1068 | /* do the computation */ 1069 | rfft_fr16((fract16 *)x_i_term.data, /* input data */ 1070 | (complex_fract16 *)y_o_term.data, /* output data */ 1071 | res->twiddle_table, /* the twiddle table */ 1072 | res->twiddle_stride, /* twiddle stride */ 1073 | res->fft_size, /* FFT size */ 1074 | &block_exponent, /* block exponent */ 1075 | res->scale_method); /* scale method */ 1076 | 1077 | /* all done! */ 1078 | return enif_make_tuple2(env, 1079 | enif_make_binary(env,&y_o_term), 1080 | enif_make_int(env, block_exponent)); 1081 | } 1082 | 1083 | /* Autocoherence NIF */ 1084 | static ERL_NIF_TERM autocoh_fr16_nif(ErlNifEnv *env, 1085 | int argc, 1086 | const ERL_NIF_TERM argv[]) 1087 | { 1088 | ErlNifBinary x_i_term; 1089 | ErlNifBinary y_o_term; 1090 | fract16 *x; 1091 | fract16 *y; 1092 | int lags; 1093 | 1094 | /* first arg is vector of fr16 data samples */ 1095 | if (!enif_inspect_binary(env, argv[0], &x_i_term)) { 1096 | printf("dsp: expected input binary\n"); 1097 | return enif_make_badarg(env); 1098 | } 1099 | 1100 | /* secong arg is number of lags */ 1101 | if (!enif_get_int(env, argv[1], &lags)) { 1102 | printf("dsp: expected unsigned integer\n"); 1103 | return enif_make_badarg(env); 1104 | } 1105 | 1106 | /* next, allocate an output binary for the result data (output 1107 | * count is equal to number of lags) */ 1108 | if (!enif_alloc_binary(lags * sizeof(fract16), &y_o_term)) { 1109 | printf("dsp: failed to allocate output binary\n"); 1110 | return enif_make_atom(env, "alloc_failure"); 1111 | } 1112 | 1113 | /* set input and output pointers to binary data buffers */ 1114 | x = (fract16 *)x_i_term.data; 1115 | y = (fract16 *)y_o_term.data; 1116 | 1117 | /* to the computation */ 1118 | autocoh_fr16(x, x_i_term.size >> 1, lags, y); 1119 | 1120 | /* all done! 
*/ 1121 | return enif_make_binary(env, &y_o_term); 1122 | } 1123 | 1124 | /* Cross-coherence NIF */ 1125 | static ERL_NIF_TERM crosscoh_fr16_nif(ErlNifEnv *env, 1126 | int argc, 1127 | const ERL_NIF_TERM argv[]) 1128 | { 1129 | ErlNifBinary x1_i_term; 1130 | ErlNifBinary x2_i_term; 1131 | ErlNifBinary y_o_term; 1132 | fract16 *x1; 1133 | fract16 *x2; 1134 | fract16 *y; 1135 | int lags; 1136 | 1137 | /* first arg is vector of fr16 data samples (X) */ 1138 | if (!enif_inspect_binary(env, argv[0], &x1_i_term)) { 1139 | printf("dsp: expected input binary\n"); 1140 | return enif_make_badarg(env); 1141 | } 1142 | 1143 | /* second arg is vector of fr16 data samples (Y) */ 1144 | if (!enif_inspect_binary(env, argv[1], &x2_i_term)) { 1145 | printf("dsp: expected input binary\n"); 1146 | return enif_make_badarg(env); 1147 | } 1148 | 1149 | if (x1_i_term.size != x2_i_term.size) { 1150 | printf("dsp: both input binaries must be the same length\n"); 1151 | return enif_make_badarg(env); 1152 | } 1153 | 1154 | /* third arg is number of lags */ 1155 | if (!enif_get_int(env, argv[2], &lags)) { 1156 | printf("dsp: expected unsigned integer\n"); 1157 | return enif_make_badarg(env); 1158 | } 1159 | 1160 | /* next, allocate an output binary for the result data (output 1161 | * count is equal to number of lags) */ 1162 | if (!enif_alloc_binary(lags * sizeof(fract16), &y_o_term)) { 1163 | printf("dsp: failed to allocate output binary\n"); 1164 | return enif_make_atom(env, "alloc_failure"); 1165 | } 1166 | 1167 | /* set input and output pointers to binary data buffers */ 1168 | x1 = (fract16 *)x1_i_term.data; 1169 | x2 = (fract16 *)x2_i_term.data; 1170 | y = (fract16 *)y_o_term.data; 1171 | 1172 | /* to the computation */ 1173 | crosscoh_fr16(x1, x2, x1_i_term.size >> 1, lags, y); 1174 | 1175 | /* all done! 
*/ 1176 | return enif_make_binary(env, &y_o_term); 1177 | } 1178 | 1179 | /* Autocorrelation NIF */ 1180 | static ERL_NIF_TERM autocorr_fr16_nif(ErlNifEnv *env, 1181 | int argc, 1182 | const ERL_NIF_TERM argv[]) 1183 | { 1184 | ErlNifBinary x_i_term; 1185 | ErlNifBinary y_o_term; 1186 | fract16 *x; 1187 | fract16 *y; 1188 | int lags; 1189 | 1190 | /* first arg is vector of fr16 data samples */ 1191 | if (!enif_inspect_binary(env, argv[0], &x_i_term)) { 1192 | printf("dsp: expected input binary\n"); 1193 | return enif_make_badarg(env); 1194 | } 1195 | 1196 | /* secong arg is number of lags */ 1197 | if (!enif_get_int(env, argv[1], &lags)) { 1198 | printf("dsp: expected unsigned integer\n"); 1199 | return enif_make_badarg(env); 1200 | } 1201 | 1202 | /* next, allocate an output binary for the result data (output 1203 | * count is equal to number of lags) */ 1204 | if (!enif_alloc_binary(lags * sizeof(fract16), &y_o_term)) { 1205 | printf("dsp: failed to allocate output binary\n"); 1206 | return enif_make_atom(env, "alloc_failure"); 1207 | } 1208 | 1209 | /* set input and output pointers to binary data buffers */ 1210 | x = (fract16 *)x_i_term.data; 1211 | y = (fract16 *)y_o_term.data; 1212 | 1213 | /* to the computation */ 1214 | autocorr_fr16(x, x_i_term.size >> 1, lags, y); 1215 | 1216 | /* all done! 
*/ 1217 | return enif_make_binary(env, &y_o_term); 1218 | } 1219 | 1220 | /* Cross-correlation NIF */ 1221 | static ERL_NIF_TERM crosscorr_fr16_nif(ErlNifEnv *env, 1222 | int argc, 1223 | const ERL_NIF_TERM argv[]) 1224 | { 1225 | ErlNifBinary x1_i_term; 1226 | ErlNifBinary x2_i_term; 1227 | ErlNifBinary y_o_term; 1228 | fract16 *x1; 1229 | fract16 *x2; 1230 | fract16 *y; 1231 | int lags; 1232 | 1233 | /* first arg is vector of fr16 data samples (X) */ 1234 | if (!enif_inspect_binary(env, argv[0], &x1_i_term)) { 1235 | printf("dsp: expected input binary\n"); 1236 | return enif_make_badarg(env); 1237 | } 1238 | 1239 | /* second arg is vector of fr16 data samples (Y) */ 1240 | if (!enif_inspect_binary(env, argv[1], &x2_i_term)) { 1241 | printf("dsp: expected input binary\n"); 1242 | return enif_make_badarg(env); 1243 | } 1244 | 1245 | if (x1_i_term.size != x2_i_term.size) { 1246 | printf("dsp: both input binaries must be the same length\n"); 1247 | return enif_make_badarg(env); 1248 | } 1249 | 1250 | /* third arg is number of lags */ 1251 | if (!enif_get_int(env, argv[2], &lags)) { 1252 | printf("dsp: expected unsigned integer\n"); 1253 | return enif_make_badarg(env); 1254 | } 1255 | 1256 | /* next, allocate an output binary for the result data (output 1257 | * count is equal to number of lags) */ 1258 | if (!enif_alloc_binary(lags * sizeof(fract16), &y_o_term)) { 1259 | printf("dsp: failed to allocate output binary\n"); 1260 | return enif_make_atom(env, "alloc_failure"); 1261 | } 1262 | 1263 | /* set input and output pointers to binary data buffers */ 1264 | x1 = (fract16 *)x1_i_term.data; 1265 | x2 = (fract16 *)x2_i_term.data; 1266 | y = (fract16 *)y_o_term.data; 1267 | 1268 | /* to the computation */ 1269 | crosscorr_fr16(x1, x2, x1_i_term.size >> 1, lags, y); 1270 | 1271 | /* all done! 
*/ 1272 | return enif_make_binary(env, &y_o_term); 1273 | } 1274 | 1275 | /* the histogram_fr16_nif interface function */ 1276 | /* NOTE: the output vector is a binary of uint32_t values, not fract16!! */ 1277 | static ERL_NIF_TERM histogram_fr16_nif(ErlNifEnv *env, 1278 | int argc, 1279 | const ERL_NIF_TERM argv[]) 1280 | { 1281 | ErlNifBinary x_i_term; 1282 | ErlNifBinary y_o_term; 1283 | fract16 min_sample, max_sample; 1284 | int bin_count; 1285 | 1286 | /* first arg is vector of fract16s */ 1287 | if (!enif_inspect_binary(env, argv[0], &x_i_term)) { 1288 | printf("dsp: expected input binary\n"); 1289 | return enif_make_badarg(env); 1290 | } 1291 | 1292 | /* second arg is max sample value */ 1293 | if (!enif_get_int(env, argv[1], (int *)&max_sample)) { 1294 | printf("dsp: expected integer\n"); 1295 | return enif_make_badarg(env); 1296 | } 1297 | 1298 | /* arg 3 is min sample value */ 1299 | if (!enif_get_int(env, argv[2], (int *)&min_sample)) { 1300 | printf("dsp: expected integer\n"); 1301 | return enif_make_badarg(env); 1302 | } 1303 | 1304 | /* arg 4 is bin count */ 1305 | if (!enif_get_int(env, argv[3], &bin_count)) { 1306 | printf("dsp: expected integer\n"); 1307 | return enif_make_badarg(env); 1308 | } 1309 | 1310 | /* next, allocate an output binary for the result data */ 1311 | if (!enif_alloc_binary(bin_count * sizeof(int), &y_o_term)) { 1312 | printf("dsp: failed to allocate output binary\n"); 1313 | return enif_make_atom(env, "alloc_failure"); 1314 | } 1315 | 1316 | /* do the computation */ 1317 | histogram_fr16((fract16 *)x_i_term.data, /* input vec */ 1318 | (int *)y_o_term.data, /* output vec */ 1319 | max_sample, 1320 | min_sample, 1321 | x_i_term.size >> 1, /* sample_length */ 1322 | bin_count); 1323 | 1324 | /* all done! 
*/ 1325 | return enif_make_binary(env, &y_o_term); 1326 | } 1327 | 1328 | 1329 | /* the vecvmlt_fr16_nif interface function */ 1330 | static ERL_NIF_TERM vecvmlt_fr16_nif(ErlNifEnv *env, 1331 | int argc, 1332 | const ERL_NIF_TERM argv[]) 1333 | { 1334 | ErlNifBinary xa_i_term; 1335 | ErlNifBinary xb_i_term; 1336 | ErlNifBinary y_o_term; 1337 | 1338 | /* first arg is vector A of fract16s */ 1339 | if (!enif_inspect_binary(env, argv[0], &xa_i_term)) { 1340 | printf("dsp: expected input binary\n"); 1341 | return enif_make_badarg(env); 1342 | } 1343 | 1344 | /* second arg is vector B of fract16s */ 1345 | if (!enif_inspect_binary(env, argv[1], &xb_i_term)) { 1346 | printf("dsp: expected input binary\n"); 1347 | return enif_make_badarg(env); 1348 | } 1349 | 1350 | /* sanity check on lengths */ 1351 | if (xa_i_term.size != xb_i_term.size) { 1352 | printf("dsp: vector lengths must be equal!\n"); 1353 | return enif_make_badarg(env); 1354 | } 1355 | 1356 | /* next, allocate an output binary for the result data (output 1357 | * length equals length of input vectors) */ 1358 | if (!enif_alloc_binary(xa_i_term.size, &y_o_term)) { 1359 | printf("dsp: failed to allocate output binary\n"); 1360 | return enif_make_atom(env, "alloc_failure"); 1361 | } 1362 | 1363 | /* do the computation */ 1364 | vecvmlt_fr16((fract16 *)xa_i_term.data, /* input vec A */ 1365 | (fract16 *)xb_i_term.data, /* input vec B */ 1366 | (fract16 *)y_o_term.data, /* output vec */ 1367 | xa_i_term.size/sizeof(fract16));/* vector len */ 1368 | 1369 | /* all done! 
*/ 1370 | return enif_make_binary(env, &y_o_term); 1371 | } 1372 | 1373 | /* the vecdot_fr1x32_nif interface function */ 1374 | static ERL_NIF_TERM vecdot_fr1x32_nif(ErlNifEnv *env, 1375 | int argc, 1376 | const ERL_NIF_TERM argv[]) 1377 | { 1378 | ErlNifBinary xa_i_term; 1379 | ErlNifBinary xb_i_term; 1380 | ErlNifBinary y_o_term; 1381 | fract32 *py; 1382 | unsigned long addr; 1383 | 1384 | /* first arg is vector A of fract16s */ 1385 | if (!enif_inspect_binary(env, argv[0], &xa_i_term)) { 1386 | printf("dsp: expected input binary\n"); 1387 | return enif_make_badarg(env); 1388 | } 1389 | 1390 | /* second arg is vector B of fract16s */ 1391 | if (!enif_inspect_binary(env, argv[1], &xb_i_term)) { 1392 | printf("dsp: expected input binary\n"); 1393 | return enif_make_badarg(env); 1394 | } 1395 | 1396 | /* sanity check on lengths */ 1397 | if (xa_i_term.size != xb_i_term.size) { 1398 | printf("dsp: vector lengths must be equal!\n"); 1399 | return enif_make_badarg(env); 1400 | } 1401 | 1402 | /* next, allocate an output binary for the result data (single 32-bit binary) */ 1403 | if (!enif_alloc_binary(sizeof(fract32), &y_o_term)) { 1404 | printf("dsp: failed to allocate output binary\n"); 1405 | return enif_make_atom(env, "alloc_failure"); 1406 | } 1407 | py = (fract32 *)y_o_term.data; 1408 | 1409 | addr = (unsigned long)xa_i_term.data; 1410 | if (addr & 3) return enif_make_atom(env, "input_a_mem_unaligned"); 1411 | 1412 | addr = (unsigned long)xb_i_term.data; 1413 | if (addr & 3) return enif_make_atom(env, "input_b_mem_unaligned"); 1414 | 1415 | addr = (unsigned long)y_o_term.data; 1416 | if (addr & 3) return enif_make_atom(env, "output_y_mem_unaligned"); 1417 | 1418 | /* do the computation */ 1419 | *py = vecdot_fr1x32((fract16 *)xa_i_term.data, /* input vec A */ 1420 | (fract16 *)xb_i_term.data, /* input vec B */ 1421 | xa_i_term.size >> 1); /* vector len */ 1422 | 1423 | /* all done! 
*/ 1424 | return enif_make_binary(env, &y_o_term); 1425 | } 1426 | 1427 | /* the vecdot_fr16_sr_nif interface function */ 1428 | static ERL_NIF_TERM vecdot_fr16_sr_nif(ErlNifEnv *env, 1429 | int argc, 1430 | const ERL_NIF_TERM argv[]) 1431 | { 1432 | ErlNifBinary xa_i_term; 1433 | ErlNifBinary xb_i_term; 1434 | ErlNifBinary y_o_term; 1435 | fract16 *py; 1436 | unsigned int srnd; 1437 | unsigned long addr; 1438 | 1439 | /* first arg is vector A of fract16s */ 1440 | if (!enif_inspect_binary(env, argv[0], &xa_i_term)) { 1441 | printf("dsp: expected input binary\n"); 1442 | return enif_make_badarg(env); 1443 | } 1444 | 1445 | /* second arg is vector B of fract16s */ 1446 | if (!enif_inspect_binary(env, argv[1], &xb_i_term)) { 1447 | printf("dsp: expected input binary\n"); 1448 | return enif_make_badarg(env); 1449 | } 1450 | 1451 | /* third arg is a random integer */ 1452 | if (!enif_get_uint(env, argv[2], &srnd)) { 1453 | printf("dsp: expected integer\n"); 1454 | return enif_make_badarg(env); 1455 | } 1456 | 1457 | /* sanity check on lengths */ 1458 | if (xa_i_term.size != xb_i_term.size) { 1459 | printf("dsp: vector lengths must be equal!\n"); 1460 | return enif_make_badarg(env); 1461 | } 1462 | 1463 | /* next, allocate an output binary for the result data (single 32-bit binary) */ 1464 | if (!enif_alloc_binary(sizeof(fract16), &y_o_term)) { 1465 | printf("dsp: failed to allocate output binary\n"); 1466 | return enif_make_atom(env, "alloc_failure"); 1467 | } 1468 | py = (fract16 *)y_o_term.data; 1469 | 1470 | addr = (unsigned long)xa_i_term.data; 1471 | if (addr & 3) return enif_make_atom(env, "input_a_mem_unaligned"); 1472 | 1473 | addr = (unsigned long)xb_i_term.data; 1474 | if (addr & 3) return enif_make_atom(env, "input_b_mem_unaligned"); 1475 | 1476 | addr = (unsigned long)y_o_term.data; 1477 | if (addr & 3) return enif_make_atom(env, "output_y_mem_unaligned"); 1478 | 1479 | /* do the computation */ 1480 | *py = vecdot_fr16_sr((fract16 *)xa_i_term.data, /* input 
vec A */ 1481 | (fract16 *)xb_i_term.data, /* input vec B */ 1482 | xa_i_term.size >> 1, /* vector len */ 1483 | srnd); /* random number for rounding */ 1484 | 1485 | /* all done! */ 1486 | return enif_make_binary(env, &y_o_term); 1487 | } 1488 | 1489 | 1490 | /* the cabs_fr16_nif interface function */ 1491 | /* NOTE: this function supports single or vector arguments (i.e., the 1492 | * binary can contain a single sample, or a be an entire vector of 1493 | * samples */ 1494 | static ERL_NIF_TERM cabs_fr16_nif(ErlNifEnv *env, 1495 | int argc, 1496 | const ERL_NIF_TERM argv[]) 1497 | { 1498 | ErlNifBinary x_i_term; 1499 | ErlNifBinary y_o_term; 1500 | complex_fract16 *x; 1501 | fract16 *y; 1502 | int length; 1503 | int i; 1504 | 1505 | /* struct libdsp_priv_data *priv; */ 1506 | 1507 | /* first arg is vector of complex_fract16s */ 1508 | if (!enif_inspect_binary(env, argv[0], &x_i_term)) { 1509 | printf("dsp: expected input binary\n"); 1510 | return enif_make_badarg(env); 1511 | } 1512 | 1513 | /* compute complex_fract16 vector length */ 1514 | length = x_i_term.size / sizeof(complex_fract16); 1515 | 1516 | /* next, allocate an output binary for the result data (output 1517 | * count is equal to input count, but size of elements is 1518 | * fract16 */ 1519 | if (!enif_alloc_binary(length * sizeof(fract16), &y_o_term)) { 1520 | printf("dsp: failed to allocate output binary\n"); 1521 | return enif_make_atom(env, "alloc_failure"); 1522 | } 1523 | 1524 | /* set input and output pointers to binary data buffers */ 1525 | x = (complex_fract16 *)x_i_term.data; 1526 | y = (fract16 *)y_o_term.data; 1527 | 1528 | /* do the computation */ 1529 | for (i = 0; i < length; i++) { 1530 | y[i] = cabs_fr16(x[i]); 1531 | } 1532 | 1533 | /* all done! 
*/ 1534 | return enif_make_binary(env, &y_o_term); 1535 | } 1536 | 1537 | /* the gen_hanning_fr16_nif interface function */ 1538 | static ERL_NIF_TERM gen_hanning_fr16_nif(ErlNifEnv *env, 1539 | int argc, 1540 | const ERL_NIF_TERM argv[]) 1541 | { 1542 | ErlNifBinary y_o_term; 1543 | int stride; 1544 | int size; 1545 | 1546 | /* struct libdsp_priv_data *priv; */ 1547 | 1548 | if (!enif_get_int(env, argv[0], &stride)) { 1549 | printf("dsp: expected window stride value (int)\n"); 1550 | return enif_make_badarg(env); 1551 | } 1552 | 1553 | if (!enif_get_int(env, argv[1], &size)) { 1554 | printf("dsp: expected window size value (int)\n"); 1555 | return enif_make_badarg(env); 1556 | } 1557 | 1558 | /* next, allocate an output binary for the result data */ 1559 | if (!enif_alloc_binary(stride * size * sizeof(fract16), &y_o_term)) { 1560 | printf("dsp: failed to allocate output binary\n"); 1561 | return enif_make_atom(env, "alloc_failure"); 1562 | } 1563 | 1564 | /* generate the window */ 1565 | gen_hanning_fr16((fract16 *)y_o_term.data, stride, size); 1566 | 1567 | /* all done! */ 1568 | return enif_make_binary(env, &y_o_term); 1569 | } 1570 | 1571 | /* the mean_fr16_nif interface function */ 1572 | static ERL_NIF_TERM mean_fr16_nif(ErlNifEnv *env, 1573 | int argc, 1574 | const ERL_NIF_TERM argv[]) 1575 | { 1576 | ErlNifBinary x_i_term; 1577 | fract16 *x; 1578 | fract16 y; 1579 | 1580 | /* first arg is vector of fract16s */ 1581 | if (!enif_inspect_binary(env, argv[0], &x_i_term)) { 1582 | printf("dsp: expected input binary\n"); 1583 | return enif_make_badarg(env); 1584 | } 1585 | 1586 | /* set input and output pointers to binary data buffers */ 1587 | x = (fract16 *)x_i_term.data; 1588 | 1589 | /* do the computation */ 1590 | y = mean_fr16(x, x_i_term.size >> 1); 1591 | 1592 | /* all done! 
*/ 1593 | return enif_make_int(env, y); 1594 | } 1595 | 1596 | /* the var_fr16_nif interface function */ 1597 | static ERL_NIF_TERM var_fr16_nif(ErlNifEnv *env, 1598 | int argc, 1599 | const ERL_NIF_TERM argv[]) 1600 | { 1601 | ErlNifBinary x_i_term; 1602 | fract16 *x; 1603 | fract16 y; 1604 | 1605 | /* first arg is vector of fract16s */ 1606 | if (!enif_inspect_binary(env, argv[0], &x_i_term)) { 1607 | printf("dsp: expected input binary\n"); 1608 | return enif_make_badarg(env); 1609 | } 1610 | 1611 | /* set input and output pointers to binary data buffers */ 1612 | x = (fract16 *)x_i_term.data; 1613 | 1614 | /* do the computation */ 1615 | y = var_fr16(x, x_i_term.size >> 1); 1616 | 1617 | /* all done! */ 1618 | return enif_make_int(env, y); 1619 | } 1620 | 1621 | 1622 | /* the min_fr16_nif interface function */ 1623 | /* NOTE: this function supports single or vector arguments (i.e., the 1624 | * binary can contain a single sample, or a be a vector of samples */ 1625 | static ERL_NIF_TERM min_fr16_nif(ErlNifEnv *env, 1626 | int argc, 1627 | const ERL_NIF_TERM argv[]) 1628 | { 1629 | ErlNifBinary x_i_term; 1630 | fract16 *x; 1631 | fract16 min; 1632 | int i; 1633 | 1634 | /* first arg is vector of fract16s */ 1635 | if (!enif_inspect_binary(env, argv[0], &x_i_term)) { 1636 | printf("dsp: expected input binary\n"); 1637 | return enif_make_badarg(env); 1638 | } 1639 | 1640 | /* set input pointer to binary data buffer */ 1641 | x = (fract16 *)x_i_term.data; 1642 | 1643 | /* do the computation */ 1644 | min = 0x7fff; 1645 | for (i = 0; i < (x_i_term.size >> 1); i++) { 1646 | min = min_fr16(min, x[i]); 1647 | } 1648 | 1649 | /* all done! 
*/ 1650 | return enif_make_int(env, (int)min); 1651 | } 1652 | 1653 | /* the max_fr16_nif interface function */ 1654 | /* NOTE: this function supports single or vector arguments (i.e., the 1655 | * binary can contain a single sample, or a be a vector of samples */ 1656 | static ERL_NIF_TERM max_fr16_nif(ErlNifEnv *env, 1657 | int argc, 1658 | const ERL_NIF_TERM argv[]) 1659 | { 1660 | ErlNifBinary x_i_term; 1661 | fract16 *x; 1662 | fract16 max; 1663 | int i; 1664 | 1665 | /* first arg is vector of fract16s */ 1666 | if (!enif_inspect_binary(env, argv[0], &x_i_term)) { 1667 | printf("dsp: expected input binary\n"); 1668 | return enif_make_badarg(env); 1669 | } 1670 | 1671 | /* set input pointer to binary data buffer */ 1672 | x = (fract16 *)x_i_term.data; 1673 | 1674 | /* do the computation */ 1675 | max = 0x8000; 1676 | for (i = 0; i < (x_i_term.size >> 1); i++) { 1677 | max = max_fr16(max, x[i]); 1678 | } 1679 | 1680 | /* all done! */ 1681 | return enif_make_int(env, (int)max); 1682 | } 1683 | 1684 | /* the interleave_nif interface function */ 1685 | /* this NIF provides for interleaving of multiple binaries given 1686 | * chunk_size number of bytes */ 1687 | static ERL_NIF_TERM interleave_nif(ErlNifEnv *env, 1688 | int argc, 1689 | const ERL_NIF_TERM argv[]) 1690 | { 1691 | ErlNifBinary y_o_term; 1692 | ErlNifBinary *x_i_term; 1693 | ERL_NIF_TERM head, tail; 1694 | unsigned int bin_count; 1695 | int bin_len; 1696 | int chunk_size; 1697 | int cidx, bidx, iidx; 1698 | 1699 | /* first arg is a list of binaries */ 1700 | if (!enif_get_list_length(env, argv[0], &bin_count)) { 1701 | fprintf(stderr, "dsp: expected list\n"); 1702 | return enif_make_badarg(env); 1703 | } 1704 | if (bin_count == 0) { 1705 | fprintf(stderr, "dsp: expected non-empty list\n"); 1706 | return enif_make_badarg(env); 1707 | } 1708 | 1709 | /* allocate array of binaries */ 1710 | x_i_term = enif_alloc(sizeof(ErlNifBinary) * bin_count); 1711 | if (x_i_term == NULL) { 1712 | fprintf(stderr, "dsp: 
failed to allocate memory\n"); 1713 | return mk_atom(env, "alloc_failed"); 1714 | } 1715 | 1716 | /* go through list and grab each binary */ 1717 | 1718 | /* get first element */ 1719 | if (!enif_get_list_cell(env, argv[0], &head, &tail)) { 1720 | fprintf(stderr, "dsp: expected valid list\n"); 1721 | enif_free(x_i_term); 1722 | return enif_make_badarg(env); 1723 | } 1724 | cidx = 0; 1725 | bin_len = 0; 1726 | do { 1727 | /* grab the binary */ 1728 | if (!enif_inspect_binary(env, head, &x_i_term[cidx])) { 1729 | printf("dsp: expected input binary\n"); 1730 | enif_free(x_i_term); 1731 | return enif_make_badarg(env); 1732 | } 1733 | /* check that it isn't empty */ 1734 | if (x_i_term[cidx].size == 0) { 1735 | printf("dsp: expected non empty binary\n"); 1736 | enif_free(x_i_term); 1737 | return enif_make_badarg(env); 1738 | } 1739 | 1740 | /* grab first binary size or check against the next */ 1741 | if (cidx == 0) { 1742 | bin_len = x_i_term[cidx].size; 1743 | } else if (bin_len != x_i_term[cidx].size) { 1744 | printf("dsp: all binaries must be the same length\n"); 1745 | enif_free(x_i_term); 1746 | return enif_make_badarg(env); 1747 | } 1748 | 1749 | /* increment our index */ 1750 | cidx++; 1751 | 1752 | /* get next element */ 1753 | } while(enif_get_list_cell(env, tail, &head, &tail)); 1754 | 1755 | /* second arg is chunk size */ 1756 | if (!enif_get_int(env, argv[1], &chunk_size)) { 1757 | printf("dsp: expected integer\n"); 1758 | enif_free(x_i_term); 1759 | return enif_make_badarg(env); 1760 | } 1761 | 1762 | /* check that binary size is a multiple of the chunk size */ 1763 | if (bin_len % chunk_size) { 1764 | printf("dsp: binary must be a multiple of the chunk size\n"); 1765 | enif_free(x_i_term); 1766 | return enif_make_badarg(env); 1767 | } 1768 | 1769 | /* allocate a new binary which is bin_count * bin_len */ 1770 | if (!enif_alloc_binary(bin_count * bin_len, &y_o_term)) { 1771 | fprintf(stderr, "dsp: failed to allocate binary\n"); 1772 | enif_free(x_i_term); 
1773 | return mk_atom(env, "alloc_failed"); 1774 | } 1775 | 1776 | /* loop through binaries memcpying in the chunksizes (with 1777 | * common index, which we bump at end of the loop body) */ 1778 | iidx = 0; 1779 | for (cidx = 0; cidx < bin_len; cidx+=chunk_size) { 1780 | for (bidx = 0; bidx < bin_count; bidx++) { 1781 | //printf("copying %d bytes from bin%d:%d to index %d\n", chunk_size, bidx, cidx, iidx); 1782 | memcpy(&y_o_term.data[iidx], &x_i_term[bidx].data[cidx], chunk_size); 1783 | iidx+=chunk_size; 1784 | } 1785 | } 1786 | 1787 | /* free memory allocated */ 1788 | enif_free(x_i_term); 1789 | 1790 | /* all done! */ 1791 | return enif_make_binary(env, &y_o_term); 1792 | } 1793 | 1794 | /* the deinterleave_nif interface function */ 1795 | /* this NIF provides for deinterleaving of an input binary into 1796 | * multiple binaries given chunk_size number of bytes */ 1797 | static ERL_NIF_TERM deinterleave_nif(ErlNifEnv *env, 1798 | int argc, 1799 | const ERL_NIF_TERM argv[]) 1800 | { 1801 | unsigned char **y_o_bin; 1802 | ErlNifBinary x_i_term; 1803 | ERL_NIF_TERM *y_o_term; 1804 | ERL_NIF_TERM head; 1805 | int bin_count; 1806 | int bin_len; 1807 | int chunk_size; 1808 | int cidx, bidx, iidx; 1809 | 1810 | /* grab input binary */ 1811 | if (!enif_inspect_binary(env, argv[0], &x_i_term)) { 1812 | printf("dsp: expected binary\n"); 1813 | return enif_make_badarg(env); 1814 | } 1815 | 1816 | /* grab bin_count */ 1817 | if (!enif_get_int(env, argv[1], &bin_count)) { 1818 | printf("dsp: expected integer\n"); 1819 | return enif_make_badarg(env); 1820 | } 1821 | 1822 | /* grab chunk_size */ 1823 | if (!enif_get_int(env, argv[2], &chunk_size)) { 1824 | printf("dsp: expected integer\n"); 1825 | return enif_make_badarg(env); 1826 | } 1827 | 1828 | /* check that input length is a multiple of the chunk_size * 1829 | * bin_count */ 1830 | if (x_i_term.size % (chunk_size * bin_count)) { 1831 | printf("dsp: input must be a multiple of the chunk size * bin_count\n"); 1832 | 
return enif_make_badarg(env); 1833 | } 1834 | 1835 | /* allocate array of bin_count binary data pointers */ 1836 | bin_len = x_i_term.size / bin_count; 1837 | y_o_bin = (unsigned char **)enif_alloc(sizeof(unsigned char *) * bin_count); 1838 | if (y_o_bin == NULL) { 1839 | fprintf(stderr, "dsp: failed to allocate memory\n"); 1840 | return mk_atom(env, "alloc_failed"); 1841 | } 1842 | 1843 | y_o_term = enif_alloc(sizeof(ERL_NIF_TERM) * bin_count); 1844 | if (y_o_term == NULL) { 1845 | fprintf(stderr, "dsp: failed to allocate memory\n"); 1846 | enif_free(y_o_bin); 1847 | return mk_atom(env, "alloc_failed"); 1848 | } 1849 | 1850 | for (bidx = 0; bidx < bin_count; bidx++) { 1851 | y_o_bin[bidx] = enif_make_new_binary(env, bin_len, &y_o_term[bidx]); 1852 | if (y_o_bin[bidx] == NULL) { 1853 | fprintf(stderr, "dsp: failed to allocate binary\n"); 1854 | enif_free(y_o_bin); 1855 | return mk_atom(env, "alloc_failed"); 1856 | } 1857 | } 1858 | 1859 | /* copy in data from input binary into output binaries */ 1860 | cidx = 0; 1861 | for (iidx = 0; iidx < x_i_term.size;) { 1862 | for (bidx = 0; bidx < bin_count; bidx++) { 1863 | memcpy(&y_o_bin[bidx][cidx], &x_i_term.data[iidx], chunk_size); 1864 | iidx+=chunk_size; 1865 | } 1866 | cidx+=chunk_size; 1867 | } 1868 | 1869 | /* build list of the output binaries */ 1870 | head = enif_make_list_from_array(env, y_o_term, bin_count); 1871 | 1872 | /* free the pointer arrays */ 1873 | enif_free(y_o_bin); 1874 | enif_free(y_o_term); 1875 | 1876 | /* return the list */ 1877 | return head; 1878 | } 1879 | 1880 | /* XXX TODO implement reload() and upgrade() */ 1881 | 1882 | /* Loads the NIF module and initializes private data */ 1883 | static int load(ErlNifEnv *env, void **priv_data, ERL_NIF_TERM load_info) 1884 | { 1885 | struct libdsp_priv_data *priv; 1886 | 1887 | if (!(priv = enif_alloc(sizeof(struct libdsp_priv_data)))) { 1888 | printf("dsp: failed to allocate private data!\n"); 1889 | return -1; 1890 | } 1891 | 1892 | /* create 
resources */ 1893 | priv->fir_fr16_res_type = 1894 | enif_open_resource_type(env, 1895 | NULL/*module name not used*/, 1896 | "fir_fr16_state", 1897 | NULL /*no DTOR needed*/, 1898 | ERL_NIF_RT_CREATE, 1899 | NULL/*no needed*/); 1900 | if (!priv->fir_fr16_res_type) { 1901 | printf("dsp: failed to open fir_fr16 resource type\n"); 1902 | return -1; 1903 | } 1904 | 1905 | priv->iirdf1_fr16_res_type = 1906 | enif_open_resource_type(env, 1907 | NULL/*module name not used*/, 1908 | "iirdf1_fr16_state", 1909 | NULL /*no DTOR needed*/, 1910 | ERL_NIF_RT_CREATE, 1911 | NULL/*no needed*/); 1912 | if (!priv->iirdf1_fr16_res_type) { 1913 | printf("dsp: failed to open iirdf1_fr16 resource type\n"); 1914 | return -1; 1915 | } 1916 | 1917 | priv->rfft_fr16_res_type = 1918 | enif_open_resource_type(env, 1919 | NULL/*module name not used*/, 1920 | "rfft_fr16_state", 1921 | NULL /*no DTOR needed*/, 1922 | ERL_NIF_RT_CREATE, 1923 | NULL/*no needed*/); 1924 | if (!priv->rfft_fr16_res_type) { 1925 | printf("dsp: failed to open rfft_fr16 resource type\n"); 1926 | return -1; 1927 | } 1928 | 1929 | *priv_data = priv; 1930 | 1931 | return 0; 1932 | } 1933 | 1934 | static void unload(ErlNifEnv *env, void *priv_data) 1935 | { 1936 | struct libdsp_priv_data *priv = 1937 | (struct libdsp_priv_data *)priv_data; 1938 | 1939 | /* free the private data */ 1940 | enif_free(priv); 1941 | } 1942 | 1943 | static ErlNifFunc nif_funcs[] = { 1944 | { "fir_fr16_init", 1, fir_fr16_init_nif }, 1945 | { "fir_fr16", 2, fir_fr16_nif }, 1946 | { "coeff_iirdf1_fr16", 2, coeff_iirdf1_fr16_nif }, 1947 | { "iirdf1_fr16_init", 1, iirdf1_fr16_init_nif }, 1948 | { "iirdf1_fr16", 2, iirdf1_fr16_nif }, 1949 | { "rfft_fr16_init", 1, rfft_fr16_init_nif }, 1950 | { "rfft_fr16", 2, rfft_fr16_nif }, 1951 | { "vecvmlt_fr16", 2, vecvmlt_fr16_nif }, 1952 | { "vecdot_fr1x32", 2, vecdot_fr1x32_nif }, 1953 | { "vecdot_fr16_sr", 3, vecdot_fr16_sr_nif }, 1954 | { "cabs_fr16", 1, cabs_fr16_nif }, 1955 | { "gen_hanning_fr16", 2, 
gen_hanning_fr16_nif }, 1956 | { "autocoh_fr16", 2, autocoh_fr16_nif }, 1957 | { "crosscoh_fr16", 3, crosscoh_fr16_nif }, 1958 | { "autocorr_fr16", 2, autocorr_fr16_nif }, 1959 | { "crosscorr_fr16", 3, crosscorr_fr16_nif }, 1960 | { "histogram_fr16", 4, histogram_fr16_nif }, 1961 | { "mean_fr16", 1, mean_fr16_nif }, 1962 | { "var_fr16", 1, var_fr16_nif }, 1963 | { "max_fr16", 1, max_fr16_nif }, 1964 | { "min_fr16", 1, min_fr16_nif }, 1965 | { "interleave", 2, interleave_nif }, 1966 | { "deinterleave", 3, deinterleave_nif }, 1967 | }; 1968 | 1969 | ERL_NIF_INIT(dsp, nif_funcs, load, NULL, NULL, unload) 1970 | 1971 | /* dsp.c ends here */ 1972 | --------------------------------------------------------------------------------