├── README.md ├── hardware ├── Readme.txt ├── board.pdf ├── docs │ └── ML2722_Datasheet.pdf └── schematic.pdf └── uat-decode ├── LICENSE.txt ├── Makefile ├── README.txt ├── decode.c ├── dlac.h ├── example_output ├── UAT_datastream ├── big_radar.png └── small_radar.png ├── fec └── fec-3.0 │ ├── INSTALL │ ├── README │ ├── ccsds.h │ ├── char.h │ ├── config.guess │ ├── config.h │ ├── config.h.in │ ├── config.log │ ├── config.status │ ├── config.sub │ ├── configure │ ├── configure.in │ ├── cpu_features.s │ ├── cpu_mode_ppc.c │ ├── cpu_mode_x86.c │ ├── decode_rs.c │ ├── decode_rs.h │ ├── decode_rs_8.c │ ├── decode_rs_ccsds.c │ ├── decode_rs_char.c │ ├── decode_rs_int.c │ ├── dotprod.c │ ├── dotprod.c~ │ ├── dotprod.h │ ├── dotprod_av.c │ ├── dotprod_mmx.c │ ├── dotprod_mmx_assist.s │ ├── dotprod_port.c │ ├── dotprod_sse2.c │ ├── dotprod_sse2_assist.s │ ├── dsp.3 │ ├── dtest.c │ ├── encode_rs.c │ ├── encode_rs.h │ ├── encode_rs_8.c │ ├── encode_rs_av.c │ ├── encode_rs_ccsds.c │ ├── encode_rs_char.c │ ├── encode_rs_int.c │ ├── exercise.c │ ├── fec.c │ ├── fec.h │ ├── fec.h~ │ ├── fixed.h │ ├── gen_ccsds.c │ ├── gen_ccsds_tal.c │ ├── init_rs.c │ ├── init_rs.h │ ├── init_rs_char.c │ ├── init_rs_int.c │ ├── install-sh │ ├── int.h │ ├── lesser.txt │ ├── makefile │ ├── makefile.in │ ├── makefile.in~ │ ├── makefile~ │ ├── mmxbfly27.s │ ├── mmxbfly29.s │ ├── peak_mmx_assist.s │ ├── peak_sse2_assist.s │ ├── peak_sse_assist.s │ ├── peaktest.c │ ├── peakval.c │ ├── peakval_av.c │ ├── peakval_mmx.c │ ├── peakval_mmx_assist.s │ ├── peakval_port.c │ ├── peakval_sse.c │ ├── peakval_sse2.c │ ├── peakval_sse2_assist.s │ ├── peakval_sse_assist.s │ ├── rs-common.h │ ├── rs.3 │ ├── rs_speedtest.c │ ├── rstest.c │ ├── sim.c │ ├── simd-viterbi.3 │ ├── sqtest.c │ ├── sse2bfly27.s │ ├── sse2bfly29.s │ ├── ssebfly27.s │ ├── ssebfly29.s │ ├── sumsq.c │ ├── sumsq_av.c │ ├── sumsq_mmx.c │ ├── sumsq_mmx_assist.s │ ├── sumsq_port.c │ ├── sumsq_sse2.c │ ├── sumsq_sse2_assist.s │ ├── sumsq_test.c │ ├── 
viterbi27.c │ ├── viterbi27_av.c │ ├── viterbi27_mmx.c │ ├── viterbi27_port.c │ ├── viterbi27_sse.c │ ├── viterbi27_sse2.c │ ├── viterbi29.c │ ├── viterbi29_av.c │ ├── viterbi29_mmx.c │ ├── viterbi29_port.c │ ├── viterbi29_sse.c │ ├── viterbi29_sse2.c │ ├── viterbi39.c │ ├── viterbi39_av.c │ ├── viterbi39_mmx.c │ ├── viterbi39_port.c │ ├── viterbi39_sse.c │ ├── viterbi39_sse2.c │ ├── viterbi615.c │ ├── viterbi615_av.c │ ├── viterbi615_mmx.c │ ├── viterbi615_port.c │ ├── viterbi615_sse.c │ ├── viterbi615_sse2.c │ ├── vtest27.c │ ├── vtest29.c │ ├── vtest39.c │ └── vtest615.c └── radar.py /README.md: -------------------------------------------------------------------------------- 1 | ads-b 2 | ===== 3 | 4 | Lone Star open hardware and software ADS-B UAT receiver and decoder 5 | -------------------------------------------------------------------------------- /hardware/Readme.txt: -------------------------------------------------------------------------------- 1 | Introduction 2 | 3 | This directory contains the schematic, PCB design, and some documentation for a ML2722 based UAT receiver. 4 | 5 | Theory of operation 6 | 7 | The 978 MHz signal from the antenna enters from CONN1 and passes through an impedance matching network formed by L2 and C4. A ML2722 FSK receiver IC originally designed for use with cordless phones is used to decode the 1.0417Mbps FSK signal. The ML2722 is not officially specified to operate at 978MHz, but it appears to operate well in this range. Internally, the ML2722 downconverts and demodulates the incoming 978MHz signal; however, the internal bit slicer is disabled because its time constant is not appropriate for the UAT datastream. Instead, the raw discriminator output (available in test mode) is fed through a low pass filter formed by L1 and C27 and then on to the TS3021 comparator. A MCP4725 I2C DAC is used to provide an adjustable reference level to the comparator for accurate bit slicing. 
The output of the comparator, which is the raw FSK datastream, is then output via J1. J1 is intended to be connected to a USB capture device sampling the 1-bit value at 6.25Msps. Here a Cypress EZ-USB FX2LP was used to perform this function. 8 | 9 | 10 | -------------------------------------------------------------------------------- /hardware/board.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/digidocs/ads-b/6afab256194f51c471572187dcfa37c688d155e3/hardware/board.pdf -------------------------------------------------------------------------------- /hardware/docs/ML2722_Datasheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/digidocs/ads-b/6afab256194f51c471572187dcfa37c688d155e3/hardware/docs/ML2722_Datasheet.pdf -------------------------------------------------------------------------------- /hardware/schematic.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/digidocs/ads-b/6afab256194f51c471572187dcfa37c688d155e3/hardware/schematic.pdf -------------------------------------------------------------------------------- /uat-decode/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS = -O2 2 | 3 | SOURCES = decode.c 4 | 5 | scope: *$(SOURCES) 6 | gcc $(CFLAGS) -o decode $(SOURCES) -lfec -lm 7 | 8 | clean: 9 | rm decode -------------------------------------------------------------------------------- /uat-decode/README.txt: -------------------------------------------------------------------------------- 1 | Introduction 2 | 3 | This is a set of software tools that decode 978 MHz ADS-B UAT data streams. All contained files are licensed under the GPL V3, which is included for reference. 4 | 5 | Getting Started 6 | 7 | 1. 
Build and install Forward Error Correction library 8 | - cd fec/fec-3.0 9 | - make 10 | - [sudo] make install 11 | - cd ../../ 12 | 13 | 2. Build UAT decoder 14 | - make 15 | 16 | 3. Fetch and uncompress example data file 17 | - wget https://www.dropbox.com/s/ilp21hlf7gl6wbj/out-glo-3.gz (or download with web browser) 18 | - gunzip out-glo-3.gz 19 | 20 | 4. Run decoder 21 | - ./decode out-glo-3 > data_out (This may take a minute.) 22 | - decoded UAT data stream is now in human readable format in data_out 23 | 24 | 5. Decode FIS-B radar data from data stream 25 | - ./radar.py data_out 26 | - decoded high resolution radar image of NE US is in out.png. The decoder can decode full US radar maps, but the short example data file does not contain the full image. 27 | 28 | Raw Data Format 29 | 30 | The example data stream was recorded near Gloucester, MA. It consists of a stream of 8-bit integers containing either the value 0 or 1. (eg: 0x00, 0x01, 0x01, 0x00). These were sampled at 6.25MHz from the output of an FSK receiver tuned to 978MHz. This corresponds to six samples per UAT datastream bit. (eg: The UAT stream for '1' would be 0x01, 0x01, 0x01, 0x01, 0x01, 0x01) The values may be inverted from the specification (I cannot remember). 31 | 32 | -------------------------------------------------------------------------------- /uat-decode/dlac.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file (C) David Carr 2012. 3 | * All rights reserved. 
4 | */ 5 | 6 | #ifndef DLAC_H 7 | #define DLAC_H 8 | 9 | char dlac[64] = {'e', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 10 | 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 11 | 'V', 'W', 'X', 'Y', 'Z', 'n', '!', '\n', '\n', '|', 12 | ' ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', 13 | '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', 14 | '6', '7', '8', '9', ':', ';', '<', '=', '>', '?'}; 15 | 16 | 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /uat-decode/example_output/big_radar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/digidocs/ads-b/6afab256194f51c471572187dcfa37c688d155e3/uat-decode/example_output/big_radar.png -------------------------------------------------------------------------------- /uat-decode/example_output/small_radar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/digidocs/ads-b/6afab256194f51c471572187dcfa37c688d155e3/uat-decode/example_output/small_radar.png -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/INSTALL: -------------------------------------------------------------------------------- 1 | INSTALLATION INSTRUCTIONS 2 | 3 | To build and install the libfec libraries, simply say 4 | 5 | ./configure 6 | make 7 | make test (optional) 8 | make install (as root) 9 | 10 | By default, "make install" puts the libfec libraries in 11 | /usr/local/lib, the include files in /usr/local/include, and the 12 | manual page in /usr/local/man. 13 | 14 | You may have an old version of the GNU assembler that cannot handle 15 | the relatively new SSE2 mnemonics. Update your version of the GNU 16 | "binutils" package. 
17 | 18 | You may obtain the latest binutils package through your normal 19 | distribution channels or from: 20 | 21 | http://sources.redhat.com/binutils/ 22 | 23 | TESTING THE FEC LIBRARY 24 | 25 | After running the ./configure script, optional tests can be built and 26 | run as follows: 27 | 28 | make test 29 | 30 | "make test" tests each routine, using the SIMD versions as 31 | appropriate, verifying correct operation and estimating Viterbi 32 | decoding speeds. These tests should always succeed unless something is 33 | broken. 34 | 35 | 28 Mar 2004 36 | Phil Karn, karn@ka9q.net 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/README: -------------------------------------------------------------------------------- 1 | COPYRIGHT 2 | 3 | This package is copyright 2006 by Phil Karn, KA9Q. It may be used 4 | under the terms of the GNU Lesser General Public License (LGPL). See 5 | the file "lesser.txt" in this package for license details. 6 | 7 | INTRODUCTION 8 | 9 | This package provides a set of functions that implement several 10 | popular forward error correction (FEC) algorithms and several low-level routines 11 | useful in modems implemented with digital signal processing (DSP). 12 | 13 | The following routines are provided: 14 | 15 | 1. Viterbi decoders for the following convolutional codes: 16 | 17 | r=1/2 k=7 ("Voyager" code, now a widely used industry standard) 18 | r=1/2 k=9 (Used on the IS-95 CDMA forward link) 19 | r=1/6 k=15 ("Cassini" code, used by several NASA/JPL deep space missions) 20 | 21 | 2. Reed-Solomon encoders and decoders for any user-specified code. 22 | 23 | 3. Optimized encoder and decoder for the CCSDS-standard (255,223) 24 | Reed-Solomon code, with and without the CCSDS-standard "dual basis" 25 | symbol representation. 26 | 27 | 4. Compute dot product between a 16-bit buffer and a set of 16-bit 28 | coefficients. 
This is the basic DSP primitive for digital filtering 29 | and correlation. 30 | 31 | 5. Compute sum of squares of a buffer of 16-bit signed integers. This is 32 | useful in DSP for finding the total energy in a signal. 33 | 34 | 6. Find peak value in a buffer of 16-bit signed integers, useful for 35 | scaling a signal to prevent overflow. 36 | 37 | SIMD SUPPORT 38 | 39 | This package automatically makes use of various SIMD (Single 40 | Instruction stream, Multiple Data stream) instruction sets, when 41 | available: MMX, SSE and SSE2 on the IA-32 (Intel) architecture, and 42 | Altivec on the PowerPC G4 and G5 used by Power Macintoshes. 43 | 44 | "Altivec" is a Motorola trademark; Apple calls it "Velocity Engine", 45 | and IBM calls it "VMX". Altivec is roughly comparable to SSE2 on the 46 | IA-32. 47 | 48 | Many of the SIMD versions run more than an order of 49 | magnitude faster than their portable C versions. The available SIMD 50 | instruction sets, if any, are determined at run time and the proper 51 | version of each routine is automatically selected. If no SIMD 52 | instructions are available, the portable C version is invoked by 53 | default. On targets other than IA-32 and PPC, only the portable C 54 | version is built. 55 | 56 | The SIMD-assisted versions generally produce the same results as the C 57 | versions, with a few minor exceptions. The Viterbi decoders in C have 58 | a very slightly greater Eb/No performance due to their use of 32-bit 59 | path metrics. On the other hand, the SIMD versions use the 60 | "saturating" arithmetic available in these instructions to avoid the 61 | integer wraparounds that can occur in C when argument ranges are not 62 | properly constrained. This applies primarily to the "dotprod" (dot 63 | product) function. 64 | 65 | The MMX (MultiMedia eXtensions) instruction set was introduced on 66 | later Pentium CPUs; it is also implemented on the Pentium II and most 67 | AMD CPUs starting with the K6. 
SSE (Streaming SIMD Extensions) was 68 | introduced in the Pentium III; AMD calls it "3D Now! Professional". 69 | Intel introduced SSE2 on the Pentium 4, and it has been picked up by 70 | later AMD CPUs. SSE support implies MMX support, while SSE2 support 71 | implies both SSE and MMX support. 72 | 73 | The latest IA-32 SIMD instruction set, SSE3 (also known as "Prescott 74 | New Instructions") was introduced in early 2004 with the latest 75 | ("Prescott") revision of the Pentium 4. Relatively little was 76 | introduced with SSE3, and this library currently makes no use of it. 77 | 78 | See the various manual pages for details on how to use the library 79 | routines. 80 | 81 | Copyright 2006, Phil Karn, KA9Q 82 | karn@ka9q.net 83 | http://www.ka9q.net/ 84 | 85 | This software may be used under the terms of the GNU Lesser General 86 | Public License (LGPL); see the file lesser.txt for details. 87 | 88 | Revision history: 89 | Version 1.0 released 29 May 2001 90 | 91 | Version 2.0 released 3 Dec 2001: 92 | Restructured to add support for shared libraries. 
93 | 94 | Version 2.0.1 released 8 Dec 2001: 95 | Includes autoconf/configure script 96 | 97 | Version 2.0.2 released 4 Feb 2002: 98 | Add SIMD version override options 99 | Test for lack of SSE2 mnemonic support in 'as' 100 | Build only selected version 101 | 102 | Version 2.0.3 released 6 Feb 2002: 103 | Fix to parityb function in parity.h 104 | 105 | feclib version 1.0 released November 2003 106 | Merged SIMD-Viterbi, RS and DSP libraries 107 | Changed SIMD Viterbi decoder to detect SSE2/SSE/MMX at runtime rather than build time 108 | 109 | feclib version 2.0 (unreleased) Mar 2004 110 | General speedups and cleanups 111 | Switch from 4 to 8-bit input symbols on all Viterbi decoders 112 | Support for Altivec on PowerPC 113 | Support for k=15 r=1/6 Cassini/Mars Pathfinder/Mars Exploration Rover/STEREO code 114 | Changed license to GNU Lesser General Public License (LGPL) 115 | 116 | feclib version 2.1 June 5 2006 117 | Added error checking, fixed alignment bug in SSE2 versions of Viterbi decoders causing segfaults 118 | 119 | feclib version 2.1.1 June 6 2006 120 | Fix test/benchmark time measurement on Linux 121 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/ccsds.h: -------------------------------------------------------------------------------- 1 | typedef unsigned char data_t; 2 | extern unsigned char Taltab[],Tal1tab[]; 3 | #define NN 255 4 | #define NROOTS 32 5 | 6 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/char.h: -------------------------------------------------------------------------------- 1 | /* Stuff specific to the 8-bit symbol version of the general purpose RS codecs 2 | * 3 | * Copyright 2003, Phil Karn, KA9Q 4 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 5 | */ 6 | typedef unsigned char data_t; 7 | 8 | #define MODNN(x) modnn(rs,x) 9 | 10 | #define MM (rs->mm) 11 | #define NN (rs->nn) 12 | 
#define ALPHA_TO (rs->alpha_to) 13 | #define INDEX_OF (rs->index_of) 14 | #define GENPOLY (rs->genpoly) 15 | #define NROOTS (rs->nroots) 16 | #define FCR (rs->fcr) 17 | #define PRIM (rs->prim) 18 | #define IPRIM (rs->iprim) 19 | #define PAD (rs->pad) 20 | #define A0 (NN) 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/config.h: -------------------------------------------------------------------------------- 1 | /* config.h. Generated by configure. */ 2 | /* config.h.in. Generated automatically from configure.in by autoheader. */ 3 | 4 | /* Define if you have the getopt_long function. */ 5 | #define HAVE_GETOPT_LONG 1 6 | 7 | /* Define if you have the header file. */ 8 | #define HAVE_GETOPT_H 1 9 | 10 | /* Define if you have the header file. */ 11 | #define HAVE_MEMORY_H 1 12 | 13 | /* Define if you have the header file. */ 14 | #define HAVE_STDIO_H 1 15 | 16 | /* Define if you have the header file. */ 17 | #define HAVE_STDLIB_H 1 18 | 19 | /* Define if you have the c library (-lc). */ 20 | #define HAVE_LIBC 1 21 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/config.h.in: -------------------------------------------------------------------------------- 1 | /* config.h.in. Generated automatically from configure.in by autoheader. */ 2 | 3 | /* Define if you have the getopt_long function. */ 4 | #undef HAVE_GETOPT_LONG 5 | 6 | /* Define if you have the header file. */ 7 | #undef HAVE_GETOPT_H 8 | 9 | /* Define if you have the header file. */ 10 | #undef HAVE_MEMORY_H 11 | 12 | /* Define if you have the header file. */ 13 | #undef HAVE_STDIO_H 14 | 15 | /* Define if you have the header file. */ 16 | #undef HAVE_STDLIB_H 17 | 18 | /* Define if you have the c library (-lc). 
*/ 19 | #undef HAVE_LIBC 20 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/configure.in: -------------------------------------------------------------------------------- 1 | dnl Process this file with autoconf to produce a configure script. 2 | AC_INIT(viterbi27.c) 3 | AC_CONFIG_HEADER(config.h) 4 | SO_NAME=3 5 | VERSION=3.0.0 6 | AC_SUBST(SO_NAME) 7 | AC_SUBST(VERSION) 8 | 9 | dnl Checks for programs. 10 | AC_PROG_CC 11 | if test $GCC != "yes" 12 | then 13 | AC_MSG_ERROR([Need GNU C compiler]) 14 | fi 15 | dnl Checks for libraries. 16 | AC_CHECK_LIB(c, malloc) 17 | 18 | dnl Checks for header files. 19 | AC_CHECK_HEADERS(getopt.h stdio.h stdlib.h memory.h string.h) 20 | if test -z "$HAVE_stdio.h" 21 | then 22 | AC_MSG_ERROR([Need stdio.h!]) 23 | fi 24 | if test -z "$HAVE_stdlib.h" 25 | then 26 | AC_MSG_ERROR([Need stdlib.h!]) 27 | fi 28 | if test -z "$HAVE_stdlib.h" 29 | then 30 | AC_MSG_ERROR([Need memory.h!]) 31 | fi 32 | if test -z "$HAVE_string.h" 33 | then 34 | AC_MSG_ERROR([Need string.h]) 35 | fi 36 | 37 | AC_CANONICAL_SYSTEM 38 | case $target_cpu in 39 | i386|i486|i586|i686) 40 | ARCH_OPTION="-march=$target_cpu" 41 | MLIBS="viterbi27_mmx.o mmxbfly27.o viterbi27_sse.o ssebfly27.o viterbi27_sse2.o sse2bfly27.o \ 42 | viterbi29_mmx.o mmxbfly29.o viterbi29_sse.o ssebfly29.o viterbi29_sse2.o sse2bfly29.o \ 43 | viterbi39_sse2.o viterbi39_sse.o viterbi39_mmx.o \ 44 | viterbi615_mmx.o viterbi615_sse.o viterbi615_sse2.o \ 45 | dotprod_mmx.o dotprod_mmx_assist.o \ 46 | dotprod_sse2.o dotprod_sse2_assist.o \ 47 | peakval_mmx.o peakval_mmx_assist.o \ 48 | peakval_sse.o peakval_sse_assist.o \ 49 | peakval_sse2.o peakval_sse2_assist.o \ 50 | sumsq.o sumsq_port.o \ 51 | sumsq_sse2.o sumsq_sse2_assist.o \ 52 | sumsq_mmx.o sumsq_mmx_assist.o \ 53 | cpu_features.o cpu_mode_x86.o" 54 | ;; 55 | powerpc*) 56 | ARCH_OPTION="-fno-common -faltivec" 57 | MLIBS="viterbi27_av.o viterbi29_av.o viterbi39_av.o viterbi615_av.o 
\ 58 | encode_rs_av.o \ 59 | dotprod_av.o sumsq_av.o peakval_av.o cpu_mode_ppc.o" 60 | ;; 61 | *) 62 | MLIBS= 63 | esac 64 | case $target_os in 65 | darwin*) 66 | SH_LIB=libfec.dylib 67 | REBIND="" 68 | ;; 69 | *) 70 | SH_LIB=libfec.so 71 | REBIND=ldconfig 72 | ;; 73 | esac 74 | AC_SUBST(SH_LIB) 75 | AC_SUBST(REBIND) 76 | AC_SUBST(MLIBS) 77 | AC_SUBST(ARCH_OPTION) 78 | 79 | 80 | dnl Checks for library functions. 81 | AC_CHECK_FUNCS(getopt_long memset memmove) 82 | 83 | AC_OUTPUT(makefile) 84 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/cpu_features.s: -------------------------------------------------------------------------------- 1 | .text 2 | .global cpu_features 3 | .type cpu_features,@function 4 | cpu_features: 5 | pushl %ebx 6 | pushl %ecx 7 | pushl %edx 8 | movl $1,%eax 9 | cpuid 10 | movl %edx,%eax 11 | popl %edx 12 | popl %ecx 13 | popl %ebx 14 | ret 15 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/cpu_mode_ppc.c: -------------------------------------------------------------------------------- 1 | /* Determine CPU support for SIMD on Power PC 2 | * Copyright 2004 Phil Karn, KA9Q 3 | */ 4 | #include 5 | #include "fec.h" 6 | #ifdef __VEC__ 7 | #include 8 | #endif 9 | 10 | /* Various SIMD instruction set names */ 11 | char *Cpu_modes[] = {"Unknown","Portable C","x86 Multi Media Extensions (MMX)", 12 | "x86 Streaming SIMD Extensions (SSE)", 13 | "x86 Streaming SIMD Extensions 2 (SSE2)", 14 | "PowerPC G4/G5 Altivec/Velocity Engine"}; 15 | 16 | enum cpu_mode Cpu_mode; 17 | 18 | void find_cpu_mode(void){ 19 | 20 | if(Cpu_mode != UNKNOWN) 21 | return; 22 | 23 | #ifdef __VEC__ 24 | { 25 | /* Ask the OS if we have Altivec support */ 26 | int selectors[2] = { CTL_HW, HW_VECTORUNIT }; 27 | int hasVectorUnit = 0; 28 | size_t length = sizeof(hasVectorUnit); 29 | int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0); 30 | if(0 == error && 
hasVectorUnit) 31 | Cpu_mode = ALTIVEC; 32 | else 33 | Cpu_mode = PORT; 34 | } 35 | #else 36 | Cpu_mode = PORT; 37 | #endif 38 | 39 | fprintf(stderr,"SIMD CPU detect: %s\n",Cpu_modes[Cpu_mode]); 40 | } 41 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/cpu_mode_x86.c: -------------------------------------------------------------------------------- 1 | /* Determine CPU support for SIMD 2 | * Copyright 2004 Phil Karn, KA9Q 3 | */ 4 | #include 5 | #include "fec.h" 6 | 7 | /* Various SIMD instruction set names */ 8 | char *Cpu_modes[] = {"Unknown","Portable C","x86 Multi Media Extensions (MMX)", 9 | "x86 Streaming SIMD Extensions (SSE)", 10 | "x86 Streaming SIMD Extensions 2 (SSE2)", 11 | "PowerPC G4/G5 Altivec/Velocity Engine"}; 12 | 13 | enum cpu_mode Cpu_mode; 14 | 15 | void find_cpu_mode(void){ 16 | 17 | int f; 18 | if(Cpu_mode != UNKNOWN) 19 | return; 20 | 21 | /* Figure out what kind of CPU we have */ 22 | f = cpu_features(); 23 | if(f & (1<<26)){ /* SSE2 is present */ 24 | Cpu_mode = SSE2; 25 | } else if(f & (1<<25)){ /* SSE is present */ 26 | Cpu_mode = SSE; 27 | } else if(f & (1<<23)){ /* MMX is present */ 28 | Cpu_mode = MMX; 29 | } else { /* No SIMD at all */ 30 | Cpu_mode = PORT; 31 | } 32 | fprintf(stderr,"SIMD CPU detect: %s\n",Cpu_modes[Cpu_mode]); 33 | } 34 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/decode_rs_8.c: -------------------------------------------------------------------------------- 1 | /* General purpose Reed-Solomon decoder for 8-bit symbols or less 2 | * Copyright 2003 Phil Karn, KA9Q 3 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | */ 5 | 6 | #ifdef DEBUG 7 | #include 8 | #endif 9 | 10 | #include 11 | 12 | #include "fixed.h" 13 | 14 | int decode_rs_8(data_t *data, int *eras_pos, int no_eras, int pad){ 15 | int retval; 16 | 17 | if(pad < 0 || pad > 222){ 18 | return -1; 19 | } 20 | 
21 | #include "decode_rs.h" 22 | 23 | return retval; 24 | } 25 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/decode_rs_ccsds.c: -------------------------------------------------------------------------------- 1 | /* This function wraps around the fixed 8-bit decoder, performing the 2 | * basis transformations necessary to meet the CCSDS standard 3 | * 4 | * Copyright 2002, Phil Karn, KA9Q 5 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 6 | */ 7 | #include "ccsds.h" 8 | #include "fec.h" 9 | 10 | int decode_rs_ccsds(data_t *data,int *eras_pos,int no_eras,int pad){ 11 | int i,r; 12 | data_t cdata[NN]; 13 | 14 | /* Convert data from dual basis to conventional */ 15 | for(i=0;i 0){ 21 | /* Convert from conventional to dual basis */ 22 | for(i=0;i 8 | #endif 9 | 10 | #include 11 | 12 | #include "char.h" 13 | #include "rs-common.h" 14 | 15 | int decode_rs_char(void *p, data_t *data, int *eras_pos, int no_eras){ 16 | int retval; 17 | struct rs *rs = (struct rs *)p; 18 | 19 | #include "decode_rs.h" 20 | 21 | return retval; 22 | } 23 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/decode_rs_int.c: -------------------------------------------------------------------------------- 1 | /* General purpose Reed-Solomon decoder 2 | * Copyright 2003 Phil Karn, KA9Q 3 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | */ 5 | 6 | #ifdef DEBUG 7 | #include 8 | #endif 9 | 10 | #include 11 | 12 | #include "int.h" 13 | #include "rs-common.h" 14 | 15 | int decode_rs_int(void *p, data_t *data, int *eras_pos, int no_eras){ 16 | int retval; 17 | struct rs *rs = (struct rs *)p; 18 | 19 | #include "decode_rs.h" 20 | 21 | return retval; 22 | } 23 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/dotprod.c: 
-------------------------------------------------------------------------------- 1 | /* 16-bit signed integer dot product 2 | * Switch to appropriate versions 3 | * Copyright 2004 Phil Karn 4 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 5 | */ 6 | #include 7 | #include "fec.h" 8 | 9 | void *initdp_port(signed short coeffs[],int len); 10 | long dotprod_port(void *p,signed short *b); 11 | void freedp_port(void *p); 12 | 13 | #ifdef __i386__ 14 | void *initdp_mmx(signed short coeffs[],int len); 15 | void *initdp_sse2(signed short coeffs[],int len); 16 | long dotprod_mmx(void *p,signed short *b); 17 | long dotprod_sse2(void *p,signed short *b); 18 | void freedp_mmx(void *p); 19 | void freedp_sse2(void *p); 20 | #endif 21 | 22 | #ifdef __VEC__ 23 | void *initdp_av(signed short coeffs[],int len); 24 | long dotprod_av(void *p,signed short *b); 25 | void freedp_av(void *p); 26 | #endif 27 | 28 | /* Create and return a descriptor for use with the dot product function */ 29 | void *initdp(signed short coeffs[],int len){ 30 | find_cpu_mode(); 31 | 32 | switch(Cpu_mode){ 33 | case PORT: 34 | default: 35 | return initdp_port(coeffs,len); 36 | #ifdef __i386__ 37 | case MMX: 38 | case SSE: 39 | return initdp_mmx(coeffs,len); 40 | case SSE2: 41 | return initdp_sse2(coeffs,len); 42 | #endif 43 | 44 | #ifdef __VEC__ 45 | case ALTIVEC: 46 | return initdp_av(coeffs,len); 47 | #endif 48 | } 49 | } 50 | 51 | 52 | /* Free a dot product descriptor created earlier; dispatches on Cpu_mode the same way initdp() does */ 53 | void freedp(void *p){ 54 | switch(Cpu_mode){ 55 | case PORT: 56 | default: 57 | return freedp_port(p); /* must not fall through: initdp() builds these descriptors with initdp_port() */ 58 | #ifdef __i386__ 59 | case MMX: 60 | case SSE: 61 | return freedp_mmx(p); 62 | case SSE2: 63 | return freedp_sse2(p); 64 | #endif 65 | #ifdef __VEC__ 66 | case ALTIVEC: 67 | return freedp_av(p); 68 | #endif 69 | } 70 | } 71 | 72 | /* Compute a dot product given a descriptor and an input array 73 | * The length is taken from the descriptor 74 | */ 75 | long dotprod(void *p,signed short a[]){ 76 | 
switch(Cpu_mode){ 77 | case PORT: 78 | default: 79 | return dotprod_port(p,a); 80 | #ifdef __i386__ 81 | case MMX: 82 | case SSE: 83 | return dotprod_mmx(p,a); 84 | case SSE2: 85 | return dotprod_sse2(p,a); 86 | #endif 87 | 88 | #ifdef __VEC__ 89 | case ALTIVEC: 90 | return dotprod_av(p,a); 91 | #endif 92 | } 93 | } 94 | 95 | 96 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/dotprod.c~: -------------------------------------------------------------------------------- 1 | /* 16-bit signed integer dot product 2 | * Switch to appropriate versions 3 | * Copyright 2004 Phil Karn 4 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 5 | */ 6 | #include 7 | #include "fec.h" 8 | 9 | void *initdp_port(signed short coeffs[],int len); 10 | long dotprod_port(void *p,signed short *b); 11 | void freedp_port(void *p); 12 | 13 | #ifdef __i386__ 14 | void *initdp_mmx(signed short coeffs[],int len); 15 | void *initdp_sse2(signed short coeffs[],int len); 16 | long dotprod_mmx(void *p,signed short *b); 17 | long dotprod_sse2(void *p,signed short *b); 18 | void freedp_mmx(void *p); 19 | void freedp_sse2(void *p); 20 | #endif 21 | 22 | #ifdef __VEC__ 23 | void *initdp_av(signed short coeffs[],int len); 24 | long dotprod_av(void *p,signed short *b); 25 | void freedp_av(void *p); 26 | #endif 27 | 28 | /* Create and return a descriptor for use with the dot product function */ 29 | void *initdp(signed short coeffs[],int len){ 30 | find_cpu_mode(); 31 | 32 | switch(Cpu_mode){ 33 | case PORT: 34 | default: 35 | return initdp_port(coeffs,len); 36 | #ifdef __i386__ 37 | case MMX: 38 | case SSE: 39 | return initdp_mmx(coeffs,len); 40 | case SSE2: 41 | return initdp_sse2(coeffs,len); 42 | #endif 43 | 44 | #ifdef __VEC__ 45 | case ALTIVEC: 46 | return initdp_av(coeffs,len); 47 | #endif 48 | } 49 | } 50 | 51 | 52 | /* Free a dot product descriptor created earlier */ 53 | void freedp(void *p){ 54 | 
switch(Cpu_mode){ 55 | case PORT: 56 | default: 57 | #ifdef __i386__ 58 | case MMX: 59 | case SSE: 60 | return freedp_mmx(p); 61 | case SSE2: 62 | return freedp_sse2(p); 63 | #endif 64 | #ifdef __VEC__ 65 | case ALTIVEC: 66 | return freedp_av(p); 67 | #endif 68 | } 69 | } 70 | 71 | /* Compute a dot product given a descriptor and an input array 72 | * The length is taken from the descriptor 73 | */ 74 | long dotprod(void *p,signed short a[]){ 75 | switch(Cpu_mode){ 76 | case PORT: 77 | default: 78 | return dotprod_port(p,a); 79 | #ifdef __i386__ 80 | case MMX: 81 | case SSE: 82 | return dotprod_mmx(p,a); 83 | case SSE2: 84 | return dotprod_sse2(p,a); 85 | #endif 86 | 87 | #ifdef __VEC__ 88 | case ALTIVEC: 89 | return dotprod_av(p,a); 90 | #endif 91 | } 92 | } 93 | 94 | 95 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/dotprod.h: -------------------------------------------------------------------------------- 1 | /* Internal definitions for dotproduct function */ 2 | 3 | struct dotprod { 4 | int len; /* Number of coefficients */ 5 | 6 | /* On a MMX or SSE machine, these hold 4 copies of the coefficients, 7 | * preshifted by 0,1,2,3 words to meet all possible input data 8 | * alignments (see Intel ap559 on MMX dot products). 
9 | * 10 | * SSE2 is similar, but with 8 words at a time 11 | * 12 | * On a non-MMX machine, only one copy is present 13 | */ 14 | signed short *coeffs[8]; 15 | }; 16 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/dotprod_av.c: -------------------------------------------------------------------------------- 1 | /* 16-bit signed integer dot product 2 | * Altivec-assisted version 3 | * Copyright 2004 Phil Karn 4 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 5 | */ 6 | #include 7 | #include "fec.h" 8 | 9 | struct dotprod { 10 | int len; /* Number of coefficients */ 11 | 12 | /* On an Altivec machine, these hold 8 copies of the coefficients, 13 | * preshifted by 0,1,..7 words to meet all possible input data 14 | */ 15 | signed short *coeffs[8]; 16 | }; 17 | 18 | /* Create and return a descriptor for use with the dot product function */ 19 | void *initdp_av(signed short coeffs[],int len){ 20 | struct dotprod *dp; 21 | int i,j; 22 | 23 | if(len == 0) 24 | return NULL; 25 | 26 | dp = (struct dotprod *)calloc(1,sizeof(struct dotprod)); 27 | dp->len = len; 28 | 29 | /* Make 8 copies of coefficients, one for each data alignment, 30 | * each aligned to 16-byte boundary 31 | */ 32 | for(i=0;i<8;i++){ 33 | dp->coeffs[i] = calloc(1+(len+i-1)/8,sizeof(vector signed short)); 34 | for(j=0;jcoeffs[i][j+i] = coeffs[j]; 36 | } 37 | return (void *)dp; 38 | } 39 | 40 | 41 | /* Free a dot product descriptor created earlier */ 42 | void freedp_av(void *p){ 43 | struct dotprod *dp = (struct dotprod *)p; 44 | int i; 45 | 46 | for(i=0;i<8;i++) 47 | if(dp->coeffs[i] != NULL) 48 | free(dp->coeffs[i]); 49 | free(dp); 50 | } 51 | 52 | /* Compute a dot product given a descriptor and an input array 53 | * The length is taken from the descriptor 54 | */ 55 | long dotprod_av(void *p,signed short a[]){ 56 | struct dotprod *dp = (struct dotprod *)p; 57 | int al; 58 | vector signed short *ar,*d; 59 | vector 
signed int sums0,sums1,sums2,sums3; 60 | union { vector signed int v; signed int w[4];} s; 61 | int nblocks; 62 | 63 | /* round ar down to beginning of 16-byte block containing 0th element of 64 | * input buffer. Then set d to one of 8 sets of shifted coefficients 65 | */ 66 | ar = (vector signed short *)((int)a & ~15); 67 | al = ((int)a & 15)/sizeof(signed short); 68 | d = (vector signed short *)dp->coeffs[al]; 69 | 70 | nblocks = (dp->len+al-1)/8+1; 71 | 72 | /* Sum into four vectors each holding four 32-bit partial sums */ 73 | sums3 = sums2 = sums1 = sums0 = (vector signed int)(0); 74 | while(nblocks >= 4){ 75 | sums0 = vec_msums(ar[nblocks-1],d[nblocks-1],sums0); 76 | sums1 = vec_msums(ar[nblocks-2],d[nblocks-2],sums1); 77 | sums2 = vec_msums(ar[nblocks-3],d[nblocks-3],sums2); 78 | sums3 = vec_msums(ar[nblocks-4],d[nblocks-4],sums3); 79 | nblocks -= 4; 80 | } 81 | sums0 = vec_adds(sums0,sums1); 82 | sums2 = vec_adds(sums2,sums3); 83 | sums0 = vec_adds(sums0,sums2); 84 | while(nblocks-- > 0){ 85 | sums0 = vec_msums(ar[nblocks],d[nblocks],sums0); 86 | } 87 | /* Sum 4 partial sums into final result */ 88 | s.v = vec_sums(sums0,(vector signed int)(0)); 89 | 90 | return s.w[3]; 91 | } 92 | 93 | 94 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/dotprod_mmx.c: -------------------------------------------------------------------------------- 1 | /* 16-bit signed integer dot product 2 | * MMX assisted version; also for SSE 3 | * 4 | * Copyright 2004 Phil Karn 5 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 6 | */ 7 | #include 8 | #include "fec.h" 9 | 10 | struct dotprod { 11 | int len; /* Number of coefficients */ 12 | 13 | /* On a MMX or SSE machine, these hold 4 copies of the coefficients, 14 | * preshifted by 0,1,2,3 words to meet all possible input data 15 | * alignments (see Intel ap559 on MMX dot products). 
16 | */ 17 | signed short *coeffs[4]; 18 | }; 19 | long dotprod_mmx_assist(signed short *a,signed short *b,int cnt); 20 | 21 | /* Create and return a descriptor for use with the dot product function */ 22 | void *initdp_mmx(signed short coeffs[],int len){ 23 | struct dotprod *dp; 24 | int i,j; 25 | 26 | 27 | if(len == 0) 28 | return NULL; 29 | 30 | dp = (struct dotprod *)calloc(1,sizeof(struct dotprod)); 31 | dp->len = len; 32 | 33 | /* Make 4 copies of coefficients, one for each data alignment */ 34 | for(i=0;i<4;i++){ 35 | dp->coeffs[i] = (signed short *)calloc(1+(len+i-1)/4, 36 | 4*sizeof(signed short)); 37 | for(j=0;jcoeffs[i][j+i] = coeffs[j]; 39 | } 40 | return (void *)dp; 41 | } 42 | 43 | 44 | /* Free a dot product descriptor created earlier */ 45 | void freedp_mmx(void *p){ 46 | struct dotprod *dp = (struct dotprod *)p; 47 | int i; 48 | 49 | for(i=0;i<4;i++) 50 | if(dp->coeffs[i] != NULL) 51 | free(dp->coeffs[i]); 52 | free(dp); 53 | } 54 | 55 | /* Compute a dot product given a descriptor and an input array 56 | * The length is taken from the descriptor 57 | */ 58 | long dotprod_mmx(void *p,signed short a[]){ 59 | struct dotprod *dp = (struct dotprod *)p; 60 | int al; 61 | signed short *ar; 62 | 63 | /* Round input data address down to 8 byte boundary 64 | * NB: depending on the alignment of a[], memory 65 | * before a[] will be accessed. The contents don't matter since they'll 66 | * be multiplied by zero coefficients. I can't conceive of any 67 | * situation where this could cause a segfault since memory protection 68 | * in the x86 machines is done on much larger boundaries 69 | */ 70 | ar = (signed short *)((int)a & ~7); 71 | 72 | /* Choose one of 4 sets of pre-shifted coefficients. 
al is both the 73 | * index into dp->coeffs[] and the number of 0 words padded onto 74 | * that coefficients array for alignment purposes 75 | */ 76 | al = a - ar; 77 | 78 | /* Call assembler routine to do the work, passing number of 4-word blocks */ 79 | return dotprod_mmx_assist(ar,dp->coeffs[al],(dp->len+al-1)/4+1); 80 | } 81 | 82 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/dotprod_mmx_assist.s: -------------------------------------------------------------------------------- 1 | # SIMD MMX dot product 2 | # Equivalent to the following C code: 3 | # long dotprod(signed short *a,signed short *b,int cnt) 4 | # { 5 | # long sum = 0; 6 | # cnt *= 4; 7 | # while(cnt--) 8 | # sum += *a++ + *b++; 9 | # return sum; 10 | # } 11 | # a and b should also be 64-bit aligned, or speed will suffer greatly 12 | # Copyright 1999, Phil Karn KA9Q 13 | # May be used under the terms of the GNU Lesser General Public License (LGPL) 14 | 15 | .text 16 | .global dotprod_mmx_assist 17 | .type dotprod_mmx_assist,@function 18 | dotprod_mmx_assist: 19 | pushl %ebp 20 | movl %esp,%ebp 21 | pushl %esi 22 | pushl %edi 23 | pushl %ecx 24 | pushl %ebx 25 | movl 8(%ebp),%esi # a 26 | movl 12(%ebp),%edi # b 27 | movl 16(%ebp),%ecx # cnt 28 | pxor %mm0,%mm0 # clear running sum (in two 32-bit halves) 29 | 30 | # MMX dot product loop unrolled 4 times, crunching 16 terms per loop 31 | .align 16 32 | .Loop1: subl $4,%ecx 33 | jl .Loop1Done 34 | 35 | movq (%esi),%mm1 # mm1 = a[3],a[2],a[1],a[0] 36 | pmaddwd (%edi),%mm1 # mm1 = b[3]*a[3]+b[2]*a[2],b[1]*a[1]+b[0]*a[0] 37 | paddd %mm1,%mm0 38 | 39 | movq 8(%esi),%mm1 40 | pmaddwd 8(%edi),%mm1 41 | paddd %mm1,%mm0 42 | 43 | movq 16(%esi),%mm1 44 | pmaddwd 16(%edi),%mm1 45 | paddd %mm1,%mm0 46 | 47 | movq 24(%esi),%mm1 48 | addl $32,%esi 49 | pmaddwd 24(%edi),%mm1 50 | addl $32,%edi 51 | paddd %mm1,%mm0 52 | 53 | jmp .Loop1 54 | .Loop1Done: 55 | 56 | addl $4,%ecx 57 | 58 | # MMX dot product loop, 
not unrolled, crunching 4 terms per loop 59 | # This could be redone as Duff's Device on the unrolled loop above 60 | .Loop2: subl $1,%ecx 61 | jl .Loop2Done 62 | 63 | movq (%esi),%mm1 64 | addl $8,%esi 65 | pmaddwd (%edi),%mm1 66 | addl $8,%edi 67 | paddd %mm1,%mm0 68 | jmp .Loop2 69 | .Loop2Done: 70 | 71 | movd %mm0,%ebx # right-hand word to ebx 72 | punpckhdq %mm0,%mm0 # left-hand word to right side of %mm0 73 | movd %mm0,%eax 74 | addl %ebx,%eax # running sum now in %eax 75 | emms # done with MMX 76 | 77 | popl %ebx 78 | popl %ecx 79 | popl %edi 80 | popl %esi 81 | movl %ebp,%esp 82 | popl %ebp 83 | ret 84 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/dotprod_port.c: -------------------------------------------------------------------------------- 1 | /* 16-bit signed integer dot product 2 | * Portable C version 3 | * Copyright 2004 Phil Karn 4 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 5 | */ 6 | #include 7 | #include "fec.h" 8 | 9 | struct dotprod { 10 | int len; /* Number of coefficients */ 11 | 12 | signed short *coeffs; 13 | }; 14 | 15 | /* Create and return a descriptor for use with the dot product function */ 16 | void *initdp_port(signed short coeffs[],int len){ 17 | struct dotprod *dp; 18 | int j; 19 | 20 | if(len == 0) 21 | return NULL; 22 | 23 | dp = (struct dotprod *)calloc(1,sizeof(struct dotprod)); 24 | dp->len = len; 25 | 26 | /* Just one copy of the coefficients for the C version */ 27 | dp->coeffs = (signed short *)calloc(len,sizeof(signed short)); 28 | for(j=0;jcoeffs[j] = coeffs[j]; 30 | return (void *)dp; 31 | } 32 | 33 | 34 | /* Free a dot product descriptor created earlier */ 35 | void freedp_port(void *p){ 36 | struct dotprod *dp = (struct dotprod *)p; 37 | 38 | if(dp->coeffs != NULL) 39 | free(dp->coeffs); 40 | free(dp); 41 | } 42 | 43 | /* Compute a dot product given a descriptor and an input array 44 | * The length is taken from the 
descriptor 45 | */ 46 | long dotprod_port(void *p,signed short a[]){ 47 | struct dotprod *dp = (struct dotprod *)p; 48 | long corr; 49 | int i; 50 | 51 | corr = 0; 52 | for(i=0;ilen;i++){ 53 | corr += (long)a[i] * dp->coeffs[i]; 54 | } 55 | return corr; 56 | } 57 | 58 | 59 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/dotprod_sse2.c: -------------------------------------------------------------------------------- 1 | /* 16-bit signed integer dot product 2 | * SSE2 version 3 | * Copyright 2004 Phil Karn 4 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 5 | */ 6 | #define _XOPEN_SOURCE 600 7 | #include 8 | #include 9 | #include "fec.h" 10 | 11 | struct dotprod { 12 | int len; /* Number of coefficients */ 13 | 14 | /* On a SSE2 machine, these hold 8 copies of the coefficients, 15 | * preshifted by 0,1,..7 words to meet all possible input data 16 | * alignments (see Intel ap559 on MMX dot products). 
17 | */ 18 | signed short *coeffs[8]; 19 | }; 20 | 21 | long dotprod_sse2_assist(signed short *a,signed short *b,int cnt); 22 | 23 | /* Create and return a descriptor for use with the dot product function */ 24 | void *initdp_sse2(signed short coeffs[],int len){ 25 | struct dotprod *dp; 26 | int i,j,blksize; 27 | 28 | if(len == 0) 29 | return NULL; 30 | 31 | dp = (struct dotprod *)calloc(1,sizeof(struct dotprod)); 32 | dp->len = len; 33 | 34 | /* Make 8 copies of coefficients, one for each data alignment, 35 | * each aligned to 16-byte boundary 36 | */ 37 | for(i=0;i<8;i++){ 38 | blksize = (1+(len+i-1)/8) * 8*sizeof(signed short); 39 | posix_memalign((void **)&dp->coeffs[i],16,blksize); 40 | memset(dp->coeffs[i],0,blksize); 41 | for(j=0;jcoeffs[i][j+i] = coeffs[j]; 43 | } 44 | return (void *)dp; 45 | } 46 | 47 | 48 | /* Free a dot product descriptor created earlier */ 49 | void freedp_sse2(void *p){ 50 | struct dotprod *dp = (struct dotprod *)p; 51 | int i; 52 | 53 | for(i=0;i<8;i++) 54 | if(dp->coeffs[i] != NULL) 55 | free(dp->coeffs[i]); 56 | free(dp); 57 | } 58 | 59 | /* Compute a dot product given a descriptor and an input array 60 | * The length is taken from the descriptor 61 | */ 62 | long dotprod_sse2(void *p,signed short a[]){ 63 | struct dotprod *dp = (struct dotprod *)p; 64 | int al; 65 | signed short *ar; 66 | 67 | ar = (signed short *)((int)a & ~15); 68 | al = a - ar; 69 | 70 | /* Call assembler routine to do the work, passing number of 8-word blocks */ 71 | return dotprod_sse2_assist(ar,dp->coeffs[al],(dp->len+al-1)/8+1); 72 | } 73 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/dotprod_sse2_assist.s: -------------------------------------------------------------------------------- 1 | # SIMD SSE2 dot product 2 | # Equivalent to the following C code: 3 | # long dotprod(signed short *a,signed short *b,int cnt) 4 | # { 5 | # long sum = 0; 6 | # cnt *= 8; 7 | # while(cnt--) 8 | # sum += *a++ + 
*b++; 9 | # return sum; 10 | # } 11 | # a and b must be 128-bit aligned 12 | # Copyright 2001, Phil Karn KA9Q 13 | # May be used under the terms of the GNU Lesser General Public License (LGPL) 14 | 15 | .text 16 | .global dotprod_sse2_assist 17 | .type dotprod_sse2_assist,@function 18 | dotprod_sse2_assist: 19 | pushl %ebp 20 | movl %esp,%ebp 21 | pushl %esi 22 | pushl %edi 23 | pushl %ecx 24 | pushl %ebx 25 | movl 8(%ebp),%esi # a 26 | movl 12(%ebp),%edi # b 27 | movl 16(%ebp),%ecx # cnt 28 | pxor %xmm0,%xmm0 # clear running sum (in two 32-bit halves) 29 | 30 | # SSE2 dot product loop unrolled 4 times, crunching 32 terms per loop 31 | .align 16 32 | .Loop1: subl $4,%ecx 33 | jl .Loop1Done 34 | 35 | movdqa (%esi),%xmm1 36 | pmaddwd (%edi),%xmm1 37 | paddd %xmm1,%xmm0 38 | 39 | movdqa 16(%esi),%xmm1 40 | pmaddwd 16(%edi),%xmm1 41 | paddd %xmm1,%xmm0 42 | 43 | movdqa 32(%esi),%xmm1 44 | pmaddwd 32(%edi),%xmm1 45 | paddd %xmm1,%xmm0 46 | 47 | movdqa 48(%esi),%xmm1 48 | addl $64,%esi 49 | pmaddwd 48(%edi),%xmm1 50 | addl $64,%edi 51 | paddd %xmm1,%xmm0 52 | 53 | jmp .Loop1 54 | .Loop1Done: 55 | 56 | addl $4,%ecx 57 | 58 | # SSE2 dot product loop, not unrolled, crunching 4 terms per loop 59 | # This could be redone as Duff's Device on the unrolled loop above 60 | .Loop2: subl $1,%ecx 61 | jl .Loop2Done 62 | 63 | movdqa (%esi),%xmm1 64 | addl $16,%esi 65 | pmaddwd (%edi),%xmm1 66 | addl $16,%edi 67 | paddd %xmm1,%xmm0 68 | jmp .Loop2 69 | .Loop2Done: 70 | 71 | movdqa %xmm0,%xmm1 72 | psrldq $8,%xmm0 73 | paddd %xmm1,%xmm0 74 | movd %xmm0,%eax # right-hand word to eax 75 | psrldq $4,%xmm0 76 | movd %xmm0,%ebx 77 | addl %ebx,%eax 78 | 79 | popl %ebx 80 | popl %ecx 81 | popl %edi 82 | popl %esi 83 | movl %ebp,%esp 84 | popl %ebp 85 | ret 86 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/dsp.3: -------------------------------------------------------------------------------- 1 | .TH DSP 3 2 | .SH NAME 3 | initdp, 
freedp, dotprod, sumsq, peakval -\ SIMD-assisted 4 | digital signal processing primitives 5 | .SH SYNOPSIS 6 | .nf 7 | .ft 8 | #include "fec.h" 9 | 10 | void *initdp(signed short *coeffs,int len); 11 | long dotprod(void *p,signed short *a); 12 | void freedp(void *p); 13 | 14 | unsigned long long sumsq(signed short *in,int cnt); 15 | 16 | int peakval(signed short *b,int cnt); 17 | 18 | .SH DESCRIPTION 19 | These functions provide several basic primitives useful in digital 20 | signal processing (DSP), especially in modems. The \fBinitdp\fR, 21 | \fBdotprod\fR and \fBfreedp\fR functions implement an integer dot 22 | product useful in correlation and filtering operations on signed 23 | 16-bit integers. \fBsumsq\fR computes the sum 24 | of the squares of an array of signed 16-bit integers, 25 | useful for measuring the energy of a signal. \fBpeakval\fR returns the 26 | absolute value of the largest magnitude element in the input array, 27 | useful for scaling a signal's amplitude. 28 | 29 | Each function uses IA32 or PowerPC Altivec instructions when 30 | available; otherwise, a portable C version is used. 31 | 32 | .SH USAGE 33 | To create a FIR filter or correlator, call \fBinitdp\fR with the 34 | coefficients in \fBcoeffs\fR and their number in \fBlen\fR. This 35 | creates the appropriate data structures and returns a handle. 36 | 37 | To compute a dot product, pass the handle from \fBinitdp\fR and the 38 | input array to \fBdotprod\fR. No length field is needed as the number 39 | of samples will be taken from the \fBlen\fR parameter originally given 40 | to \fBinitdp\fR. There must be at least as many samples in the input 41 | array as there were coefficients passed to \fBinitdp\fR. 42 | 43 | When the filter or correlator is no longer needed, the data structures 44 | may be freed by passing the handle to \fBfreedp\fR. 
45 | 46 | The user is responsible for scaling the inputs to \fBinitdp\fR and 47 | \fBdotprod\fR, as the 32-bit result from \fBdotprod\fR will silently 48 | wrap around in the event of overflow. 49 | 50 | To compute the sum of the squares of an array of signed 16-bit 51 | integers, use \fBsumsq\fR. This returns a 64-bit sum. 52 | 53 | \fBpeakval\fR computes the absolute value of each 16-bit element in 54 | the input array and returns the largest. 55 | 56 | .SH RETURN VALUES 57 | 58 | \fBinitdp\fR returns a handle that points to a control block, or NULL in 59 | the event of an error (such as a memory allocation failure). \fBsumsq\fR 60 | and \fBpeakval\fR have no error returns. 61 | 62 | .SH AUTHOR and COPYRIGHT 63 | Phil Karn, KA9Q (karn@ka9q.net) 64 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/dtest.c: -------------------------------------------------------------------------------- 1 | /* Test dot-product function */ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "config.h" 8 | #ifdef HAVE_GETOPT_H 9 | #include 10 | #endif 11 | #include "fec.h" 12 | 13 | #if HAVE_GETOPT_LONG 14 | struct option Options[] = { 15 | {"force-altivec",0,NULL,'a'}, 16 | {"force-port",0,NULL,'p'}, 17 | {"force-mmx",0,NULL,'m'}, 18 | {"force-sse",0,NULL,'s'}, 19 | {"force-sse2",0,NULL,'t'}, 20 | {"trials",0,NULL,'n'}, 21 | {NULL}, 22 | }; 23 | #endif 24 | 25 | int main(int argc,char *argv[]){ 26 | short coeffs[512]; 27 | short input[2048]; 28 | int trials=1000,d; 29 | int errors = 0; 30 | 31 | #if HAVE_GETOPT_LONG 32 | while((d = getopt_long(argc,argv,"apmstn:",Options,NULL)) != EOF){ 33 | #else 34 | while((d = getopt(argc,argv,"apmstn:")) != EOF){ 35 | #endif 36 | switch(d){ 37 | case 'a': 38 | Cpu_mode = ALTIVEC; 39 | break; 40 | case 'p': 41 | Cpu_mode = PORT; 42 | break; 43 | case 'm': 44 | Cpu_mode = MMX; 45 | break; 46 | case 's': 47 | Cpu_mode = SSE; 48 | break; 49 | case 't': 50 | Cpu_mode = SSE2; 51 | 
break; 52 | case 'n': 53 | trials = atoi(optarg); 54 | break; 55 | } 56 | } 57 | 58 | while(trials--){ 59 | long port_result; 60 | long simd_result; 61 | int ntaps; 62 | int i; 63 | int csum = 0; 64 | int offset; 65 | void *dp_simd,*dp_port; 66 | 67 | /* Generate set of coefficients 68 | * limit sum of absolute values to 32767 to avoid overflow 69 | */ 70 | memset(coeffs,0,sizeof(coeffs)); 71 | for(i=0;i<512;i++){ 72 | double gv; 73 | 74 | gv = normal_rand(0.,100.); 75 | if(csum + fabs(gv) > 32767) 76 | break; 77 | coeffs[i] = gv; 78 | csum += fabs(gv); 79 | } 80 | ntaps = i; 81 | 82 | /* Compare results to portable C version for a bunch of random data buffers and offsets */ 83 | dp_simd = initdp(coeffs,ntaps); 84 | dp_port = initdp_port(coeffs,ntaps); 85 | 86 | for(i=0;i<2048;i++) 87 | input[i] = random(); 88 | 89 | offset = random() & 511; 90 | 91 | simd_result = dotprod(dp_simd,input+offset); 92 | port_result = dotprod_port(dp_port,input+offset); 93 | if(simd_result != port_result){ 94 | errors++; 95 | } 96 | } 97 | printf("dtest: %d errors\n",errors); 98 | exit(0); 99 | } 100 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/encode_rs.c: -------------------------------------------------------------------------------- 1 | /* Reed-Solomon encoder 2 | * Copyright 2002, Phil Karn, KA9Q 3 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | */ 5 | #include 6 | 7 | #ifdef FIXED 8 | #include "fixed.h" 9 | #elif defined(BIGSYM) 10 | #include "int.h" 11 | #else 12 | #include "char.h" 13 | #endif 14 | 15 | void ENCODE_RS( 16 | #ifdef FIXED 17 | data_t *data, data_t *bb,int pad){ 18 | #else 19 | void *p,data_t *data, data_t *bb){ 20 | struct rs *rs = (struct rs *)p; 21 | #endif 22 | int i, j; 23 | data_t feedback; 24 | 25 | #ifdef FIXED 26 | /* Check pad parameter for validity */ 27 | if(pad < 0 || pad >= NN) 28 | return; 29 | #endif 30 | 31 | memset(bb,0,NROOTS*sizeof(data_t)); 32 | 
33 | for(i=0;i) must be included by the calling 23 | * program. 24 | 25 | * Copyright 2004, Phil Karn, KA9Q 26 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 27 | */ 28 | 29 | 30 | #undef A0 31 | #define A0 (NN) /* Special reserved value encoding zero in index form */ 32 | 33 | { 34 | int i, j; 35 | data_t feedback; 36 | 37 | memset(parity,0,NROOTS*sizeof(data_t)); 38 | 39 | for(i=0;i 6 | #include "fixed.h" 7 | #ifdef __VEC__ 8 | #include 9 | #endif 10 | 11 | 12 | static enum {UNKNOWN=0,MMX,SSE,SSE2,ALTIVEC,PORT} cpu_mode; 13 | 14 | static void encode_rs_8_c(data_t *data, data_t *parity,int pad); 15 | #if __vec__ 16 | static void encode_rs_8_av(data_t *data, data_t *parity,int pad); 17 | #endif 18 | #if __i386__ 19 | int cpu_features(void); 20 | #endif 21 | 22 | void encode_rs_8(data_t *data, data_t *parity,int pad){ 23 | if(cpu_mode == UNKNOWN){ 24 | #ifdef __i386__ 25 | int f; 26 | /* Figure out what kind of CPU we have */ 27 | f = cpu_features(); 28 | if(f & (1<<26)){ /* SSE2 is present */ 29 | cpu_mode = SSE2; 30 | } else if(f & (1<<25)){ /* SSE is present */ 31 | cpu_mode = SSE; 32 | } else if(f & (1<<23)){ /* MMX is present */ 33 | cpu_mode = MMX; 34 | } else { /* No SIMD at all */ 35 | cpu_mode = PORT; 36 | } 37 | #elif __VEC__ 38 | /* Ask the OS if we have Altivec support */ 39 | int selectors[2] = { CTL_HW, HW_VECTORUNIT }; 40 | int hasVectorUnit = 0; 41 | size_t length = sizeof(hasVectorUnit); 42 | int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0); 43 | if(0 == error && hasVectorUnit) 44 | cpu_mode = ALTIVEC; 45 | else 46 | cpu_mode = PORT; 47 | #else 48 | cpu_mode = PORT; 49 | #endif 50 | } 51 | switch(cpu_mode){ 52 | #if __vec__ 53 | case ALTIVEC: 54 | encode_rs_8_av(data,parity,pad); 55 | return; 56 | #endif 57 | #if __i386__ 58 | case MMX: 59 | case SSE: 60 | case SSE2: 61 | #endif 62 | default: 63 | encode_rs_8_c(data,parity,pad); 64 | return; 65 | } 66 | } 67 | 68 | #if __vec__ /* PowerPC G4/G5 
Altivec instructions are available */ 69 | 70 | static vector unsigned char reverse = (vector unsigned char)(0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1); 71 | static vector unsigned char shift_right = (vector unsigned char)(15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30); 72 | 73 | /* Lookup table for feedback multiplications 74 | * These are the low half of the coefficients. Since the generator polynomial is 75 | * palindromic, we form the other half by reversing this one 76 | */ 77 | extern static union { vector unsigned char v; unsigned char c[16]; } table[256]; 78 | 79 | static void encode_rs_8_av(data_t *data, data_t *parity,int pad){ 80 | union { vector unsigned char v[2]; unsigned char c[32]; } shift_register; 81 | int i; 82 | 83 | shift_register.v[0] = (vector unsigned char)(0); 84 | shift_register.v[1] = (vector unsigned char)(0); 85 | 86 | for(i=0;i 6 | #include 7 | #include "fixed.h" 8 | 9 | /* Lookup table for feedback multiplications 10 | * These are the low half of the coefficients. 
Since the generator polynomial is 11 | * palindromic, we form it by reversing these on the fly 12 | */ 13 | static union { vector unsigned char v; unsigned char c[16]; } table[256]; 14 | 15 | static vector unsigned char reverse = (vector unsigned char)(0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1); 16 | static vector unsigned char shift_right = (vector unsigned char)(15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30); 17 | 18 | extern data_t CCSDS_alpha_to[]; 19 | extern data_t CCSDS_index_of[]; 20 | extern data_t CCSDS_poly[]; 21 | 22 | void rs_init_av(){ 23 | int i,j; 24 | 25 | /* The PowerPC is big-endian, so the low-order byte of each vector contains the highest order term in the polynomial */ 26 | for(j=0;j<16;j++){ 27 | table[0].c[j] = 0; 28 | for(i=1;i<256;i++){ 29 | table[i].c[16-j-1] = CCSDS_alpha_to[MODNN(CCSDS_poly[j+1] + CCSDS_index_of[i])]; 30 | } 31 | } 32 | #if 0 33 | for(i=0;i<256;i++){ 34 | printf("table[%3d] = %3vu\n",i,table[i].v); 35 | } 36 | #endif 37 | } 38 | 39 | void encode_rs_av(unsigned char *data,unsigned char *parity,int pad){ 40 | union { vector unsigned char v[2]; unsigned char c[32]; } shift_register; 41 | int i; 42 | 43 | shift_register.v[0] = (vector unsigned char)(0); 44 | shift_register.v[1] = (vector unsigned char)(0); 45 | 46 | for(i=0;i 6 | 7 | #include "char.h" 8 | #include "rs-common.h" 9 | 10 | void encode_rs_char(void *p,data_t *data, data_t *parity){ 11 | struct rs *rs = (struct rs *)p; 12 | 13 | #include "encode_rs.h" 14 | 15 | } 16 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/encode_rs_int.c: -------------------------------------------------------------------------------- 1 | /* Reed-Solomon encoder 2 | * Copyright 2003, Phil Karn, KA9Q 3 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | */ 5 | #include 6 | 7 | #include "int.h" 8 | #include "rs-common.h" 9 | 10 | void encode_rs_int(void *p,data_t *data, data_t *parity){ 11 | struct rs 
*rs = (struct rs *)p; 12 | 13 | #include "encode_rs.h" 14 | 15 | } 16 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/exercise.c: -------------------------------------------------------------------------------- 1 | /* Exercise an RS codec a specified number of times using random 2 | * data and error patterns 3 | * 4 | * Copyright 2002 Phil Karn, KA9Q 5 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 6 | */ 7 | #define FLAG_ERASURE 1 /* Randomly flag 50% of errors as erasures */ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #ifdef FIXED 14 | #include "fixed.h" 15 | #define EXERCISE exercise_8 16 | #elif defined(CCSDS) 17 | #include "fixed.h" 18 | #include "ccsds.h" 19 | #define EXERCISE exercise_ccsds 20 | #elif defined(BIGSYM) 21 | #include "int.h" 22 | #define EXERCISE exercise_int 23 | #else 24 | #include "char.h" 25 | #define EXERCISE exercise_char 26 | #endif 27 | 28 | #ifdef FIXED 29 | #define PRINTPARM printf("(255,223):"); 30 | #elif defined(CCSDS) 31 | #define PRINTPARM printf("CCSDS (255,223):"); 32 | #else 33 | #define PRINTPARM printf("(%d,%d):",rs->nn,rs->nn-rs->nroots); 34 | #endif 35 | 36 | /* Exercise the RS codec passed as an argument */ 37 | int EXERCISE( 38 | #if !defined(CCSDS) && !defined(FIXED) 39 | void *p, 40 | #endif 41 | int trials){ 42 | #if !defined(CCSDS) && !defined(FIXED) 43 | struct rs *rs = (struct rs *)p; 44 | #endif 45 | data_t block[NN],tblock[NN]; 46 | int i; 47 | int errors; 48 | int errlocs[NN]; 49 | int derrlocs[NROOTS]; 50 | int derrors; 51 | int errval,errloc; 52 | int erasures; 53 | int decoder_errors = 0; 54 | 55 | while(trials-- != 0){ 56 | /* Test up to the error correction capacity of the code */ 57 | for(errors=0;errors <= NROOTS/2;errors++){ 58 | 59 | /* Load block with random data and encode */ 60 | for(i=0;i 6 | #include "fec.h" 7 | 8 | unsigned char Partab[256]; 9 | int P_init; 10 | 11 | /* Create 256-entry 
odd-parity lookup table 12 | * Needed only on non-ia32 machines 13 | */ 14 | void partab_init(void){ 15 | int i,cnt,ti; 16 | 17 | /* Initialize parity lookup table */ 18 | for(i=0;i<256;i++){ 19 | cnt = 0; 20 | ti = i; 21 | while(ti){ 22 | if(ti & 1) 23 | cnt++; 24 | ti >>= 1; 25 | } 26 | Partab[i] = cnt & 1; 27 | } 28 | P_init=1; 29 | } 30 | 31 | /* Lookup table giving count of 1 bits for integers 0-255 */ 32 | int Bitcnt[] = { 33 | 0, 1, 1, 2, 1, 2, 2, 3, 34 | 1, 2, 2, 3, 2, 3, 3, 4, 35 | 1, 2, 2, 3, 2, 3, 3, 4, 36 | 2, 3, 3, 4, 3, 4, 4, 5, 37 | 1, 2, 2, 3, 2, 3, 3, 4, 38 | 2, 3, 3, 4, 3, 4, 4, 5, 39 | 2, 3, 3, 4, 3, 4, 4, 5, 40 | 3, 4, 4, 5, 4, 5, 5, 6, 41 | 1, 2, 2, 3, 2, 3, 3, 4, 42 | 2, 3, 3, 4, 3, 4, 4, 5, 43 | 2, 3, 3, 4, 3, 4, 4, 5, 44 | 3, 4, 4, 5, 4, 5, 5, 6, 45 | 2, 3, 3, 4, 3, 4, 4, 5, 46 | 3, 4, 4, 5, 4, 5, 5, 6, 47 | 3, 4, 4, 5, 4, 5, 5, 6, 48 | 4, 5, 5, 6, 5, 6, 6, 7, 49 | 1, 2, 2, 3, 2, 3, 3, 4, 50 | 2, 3, 3, 4, 3, 4, 4, 5, 51 | 2, 3, 3, 4, 3, 4, 4, 5, 52 | 3, 4, 4, 5, 4, 5, 5, 6, 53 | 2, 3, 3, 4, 3, 4, 4, 5, 54 | 3, 4, 4, 5, 4, 5, 5, 6, 55 | 3, 4, 4, 5, 4, 5, 5, 6, 56 | 4, 5, 5, 6, 5, 6, 6, 7, 57 | 2, 3, 3, 4, 3, 4, 4, 5, 58 | 3, 4, 4, 5, 4, 5, 5, 6, 59 | 3, 4, 4, 5, 4, 5, 5, 6, 60 | 4, 5, 5, 6, 5, 6, 6, 7, 61 | 3, 4, 4, 5, 4, 5, 5, 6, 62 | 4, 5, 5, 6, 5, 6, 6, 7, 63 | 4, 5, 5, 6, 5, 6, 6, 7, 64 | 5, 6, 6, 7, 6, 7, 7, 8, 65 | }; 66 | 67 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/fixed.h: -------------------------------------------------------------------------------- 1 | /* Stuff specific to the CCSDS (255,223) RS codec 2 | * (255,223) code over GF(256). 
Note: the conventional basis is still 3 | * used; the dual-basis mappings are performed in [en|de]code_rs_ccsds.c 4 | * 5 | * Copyright 2003 Phil Karn, KA9Q 6 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 7 | */ 8 | typedef unsigned char data_t; 9 | 10 | static inline int mod255(int x){ 11 | while (x >= 255) { 12 | x -= 255; 13 | x = (x >> 8) + (x & 255); 14 | } 15 | return x; 16 | } 17 | #define MODNN(x) mod255(x) 18 | 19 | extern data_t CCSDS_alpha_to[]; 20 | extern data_t CCSDS_index_of[]; 21 | extern data_t CCSDS_poly[]; 22 | 23 | #define MM 8 24 | #define NN 255 25 | #define ALPHA_TO CCSDS_alpha_to 26 | #define INDEX_OF CCSDS_index_of 27 | #define GENPOLY CCSDS_poly 28 | #define NROOTS 32 29 | #define FCR 112 30 | #define PRIM 11 31 | #define IPRIM 116 32 | #define PAD pad 33 | 34 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/gen_ccsds.c: -------------------------------------------------------------------------------- 1 | /* Generate tables for CCSDS code 2 | * Copyright 2002 Phil Karn, KA9Q 3 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | */ 5 | #include 6 | #include 7 | #include 8 | #include "char.h" 9 | #include "rs-common.h" 10 | #include "fec.h" 11 | 12 | int main(){ 13 | struct rs *rs; 14 | int i; 15 | 16 | rs = init_rs_char(8,0x187,112,11,32,0); /* CCSDS standard */ 17 | assert(rs != NULL); 18 | printf("char CCSDS_alpha_to[] = {"); 19 | for(i=0;i<256;i++){ 20 | if((i % 16) == 0) 21 | printf("\n"); 22 | printf("0x%02x,",rs->alpha_to[i]); 23 | } 24 | printf("\n};\n\nchar CCSDS_index_of[] = {"); 25 | for(i=0;i<256;i++){ 26 | if((i % 16) == 0) 27 | printf("\n"); 28 | printf("%3d,",rs->index_of[i]); 29 | } 30 | printf("\n};\n\nchar CCSDS_poly[] = {"); 31 | for(i=0;i<33;i++){ 32 | if((i % 16) == 0) 33 | printf("\n"); 34 | 35 | printf("%3d,",rs->genpoly[i]); 36 | } 37 | printf("\n};\n"); 38 | exit(0); 39 | } 40 | 
-------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/gen_ccsds_tal.c: -------------------------------------------------------------------------------- 1 | /* Conversion lookup tables from conventional alpha to Berlekamp's 2 | * dual-basis representation. Used in the CCSDS version only. 3 | * taltab[] -- convert conventional to dual basis 4 | * tal1tab[] -- convert dual basis to conventional 5 | 6 | * Note: the actual RS encoder/decoder works with the conventional basis. 7 | * So data is converted from dual to conventional basis before either 8 | * encoding or decoding and then converted back. 9 | * 10 | * Copyright 2002 Phil Karn, KA9Q 11 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 12 | */ 13 | #include 14 | #include 15 | 16 | #define DTYPE unsigned char 17 | DTYPE Taltab[256],Tal1tab[256]; 18 | 19 | static DTYPE tal[] = { 0x8d, 0xef, 0xec, 0x86, 0xfa, 0x99, 0xaf, 0x7b }; 20 | 21 | /* Generate conversion lookup tables between conventional alpha representation 22 | * (@**7, @**6, ...@**0) 23 | * and Berlekamp's dual basis representation 24 | * (l0, l1, ...l7) 25 | */ 26 | int main(){ 27 | int i,j,k; 28 | 29 | for(i=0;i<256;i++){/* For each value of input */ 30 | Taltab[i] = 0; 31 | for(j=0;j<8;j++) /* for each column of matrix */ 32 | for(k=0;k<8;k++){ /* for each row of matrix */ 33 | if(i & (1< 7 | #include "fec.h" 8 | 9 | #if !defined(NULL) 10 | #define NULL ((void *)0) 11 | #endif 12 | 13 | #include "rs-common.h" 14 | 15 | void free_rs(void *p){ 16 | struct rs *rs = (struct rs *)p; 17 | 18 | free(rs->alpha_to); 19 | free(rs->index_of); 20 | free(rs->genpoly); 21 | free(rs); 22 | } 23 | 24 | /* Initialize a Reed-Solomon codec 25 | * symsize = symbol size, bits 26 | * gfpoly = Field generator polynomial coefficients 27 | * fcr = first root of RS code generator polynomial, index form 28 | * prim = primitive element to generate polynomial roots 29 | * nroots = RS code generator 
polynomial degree (number of roots) 30 | * pad = padding bytes at front of shortened block 31 | */ 32 | void *init_rs_common(int symsize,int gfpoly,int fcr,int prim, 33 | int nroots,int pad){ 34 | struct rs *rs; 35 | 36 | #include "init_rs.h" 37 | 38 | return rs; 39 | } 40 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/init_rs.h: -------------------------------------------------------------------------------- 1 | /* Common code for intializing a Reed-Solomon control block (char or int symbols) 2 | * Copyright 2004 Phil Karn, KA9Q 3 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | */ 5 | #undef NULL 6 | #define NULL ((void *)0) 7 | 8 | { 9 | int i, j, sr,root,iprim; 10 | 11 | rs = NULL; 12 | /* Check parameter ranges */ 13 | if(symsize < 0 || symsize > 8*sizeof(data_t)){ 14 | goto done; 15 | } 16 | 17 | if(fcr < 0 || fcr >= (1<= (1<= (1<= ((1<mm = symsize; 31 | rs->nn = (1<pad = pad; 33 | 34 | rs->alpha_to = (data_t *)malloc(sizeof(data_t)*(rs->nn+1)); 35 | if(rs->alpha_to == NULL){ 36 | free(rs); 37 | rs = NULL; 38 | goto done; 39 | } 40 | rs->index_of = (data_t *)malloc(sizeof(data_t)*(rs->nn+1)); 41 | if(rs->index_of == NULL){ 42 | free(rs->alpha_to); 43 | free(rs); 44 | rs = NULL; 45 | goto done; 46 | } 47 | 48 | /* Generate Galois field lookup tables */ 49 | rs->index_of[0] = A0; /* log(zero) = -inf */ 50 | rs->alpha_to[A0] = 0; /* alpha**-inf = 0 */ 51 | sr = 1; 52 | for(i=0;inn;i++){ 53 | rs->index_of[sr] = i; 54 | rs->alpha_to[i] = sr; 55 | sr <<= 1; 56 | if(sr & (1<nn; 59 | } 60 | if(sr != 1){ 61 | /* field generator polynomial is not primitive! 
*/ 62 | free(rs->alpha_to); 63 | free(rs->index_of); 64 | free(rs); 65 | rs = NULL; 66 | goto done; 67 | } 68 | 69 | /* Form RS code generator polynomial from its roots */ 70 | rs->genpoly = (data_t *)malloc(sizeof(data_t)*(nroots+1)); 71 | if(rs->genpoly == NULL){ 72 | free(rs->alpha_to); 73 | free(rs->index_of); 74 | free(rs); 75 | rs = NULL; 76 | goto done; 77 | } 78 | rs->fcr = fcr; 79 | rs->prim = prim; 80 | rs->nroots = nroots; 81 | 82 | /* Find prim-th root of 1, used in decoding */ 83 | for(iprim=1;(iprim % prim) != 0;iprim += rs->nn) 84 | ; 85 | rs->iprim = iprim / prim; 86 | 87 | rs->genpoly[0] = 1; 88 | for (i = 0,root=fcr*prim; i < nroots; i++,root += prim) { 89 | rs->genpoly[i+1] = 1; 90 | 91 | /* Multiply rs->genpoly[] by @**(root + x) */ 92 | for (j = i; j > 0; j--){ 93 | if (rs->genpoly[j] != 0) 94 | rs->genpoly[j] = rs->genpoly[j-1] ^ rs->alpha_to[modnn(rs,rs->index_of[rs->genpoly[j]] + root)]; 95 | else 96 | rs->genpoly[j] = rs->genpoly[j-1]; 97 | } 98 | /* rs->genpoly[0] can never be zero */ 99 | rs->genpoly[0] = rs->alpha_to[modnn(rs,rs->index_of[rs->genpoly[0]] + root)]; 100 | } 101 | /* convert rs->genpoly[] to index form for quicker encoding */ 102 | for (i = 0; i <= nroots; i++) 103 | rs->genpoly[i] = rs->index_of[rs->genpoly[i]]; 104 | done:; 105 | 106 | } 107 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/init_rs_char.c: -------------------------------------------------------------------------------- 1 | /* Initialize a RS codec 2 | * 3 | * Copyright 2002 Phil Karn, KA9Q 4 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 5 | */ 6 | #include 7 | 8 | #include "char.h" 9 | #include "rs-common.h" 10 | 11 | void free_rs_char(void *p){ 12 | struct rs *rs = (struct rs *)p; 13 | 14 | free(rs->alpha_to); 15 | free(rs->index_of); 16 | free(rs->genpoly); 17 | free(rs); 18 | } 19 | 20 | /* Initialize a Reed-Solomon codec 21 | * symsize = symbol size, bits 22 | * 
gfpoly = Field generator polynomial coefficients 23 | * fcr = first root of RS code generator polynomial, index form 24 | * prim = primitive element to generate polynomial roots 25 | * nroots = RS code generator polynomial degree (number of roots) 26 | * pad = padding bytes at front of shortened block 27 | */ 28 | void *init_rs_char(int symsize,int gfpoly,int fcr,int prim, 29 | int nroots,int pad){ 30 | struct rs *rs; 31 | 32 | #include "init_rs.h" 33 | 34 | return rs; 35 | } 36 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/init_rs_int.c: -------------------------------------------------------------------------------- 1 | /* Initialize a RS codec 2 | * 3 | * Copyright 2002 Phil Karn, KA9Q 4 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 5 | */ 6 | #include 7 | 8 | #include "int.h" 9 | #include "rs-common.h" 10 | 11 | void free_rs_int(void *p){ 12 | struct rs *rs = (struct rs *)p; 13 | 14 | free(rs->alpha_to); 15 | free(rs->index_of); 16 | free(rs->genpoly); 17 | free(rs); 18 | } 19 | 20 | /* Initialize a Reed-Solomon codec 21 | * symsize = symbol size, bits 22 | * gfpoly = Field generator polynomial coefficients 23 | * fcr = first root of RS code generator polynomial, index form 24 | * prim = primitive element to generate polynomial roots 25 | * nroots = RS code generator polynomial degree (number of roots) 26 | * pad = padding bytes at front of shortened block 27 | */ 28 | void *init_rs_int(int symsize,int gfpoly,int fcr,int prim, 29 | int nroots,int pad){ 30 | struct rs *rs; 31 | 32 | #include "init_rs.h" 33 | 34 | return rs; 35 | } 36 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/int.h: -------------------------------------------------------------------------------- 1 | /* Stuff specific to the general (integer) version of the Reed-Solomon codecs 2 | * 3 | * Copyright 2003, Phil Karn, KA9Q 4 | * May be used 
under the terms of the GNU Lesser General Public License (LGPL) 5 | */ 6 | typedef unsigned int data_t; 7 | 8 | #define MODNN(x) modnn(rs,x) 9 | 10 | #define MM (rs->mm) 11 | #define NN (rs->nn) 12 | #define ALPHA_TO (rs->alpha_to) 13 | #define INDEX_OF (rs->index_of) 14 | #define GENPOLY (rs->genpoly) 15 | #define NROOTS (rs->nroots) 16 | #define FCR (rs->fcr) 17 | #define PRIM (rs->prim) 18 | #define IPRIM (rs->iprim) 19 | #define PAD (rs->pad) 20 | #define A0 (NN) 21 | 22 | 23 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/mmxbfly27.s: -------------------------------------------------------------------------------- 1 | /* Intel SIMD MMX implementation of Viterbi ACS butterflies 2 | for 64-state (k=7) convolutional code 3 | Copyright 2004 Phil Karn, KA9Q 4 | This code may be used under the terms of the GNU Lesser General Public License (LGPL) 5 | Returns 0 in eax on success, -1 if vp is NULL. 6 | int update_viterbi27_blk_mmx(struct v27 *vp,unsigned char *syms,int nbits) ; 7 | */ 8 | # MMX (64-bit SIMD) version 9 | # requires Pentium-MMX, Pentium-II or better 10 | 11 | # These are offsets into struct v27, defined in viterbi27_mmx.c 12 | .set DP,128 13 | .set OLDMETRICS,132 14 | .set NEWMETRICS,136 15 | .text 16 | .global update_viterbi27_blk_mmx,Mettab27_1,Mettab27_2 17 | .type update_viterbi27_blk_mmx,@function 18 | .align 16 19 | 20 | update_viterbi27_blk_mmx: 21 | pushl %ebp 22 | movl %esp,%ebp 23 | pushl %esi 24 | pushl %edi 25 | pushl %edx 26 | pushl %ebx 27 | 28 | movl 8(%ebp),%edx # edx = vp 29 | testl %edx,%edx # NULL control block? 30 | jnz 0f 31 | movl $-1,%eax # return -1; the $ makes this an immediate (without it, it is a load from address -1) 32 | jmp err 33 | 0: movl OLDMETRICS(%edx),%esi # esi -> old metrics 34 | movl NEWMETRICS(%edx),%edi # edi -> new metrics 35 | movl DP(%edx),%edx # edx -> decisions 36 | 37 | 1: movl 16(%ebp),%eax # eax = nbits 38 | decl %eax 39 | jl 2f # passed zero, we're done 40 | movl %eax,16(%ebp) 41 | 42 | movl 12(%ebp),%ebx # ebx = syms 43 | movw (%ebx),%ax # ax = second symbol : first symbol 44 | addl $2,%ebx # advance past the two symbols just read 45 | 
movl %ebx,12(%ebp) 46 | 47 | movb %ah,%bl 48 | andl $255,%eax 49 | andl $255,%ebx 50 | 51 | # shift into first array index dimension slot 52 | shll $5,%eax 53 | shll $5,%ebx 54 | 55 | # each invocation of this macro will do 8 butterflies in parallel 56 | .MACRO butterfly GROUP 57 | # Compute branch metrics 58 | movq (Mettab27_1+8*\GROUP)(%eax),%mm3 59 | movq fifteens,%mm0 60 | 61 | paddb (Mettab27_2+8*\GROUP)(%ebx),%mm3 62 | paddb ones,%mm3 # emulate pavgb - this may not be necessary 63 | psrlq $1,%mm3 64 | pand %mm0,%mm3 65 | 66 | movq (8*\GROUP)(%esi),%mm6 # Incoming path metric, high bit = 0 67 | movq ((8*\GROUP)+32)(%esi),%mm2 # Incoming path metric, high bit = 1 68 | movq %mm6,%mm1 69 | movq %mm2,%mm7 70 | 71 | paddb %mm3,%mm6 72 | paddb %mm3,%mm2 73 | pxor %mm0,%mm3 # invert branch metric 74 | paddb %mm3,%mm7 # path metric for inverted symbols 75 | paddb %mm3,%mm1 76 | 77 | # live registers 1 2 6 7 78 | # Compare mm6 and mm7; mm1 and mm2 79 | pxor %mm3,%mm3 80 | movq %mm6,%mm4 81 | movq %mm1,%mm5 82 | psubb %mm7,%mm4 # mm4 = mm6 - mm7 83 | psubb %mm2,%mm5 # mm5 = mm1 - mm2 84 | pcmpgtb %mm3,%mm4 # mm4 = first set of decisions (ff = 1 better) 85 | pcmpgtb %mm3,%mm5 # mm5 = second set of decisions 86 | 87 | # live registers 1 2 4 5 6 7 88 | # select survivors 89 | movq %mm4,%mm0 90 | pand %mm4,%mm7 91 | movq %mm5,%mm3 92 | pand %mm5,%mm2 93 | pandn %mm6,%mm0 94 | pandn %mm1,%mm3 95 | por %mm0,%mm7 # mm7 = first set of survivors 96 | por %mm3,%mm2 # mm2 = second set of survivors 97 | 98 | # live registers 2 4 5 7 99 | # interleave & store decisions in mm4, mm5 100 | # interleave & store new branch metrics in mm2, mm7 101 | movq %mm4,%mm3 102 | movq %mm7,%mm0 103 | punpckhbw %mm5,%mm4 104 | punpcklbw %mm5,%mm3 105 | punpcklbw %mm2,%mm7 # interleave second 8 new metrics 106 | punpckhbw %mm2,%mm0 # interleave first 8 new metrics 107 | movq %mm4,(16*\GROUP+8)(%edx) 108 | movq %mm3,(16*\GROUP)(%edx) 109 | movq %mm7,(16*\GROUP)(%edi) 110 | movq 
%mm0,(16*\GROUP+8)(%edi) 111 | 112 | .endm 113 | 114 | # invoke macro 4 times for a total of 32 butterflies 115 | butterfly GROUP=0 116 | butterfly GROUP=1 117 | butterfly GROUP=2 118 | butterfly GROUP=3 119 | 120 | addl $64,%edx # bump decision pointer 121 | 122 | # swap metrics 123 | movl %esi,%eax 124 | movl %edi,%esi 125 | movl %eax,%edi 126 | jmp 1b 127 | 128 | 2: emms 129 | movl 8(%ebp),%ebx # ebx = vp 130 | # stash metric pointers 131 | movl %esi,OLDMETRICS(%ebx) 132 | movl %edi,NEWMETRICS(%ebx) 133 | movl %edx,DP(%ebx) # stash incremented value of vp->dp 134 | xorl %eax,%eax # success: return 0 135 | err: popl %ebx # common epilogue; eax already holds the return value 136 | popl %edx 137 | popl %edi 138 | popl %esi 139 | popl %ebp 140 | ret 141 | 142 | .data 143 | .align 8 144 | fifteens: 145 | .byte 15,15,15,15,15,15,15,15 146 | 147 | .align 8 148 | ones: .byte 1,1,1,1,1,1,1,1 149 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/mmxbfly29.s: -------------------------------------------------------------------------------- 1 | /* Intel SIMD MMX implementation of Viterbi ACS butterflies 2 | for 256-state (k=9) convolutional code 3 | Copyright 2004 Phil Karn, KA9Q 4 | This code may be used under the terms of the GNU Lesser General Public License (LGPL) 5 | On return eax is 0 on success, -1 if vp is NULL. 6 | void update_viterbi29_blk_mmx(struct v29 *vp,unsigned char *syms,int nbits); 7 | */ 8 | 9 | # These are offsets into struct v29, defined in viterbi29.h 10 | .set DP,512 11 | .set OLDMETRICS,516 12 | .set NEWMETRICS,520 13 | .text 14 | .global update_viterbi29_blk_mmx,Mettab29_1,Mettab29_2 15 | .type update_viterbi29_blk_mmx,@function 16 | .align 16 17 | 18 | # MMX (64-bit SIMD) version 19 | # requires Pentium-MMX, Pentium-II or better 20 | 21 | update_viterbi29_blk_mmx: 22 | pushl %ebp 23 | movl %esp,%ebp 24 | pushl %esi 25 | pushl %edi 26 | pushl %edx 27 | pushl %ebx 28 | 29 | movl 8(%ebp),%edx # edx = vp 30 | movl 8(%ebp),%edx # edx = vp 31 | testl %edx,%edx # NULL control block? 32 | jnz 0f 33 | movl $-1,%eax # return -1; the $ makes this an immediate (without it, it is a load from address -1) 34 | jmp err 35 | 0: movl 
OLDMETRICS(%edx),%esi # esi -> old metrics 36 | movl NEWMETRICS(%edx),%edi # edi -> new metrics 37 | movl DP(%edx),%edx # edx -> decisions 38 | 39 | 1: movl 16(%ebp),%eax # eax = nbits 40 | decl %eax 41 | jl 2f # passed zero, we're done 42 | movl %eax,16(%ebp) 43 | 44 | movl 12(%ebp),%ebx # ebx = syms 45 | movw (%ebx),%ax # ax = second symbol : first symbol 46 | addl $2,%ebx 47 | movl %ebx,12(%ebp) 48 | 49 | movb %ah,%bl 50 | andl $255,%eax 51 | andl $255,%ebx 52 | 53 | # shift into first array index dimension slot 54 | shll $7,%eax 55 | shll $7,%ebx 56 | 57 | # each invocation of this macro will do 8 butterflies in parallel 58 | .MACRO butterfly GROUP 59 | # Compute branch metrics 60 | movq (Mettab29_1+8*\GROUP)(%eax),%mm3 61 | movq fifteens,%mm0 62 | paddb (Mettab29_2+8*\GROUP)(%ebx),%mm3 63 | paddb ones,%mm3 # emulate pavgb - this may not be necessary 64 | psrlq $1,%mm3 65 | pand %mm0,%mm3 66 | 67 | movq (8*\GROUP)(%esi),%mm6 # Incoming path metric, high bit = 0 68 | movq ((8*\GROUP)+128)(%esi),%mm2 # Incoming path metric, high bit = 1 69 | movq %mm6,%mm1 70 | movq %mm2,%mm7 71 | 72 | paddb %mm3,%mm6 73 | paddb %mm3,%mm2 74 | pxor %mm0,%mm3 # invert branch metric 75 | paddb %mm3,%mm7 # path metric for inverted symbols 76 | paddb %mm3,%mm1 77 | 78 | # live registers 1 2 6 7 79 | # Compare mm6 and mm7; mm1 and mm2 80 | pxor %mm3,%mm3 81 | movq %mm6,%mm4 82 | movq %mm1,%mm5 83 | psubb %mm7,%mm4 # mm4 = mm6 - mm7 84 | psubb %mm2,%mm5 # mm5 = mm1 - mm2 85 | pcmpgtb %mm3,%mm4 # mm4 = first set of decisions (ff = 1 better) 86 | pcmpgtb %mm3,%mm5 # mm5 = second set of decisions 87 | 88 | # live registers 1 2 4 5 6 7 89 | # select survivors 90 | movq %mm4,%mm0 91 | pand %mm4,%mm7 92 | movq %mm5,%mm3 93 | pand %mm5,%mm2 94 | pandn %mm6,%mm0 95 | pandn %mm1,%mm3 96 | por %mm0,%mm7 # mm7 = first set of survivors 97 | por %mm3,%mm2 # mm2 = second set of survivors 98 | 99 | # live registers 2 4 5 7 100 | # interleave & store decisions in mm4, mm5 101 | # interleave & store 
new branch metrics in mm2, mm7 102 | movq %mm4,%mm3 103 | movq %mm7,%mm0 104 | punpckhbw %mm5,%mm4 105 | punpcklbw %mm5,%mm3 106 | punpcklbw %mm2,%mm7 # interleave second 8 new metrics 107 | punpckhbw %mm2,%mm0 # interleave first 8 new metrics 108 | movq %mm4,(16*\GROUP+8)(%edx) 109 | movq %mm3,(16*\GROUP)(%edx) 110 | movq %mm7,(16*\GROUP)(%edi) 111 | movq %mm0,(16*\GROUP+8)(%edi) 112 | 113 | .endm 114 | 115 | # invoke macro 16 times for a total of 128 butterflies 116 | butterfly GROUP=0 117 | butterfly GROUP=1 118 | butterfly GROUP=2 119 | butterfly GROUP=3 120 | butterfly GROUP=4 121 | butterfly GROUP=5 122 | butterfly GROUP=6 123 | butterfly GROUP=7 124 | butterfly GROUP=8 125 | butterfly GROUP=9 126 | butterfly GROUP=10 127 | butterfly GROUP=11 128 | butterfly GROUP=12 129 | butterfly GROUP=13 130 | butterfly GROUP=14 131 | butterfly GROUP=15 132 | 133 | addl $256,%edx # bump decision pointer 134 | 135 | # swap metrics 136 | movl %esi,%eax 137 | movl %edi,%esi 138 | movl %eax,%edi 139 | jmp 1b 140 | 141 | 2: emms 142 | movl 8(%ebp),%ebx # ebx = vp 143 | # stash metric pointers 144 | movl %esi,OLDMETRICS(%ebx) 145 | movl %edi,NEWMETRICS(%ebx) 146 | movl %edx,DP(%ebx) # stash incremented value of vp->dp 147 | xorl %eax,%eax 148 | err: popl %ebx 149 | popl %edx 150 | popl %edi 151 | popl %esi 152 | popl %ebp 153 | ret 154 | 155 | .data 156 | .align 8 157 | fifteens: 158 | .byte 15,15,15,15,15,15,15,15 159 | 160 | .align 8 161 | ones: .byte 1,1,1,1,1,1,1,1 162 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/peak_mmx_assist.s: -------------------------------------------------------------------------------- 1 | # MMX assist routines for peakval 2 | # Copyright 2001 Phil Karn, KA9Q 3 | # May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | 5 | .text 6 | 7 | # Find peak value in signed 16-bit input samples 8 | # int peakval_mmx(signed short *in,int cnt); 9 | .global peakval_mmx 10 | 
.type peakval_mmx,@function 11 | .align 16 12 | peakval_mmx: 13 | pushl %ebp 14 | movl %esp,%ebp 15 | pushl %esi 16 | pushl %ecx 17 | pushl %ebx 18 | 19 | movl 8(%ebp),%esi 20 | movl 12(%ebp),%ecx 21 | 22 | pxor %mm7,%mm7 # clear peak 23 | 24 | 1: subl $4,%ecx 25 | jl 2f 26 | movq (%esi),%mm0 27 | movq %mm0,%mm1 28 | psraw $15,%mm1 # mm1 = 1's if negative, 0's if positive 29 | pxor %mm1,%mm0 # complement negatives 30 | psubw %mm1,%mm0 # add 1 to negatives 31 | movq %mm7,%mm6 # copy previous peak 32 | pcmpgtw %mm0,%mm6 # ff == old peak greater 33 | pand %mm6,%mm7 # select old peaks that are greater 34 | pandn %mm0,%mm6 # select new values that are greater 35 | por %mm6,%mm7 36 | 37 | addl $8,%esi 38 | jmp 1b 39 | 40 | 2: movd %mm7,%eax 41 | psrlq $16,%mm7 42 | andl $0xffff,%eax 43 | 44 | movd %mm7,%edx 45 | psrlq $16,%mm7 46 | andl $0xffff,%edx 47 | cmpl %edx,%eax 48 | jnl 3f 49 | movl %edx,%eax 50 | 3: 51 | movd %mm7,%edx 52 | psrlq $16,%mm7 53 | andl $0xffff,%edx 54 | cmpl %edx,%eax 55 | jnl 4f 56 | movl %edx,%eax 57 | 4: 58 | movd %mm7,%edx 59 | andl $0xffff,%edx 60 | cmpl %edx,%eax 61 | jnl 5f 62 | movl %edx,%eax 63 | 5: 64 | emms 65 | popl %ebx 66 | popl %ecx 67 | popl %esi 68 | popl %ebp 69 | ret 70 | 71 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/peak_sse2_assist.s: -------------------------------------------------------------------------------- 1 | # SSE2 assist routines for peakval 2 | # Copyright 2001 Phil Karn, KA9Q 3 | # May be used under the terms of the GNU Public License (GPL) 4 | 5 | .text 6 | 7 | # Find peak absolute value in signed 16-bit input samples 8 | # int peakval_sse2(signed short *in,int cnt); 9 | .global peakval_sse2 10 | .type peakval_sse2,@function 11 | .align 16 12 | peakval_sse2: 13 | pushl %ebp 14 | movl %esp,%ebp 15 | pushl %esi 16 | pushl %ecx 17 | 18 | movl 8(%ebp),%esi 19 | movl 12(%ebp),%ecx 20 | 21 | pxor %xmm7,%xmm7 # clear peak 22 | 23 | 1: subl $8,%ecx 24 | jl 2f 
25 | movaps (%esi),%xmm0 26 | movaps %xmm0,%xmm1 27 | psraw $15,%xmm1 # xmm1 = 1's if negative, 0's if positive 28 | pxor %xmm1,%xmm0 # complement negatives 29 | psubw %xmm1,%xmm0 # add 1 to negatives 30 | pmaxsw %xmm0,%xmm7 # store peak 31 | 32 | addl $16,%esi 33 | jmp 1b 34 | 35 | 2: movaps %xmm7,%xmm0 36 | psrldq $8,%xmm0 # fold upper 8 bytes onto lower 8 37 | pmaxsw %xmm0,%xmm7 38 | movaps %xmm7,%xmm0 39 | psrlq $32,%xmm0 # fold words 2,3 onto words 0,1 40 | pmaxsw %xmm0,%xmm7 41 | movaps %xmm7,%xmm0 42 | psrlq $16,%xmm0 # fold word 1 onto word 0 43 | pmaxsw %xmm0,%xmm7 # max value in low word of %xmm7 44 | 45 | movd %xmm7,%eax 46 | andl $0xffff,%eax # keep only the low 16-bit word 47 | 48 | popl %ecx 49 | popl %esi 50 | popl %ebp 51 | ret 52 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/peak_sse_assist.s: -------------------------------------------------------------------------------- 1 | # SSE assist routines for peakval 2 | # Copyright 2001 Phil Karn, KA9Q 3 | # May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | 5 | .text 6 | 7 | # Find peak absolute value in signed 16-bit input samples 8 | # int peakval_sse(signed short *in,int cnt); 9 | .global peakval_sse 10 | .type peakval_sse,@function 11 | .align 16 12 | peakval_sse: 13 | pushl %ebp 14 | movl %esp,%ebp 15 | pushl %esi 16 | pushl %ecx 17 | 18 | movl 8(%ebp),%esi 19 | movl 12(%ebp),%ecx 20 | 21 | pxor %mm7,%mm7 # clear peak 22 | 23 | 1: subl $4,%ecx # 4 samples are consumed per iteration 24 | jl 2f # fewer than 4 left: go reduce 25 | movq (%esi),%mm0 26 | movq %mm0,%mm1 27 | psraw $15,%mm1 # mm1 = 1's if negative, 0's if positive 28 | pxor %mm1,%mm0 # complement negatives 29 | psubw %mm1,%mm0 # add 1 to negatives 30 | pmaxsw %mm0,%mm7 # store peak 31 | 32 | addl $8,%esi 33 | jmp 1b 34 | 35 | 2: movq %mm7,%mm0 36 | psrlq $32,%mm0 # fold words 2,3 onto words 0,1 37 | pmaxsw %mm0,%mm7 38 | movq %mm7,%mm0 39 | psrlq $16,%mm0 # fold word 1 onto word 0 40 | pmaxsw %mm0,%mm7 # max value in low word of %mm7 41 | 42 | movd %mm7,%eax 43 | andl $0xffff,%eax # keep only the low 16-bit word 44 | 45 | emms 46 | popl %ecx 47 | popl %esi 48 | popl %ebp 49 | ret 50 | 
-------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/peaktest.c: -------------------------------------------------------------------------------- 1 | /* Verify correctness of the peak routine 2 | * Copyright 2004 Phil Karn, KA9Q 3 | */ 4 | #include 5 | #include 6 | #include 7 | 8 | /* These values should trigger leading/trailing array fragment handling */ 9 | #define NSAMP 200002 10 | #define OFFSET 1 11 | 12 | int peakval(signed short *,int); 13 | int peakval_port(signed short *,int); 14 | 15 | int main(){ 16 | int i,s; 17 | int result,rresult; 18 | signed short samples[NSAMP]; 19 | 20 | srandom(time(NULL)); 21 | 22 | for(i=0;i 6 | #include "fec.h" 7 | 8 | int peakval_port(signed short *b,int cnt); 9 | #ifdef __i386__ 10 | int peakval_mmx(signed short *b,int cnt); 11 | int peakval_sse(signed short *b,int cnt); 12 | int peakval_sse2(signed short *b,int cnt); 13 | #endif 14 | 15 | #ifdef __VEC__ 16 | int peakval_av(signed short *b,int cnt); 17 | #endif 18 | 19 | int peakval(signed short *b,int cnt){ 20 | find_cpu_mode(); 21 | 22 | switch(Cpu_mode){ 23 | case PORT: 24 | default: 25 | return peakval_port(b,cnt); 26 | #ifdef __i386__ 27 | case MMX: 28 | return peakval_mmx(b,cnt); 29 | case SSE: 30 | return peakval_sse(b,cnt); 31 | case SSE2: 32 | return peakval_sse2(b,cnt); 33 | #endif 34 | #ifdef __VEC__ 35 | case ALTIVEC: 36 | return peakval_av(b,cnt); 37 | #endif 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/peakval_av.c: -------------------------------------------------------------------------------- 1 | /* Return the largest absolute value of a vector of signed shorts 2 | 3 | * This is the Altivec SIMD version. 
4 | 5 | * Copyright 2004 Phil Karn, KA9Q 6 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 7 | */ 8 | 9 | #include "fec.h" 10 | 11 | signed short peakval_av(signed short *in,int cnt){ 12 | vector signed short x; 13 | int pad; 14 | union { vector signed char cv; vector signed short hv; signed short s[8]; signed char c[16];} s; 15 | vector signed short smallest,largest; 16 | 17 | smallest = (vector signed short)(0); 18 | largest = (vector signed short)(0); 19 | if((pad = (int)in & 15)!=0){ 20 | /* Load unaligned leading word */ 21 | x = vec_perm(vec_ld(0,in),(vector signed short)(0),vec_lvsl(0,in)); 22 | if(cnt < 8){ /* Shift right to chop stuff beyond end of short block */ 23 | s.c[15] = (8-cnt)<<4; 24 | x = vec_sro(x,s.cv); 25 | } 26 | smallest = vec_min(smallest,x); 27 | largest = vec_max(largest,x); 28 | in += 8-pad/2; 29 | cnt -= 8-pad/2; 30 | } 31 | /* Everything is now aligned, rip through most of the block */ 32 | while(cnt >= 8){ 33 | x = vec_ld(0,in); 34 | smallest = vec_min(smallest,x); 35 | largest = vec_max(largest,x); 36 | in += 8; 37 | cnt -= 8; 38 | } 39 | /* Handle trailing fragment, if any */ 40 | if(cnt > 0){ 41 | x = vec_ld(0,in); 42 | s.c[15] = (8-cnt)<<4; 43 | x = vec_sro(x,s.cv); 44 | smallest = vec_min(smallest,x); 45 | largest = vec_max(largest,x); 46 | } 47 | /* Combine and extract result */ 48 | largest = vec_max(largest,vec_abs(smallest)); 49 | 50 | s.c[15] = 64; /* Shift right four 16-bit words */ 51 | largest = vec_max(largest,vec_sro(largest,s.cv)); 52 | 53 | s.c[15] = 32; /* Shift right two 16-bit words */ 54 | largest = vec_max(largest,vec_sro(largest,s.cv)); 55 | 56 | s.c[15] = 16; /* Shift right one 16-bit word */ 57 | largest = vec_max(largest,vec_sro(largest,s.cv)); 58 | 59 | s.hv = largest; 60 | return s.s[7]; 61 | } 62 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/peakval_mmx.c: 
-------------------------------------------------------------------------------- 1 | /* Wrapper for the MMX version of peakval 2 | * Copyright 2004 Phil Karn, KA9Q 3 | */ 4 | 5 | #include 6 | 7 | int peakval_mmx_assist(signed short *,int); 8 | 9 | int peakval_mmx(signed short *b,int cnt){ 10 | int peak = 0; 11 | int a; 12 | 13 | while(((int)b & 7) != 0 && cnt != 0){ 14 | a = abs(*b); 15 | if(a > peak) 16 | peak = a; 17 | b++; 18 | cnt--; 19 | } 20 | a = peakval_mmx_assist(b,cnt); 21 | if(a > peak) 22 | peak = a; 23 | b += cnt & ~3; 24 | cnt &= 3; 25 | 26 | while(cnt != 0){ 27 | a = abs(*b); 28 | if(a > peak) 29 | peak = a; 30 | b++; 31 | cnt--; 32 | } 33 | return peak; 34 | } 35 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/peakval_mmx_assist.s: -------------------------------------------------------------------------------- 1 | # MMX assist routines for peakval 2 | # Copyright 2001 Phil Karn, KA9Q 3 | # May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | 5 | .text 6 | 7 | # Find peak value in signed 16-bit input samples 8 | # int peakval_mmx_assist(signed short *in,int cnt); 9 | .global peakval_mmx_assist 10 | .type peakval_mmx_assist,@function 11 | .align 16 12 | peakval_mmx_assist: 13 | pushl %ebp 14 | movl %esp,%ebp 15 | pushl %esi 16 | pushl %ecx 17 | pushl %ebx 18 | 19 | movl 8(%ebp),%esi 20 | movl 12(%ebp),%ecx 21 | 22 | pxor %mm7,%mm7 # clear peak 23 | 24 | 1: subl $4,%ecx 25 | jl 2f 26 | movq (%esi),%mm0 27 | movq %mm0,%mm1 28 | psraw $15,%mm1 # mm1 = 1's if negative, 0's if positive 29 | pxor %mm1,%mm0 # complement negatives 30 | psubw %mm1,%mm0 # add 1 to negatives 31 | movq %mm7,%mm6 # copy previous peak 32 | pcmpgtw %mm0,%mm6 # ff == old peak greater 33 | pand %mm6,%mm7 # select old peaks that are greater 34 | pandn %mm0,%mm6 # select new values that are greater 35 | por %mm6,%mm7 36 | 37 | addl $8,%esi 38 | jmp 1b 39 | 40 | 2: movd %mm7,%eax 41 | psrlq $16,%mm7 42 
| andl $0xffff,%eax 43 | 44 | movd %mm7,%edx 45 | psrlq $16,%mm7 46 | andl $0xffff,%edx 47 | cmpl %edx,%eax 48 | jnl 3f 49 | movl %edx,%eax 50 | 3: 51 | movd %mm7,%edx 52 | psrlq $16,%mm7 53 | andl $0xffff,%edx 54 | cmpl %edx,%eax 55 | jnl 4f 56 | movl %edx,%eax 57 | 4: 58 | movd %mm7,%edx 59 | andl $0xffff,%edx 60 | cmpl %edx,%eax 61 | jnl 5f 62 | movl %edx,%eax 63 | 5: 64 | emms 65 | popl %ebx 66 | popl %ecx 67 | popl %esi 68 | popl %ebp 69 | ret 70 | 71 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/peakval_port.c: -------------------------------------------------------------------------------- 1 | /* Portable C version of peakval 2 | * Copyright 2004 Phil Karn, KA9Q 3 | */ 4 | #include 5 | #include "fec.h" 6 | int peakval_port(signed short *b,int len){ 7 | int peak = 0; 8 | int a,i; 9 | 10 | for(i=0;i peak) 13 | peak = a; 14 | } 15 | return peak; 16 | } 17 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/peakval_sse.c: -------------------------------------------------------------------------------- 1 | /* IA-32 SSE version of peakval 2 | * Copyright 2004 Phil Karn, KA9Q 3 | */ 4 | 5 | #include 6 | #include "fec.h" 7 | 8 | int peakval_sse_assist(signed short *,int); 9 | 10 | int peakval_sse(signed short *b,int cnt){ 11 | int peak = 0; 12 | int a; 13 | 14 | while(((int)b & 7) != 0 && cnt != 0){ /* scalar scan until b is 8-byte aligned */ 15 | a = abs(*b); 16 | if(a > peak) 17 | peak = a; 18 | b++; 19 | cnt--; 20 | } 21 | a = peakval_sse_assist(b,cnt); /* SIMD assist scans whole groups of 4 samples */ 22 | if(a > peak) 23 | peak = a; 24 | b += cnt & ~3; /* skip the samples the assist covered */ 25 | cnt &= 3; 26 | 27 | while(cnt != 0){ /* remaining 0-3 samples */ 28 | a = abs(*b); 29 | if(a > peak) 30 | peak = a; 31 | b++; 32 | cnt--; 33 | } 34 | return peak; 35 | } 36 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/peakval_sse2.c: -------------------------------------------------------------------------------- 1 | /* IA-32 SSE2 version of peakval 2 | 
* Copyright 2004 Phil Karn, KA9Q 3 | */ 4 | #include 5 | #include "fec.h" 6 | 7 | int peakval_sse2_assist(signed short *,int); 8 | 9 | int peakval_sse2(signed short *b,int cnt){ 10 | int peak = 0; 11 | int a; 12 | 13 | while(((int)b & 15) != 0 && cnt != 0){ /* scalar scan until b is 16-byte aligned */ 14 | a = abs(*b); 15 | if(a > peak) 16 | peak = a; 17 | b++; 18 | cnt--; 19 | } 20 | a = peakval_sse2_assist(b,cnt); /* SIMD assist scans whole groups of 8 samples */ 21 | if(a > peak) 22 | peak = a; 23 | b += cnt & ~7; /* skip the samples the assist covered */ 24 | cnt &= 7; 25 | 26 | while(cnt != 0){ /* remaining 0-7 samples */ 27 | a = abs(*b); 28 | if(a > peak) 29 | peak = a; 30 | b++; 31 | cnt--; 32 | } 33 | return peak; 34 | } 35 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/peakval_sse2_assist.s: -------------------------------------------------------------------------------- 1 | # SSE2 assist routines for peakval 2 | # Copyright 2001 Phil Karn, KA9Q 3 | # May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | 5 | .text 6 | 7 | # Find peak absolute value in signed 16-bit input samples 8 | # int peakval_sse2_assist(signed short *in,int cnt); 9 | .global peakval_sse2_assist 10 | .type peakval_sse2_assist,@function 11 | .align 16 12 | peakval_sse2_assist: 13 | pushl %ebp 14 | movl %esp,%ebp 15 | pushl %esi 16 | pushl %ecx 17 | 18 | movl 8(%ebp),%esi 19 | movl 12(%ebp),%ecx 20 | 21 | pxor %xmm7,%xmm7 # clear peak 22 | 23 | 1: subl $8,%ecx # 8 samples are consumed per iteration 24 | jl 2f # fewer than 8 left: go reduce 25 | movaps (%esi),%xmm0 # aligned load; peakval_sse2 aligns b to 16 bytes first 26 | movaps %xmm0,%xmm1 27 | psraw $15,%xmm1 # xmm1 = 1's if negative, 0's if positive 28 | pxor %xmm1,%xmm0 # complement negatives 29 | psubw %xmm1,%xmm0 # add 1 to negatives 30 | pmaxsw %xmm0,%xmm7 # store peak 31 | 32 | addl $16,%esi 33 | jmp 1b 34 | 35 | 2: movaps %xmm7,%xmm0 36 | psrldq $8,%xmm0 # fold upper 8 bytes onto lower 8 37 | pmaxsw %xmm0,%xmm7 38 | movaps %xmm7,%xmm0 39 | psrlq $32,%xmm0 # fold words 2,3 onto words 0,1 40 | pmaxsw %xmm0,%xmm7 41 | movaps %xmm7,%xmm0 42 | psrlq $16,%xmm0 # fold word 1 onto word 0 43 | pmaxsw %xmm0,%xmm7 # max value in low word of %xmm7 44 | 45 | movd %xmm7,%eax 46 | andl $0xffff,%eax # keep only the low 16-bit word 47 | 48 | popl %ecx 49 | popl 
%esi 50 | popl %ebp 51 | ret 52 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/peakval_sse_assist.s: -------------------------------------------------------------------------------- 1 | # SSE assist routines for peakval 2 | # Copyright 2001 Phil Karn, KA9Q 3 | # May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | 5 | .text 6 | 7 | # Find peak absolute value in signed 16-bit input samples 8 | # int peakval_sse_assist(signed short *in,int cnt); 9 | .global peakval_sse_assist 10 | .type peakval_sse_assist,@function 11 | .align 16 12 | peakval_sse_assist: 13 | pushl %ebp 14 | movl %esp,%ebp 15 | pushl %esi 16 | pushl %ecx 17 | 18 | movl 8(%ebp),%esi 19 | movl 12(%ebp),%ecx 20 | 21 | pxor %mm7,%mm7 # clear peak 22 | 23 | 1: subl $4,%ecx # 4 samples are consumed per iteration 24 | jl 2f # fewer than 4 left: go reduce 25 | movq (%esi),%mm0 26 | movq %mm0,%mm1 27 | psraw $15,%mm1 # mm1 = 1's if negative, 0's if positive 28 | pxor %mm1,%mm0 # complement negatives 29 | psubw %mm1,%mm0 # add 1 to negatives 30 | pmaxsw %mm0,%mm7 # store peak 31 | 32 | addl $8,%esi 33 | jmp 1b 34 | 35 | 2: movq %mm7,%mm0 36 | psrlq $32,%mm0 # fold words 2,3 onto words 0,1 37 | pmaxsw %mm0,%mm7 38 | movq %mm7,%mm0 39 | psrlq $16,%mm0 # fold word 1 onto word 0 40 | pmaxsw %mm0,%mm7 # max value in low word of %mm7 41 | 42 | movd %mm7,%eax 43 | andl $0xffff,%eax # keep only the low 16-bit word 44 | 45 | emms 46 | popl %ecx 47 | popl %esi 48 | popl %ebp 49 | ret 50 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/rs-common.h: -------------------------------------------------------------------------------- 1 | /* Stuff common to all the general-purpose Reed-Solomon codecs 2 | * Copyright 2004 Phil Karn, KA9Q 3 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | */ 5 | 6 | /* Reed-Solomon codec control block */ 7 | struct rs { 8 | int mm; /* Bits per symbol */ 9 | int nn; /* Symbols per block (= (1<= rs->nn) { 22 | x -= rs->nn; 23 | x = (x >> rs->mm) + (x & rs->nn); 24 | } 25 | 
return x; 26 | } 27 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/rs_speedtest.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "fec.h" 8 | 9 | int main(){ 10 | unsigned char block[255]; 11 | int i; 12 | void *rs; 13 | struct rusage start,finish; 14 | double extime; 15 | int trials = 10000; 16 | 17 | for(i=0;i<223;i++) 18 | block[i] = 0x01; 19 | 20 | rs = init_rs_char(8,0x187,112,11,32,0); 21 | encode_rs_char(rs,block,&block[223]); 22 | 23 | getrusage(RUSAGE_SELF,&start); 24 | for(i=0;i 2 | #include 3 | #include "fec.h" 4 | 5 | #define MAX_RANDOM 0x7fffffff 6 | 7 | /* Generate gaussian random double with specified mean and std_dev */ 8 | double normal_rand(double mean, double std_dev) 9 | { 10 | double fac,rsq,v1,v2; 11 | static double gset; 12 | static int iset; 13 | 14 | if(iset){ 15 | /* Already got one */ 16 | iset = 0; 17 | return mean + std_dev*gset; 18 | } 19 | /* Generate two evenly distributed numbers between -1 and +1 20 | * that are inside the unit circle 21 | */ 22 | do { 23 | v1 = 2.0 * (double)random() / MAX_RANDOM - 1; 24 | v2 = 2.0 * (double)random() / MAX_RANDOM - 1; 25 | rsq = v1*v1 + v2*v2; 26 | } while(rsq >= 1.0 || rsq == 0.0); 27 | fac = sqrt(-2.0*log(rsq)/rsq); 28 | gset = v1*fac; 29 | iset++; 30 | return mean + std_dev*v2*fac; 31 | } 32 | 33 | unsigned char addnoise(int sym,double amp,double gain,double offset,int clip){ 34 | int sample; 35 | 36 | sample = offset + gain*normal_rand(sym?amp:-amp,1.0); 37 | /* Clip to 8-bit offset range */ 38 | if(sample < 0) 39 | sample = 0; 40 | else if(sample > clip) 41 | sample = clip; 42 | return sample; 43 | } 44 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/sqtest.c: -------------------------------------------------------------------------------- 1 | /* Verify 
correctness of the sum-of-square routines */ 2 | #include 3 | #include 4 | #include 5 | 6 | /* These values should trigger leading/trailing array fragment handling */ 7 | #define NSAMP 200002 8 | #define OFFSET 1 9 | 10 | long long sumsq_wq(signed short *in,int cnt); 11 | long long sumsq_wq_ref(signed short *in,int cnt); 12 | 13 | int main(){ 14 | int i; 15 | long long result,rresult; 16 | signed short samples[NSAMP]; 17 | 18 | srandom(time(NULL)); 19 | 20 | for(i=0;i old metrics 35 | movl NEWMETRICS(%edx),%edi # edi -> new metrics 36 | movl DP(%edx),%edx # edx -> decisions 37 | 38 | 1: movl 16(%ebp),%eax # eax = nbits 39 | decl %eax 40 | jl 2f # passed zero, we're done 41 | movl %eax,16(%ebp) 42 | 43 | xorl %eax,%eax 44 | movl 12(%ebp),%ebx # %ebx = syms 45 | movb (%ebx),%al 46 | movd %eax,%mm6 # mm6[0] = first symbol 47 | movb 1(%ebx),%al 48 | movd %eax,%mm5 # mm5[0] = second symbol 49 | addl $2,%ebx 50 | movl %ebx,12(%ebp) 51 | 52 | punpcklbw %mm6,%mm6 # mm6[1] = mm6[0] 53 | punpcklbw %mm5,%mm5 54 | movq thirtyones,%mm7 55 | 56 | pshufw $0,%mm6,%mm6 # copy low word to upper 3 57 | pshufw $0,%mm5,%mm5 58 | # mm6 now contains first symbol in each byte, mm5 the second 59 | 60 | # each invocation of this macro does 8 butterflies in parallel 61 | .MACRO butterfly GROUP 62 | # compute branch metrics 63 | movq Branchtab27_sse+(8*\GROUP),%mm4 64 | movq Branchtab27_sse+32+(8*\GROUP),%mm3 65 | pxor %mm6,%mm4 66 | pxor %mm5,%mm3 67 | pavgb %mm3,%mm4 # mm4 contains branch metrics 68 | psrlw $3,%mm4 69 | pand %mm7,%mm4 70 | 71 | movq (8*\GROUP)(%esi),%mm0 # Incoming path metric, high bit = 0 72 | movq ((8*\GROUP)+32)(%esi),%mm3 # Incoming path metric, high bit = 1 73 | movq %mm0,%mm2 74 | movq %mm3,%mm1 75 | paddusb %mm4,%mm0 76 | paddusb %mm4,%mm3 77 | 78 | # invert branch metrics. 
This works only because they're 5 bits 79 | pxor %mm7,%mm4 80 | 81 | paddusb %mm4,%mm1 82 | paddusb %mm4,%mm2 83 | 84 | # Find survivors, leave in mm0,2 85 | pminub %mm1,%mm0 86 | pminub %mm3,%mm2 87 | # get decisions, leave in mm1,3 88 | pcmpeqb %mm0,%mm1 89 | pcmpeqb %mm2,%mm3 90 | 91 | # interleave and store new branch metrics in mm0,2 92 | movq %mm0,%mm4 93 | punpckhbw %mm2,%mm0 # interleave second 8 new metrics 94 | punpcklbw %mm2,%mm4 # interleave first 8 new metrics 95 | movq %mm0,(16*\GROUP+8)(%edi) 96 | movq %mm4,(16*\GROUP)(%edi) 97 | 98 | # interleave decisions, accumulate into %ebx 99 | movq %mm1,%mm4 100 | punpckhbw %mm3,%mm1 101 | punpcklbw %mm3,%mm4 102 | # Due to an error in the Intel instruction set ref (the register 103 | # fields are swapped), gas assembles pmovmskb incorrectly 104 | # See http://mail.gnu.org/pipermail/bug-gnu-utils/2000-August/002341.html 105 | .byte 0x0f,0xd7,0xc1 # pmovmskb %mm1,%eax 106 | shll $((16*\GROUP+8)&31),%eax 107 | orl %eax,%ebx 108 | .byte 0x0f,0xd7,0xc4 # pmovmskb %mm4,%eax 109 | shll $((16*\GROUP)&31),%eax 110 | orl %eax,%ebx 111 | .endm 112 | 113 | # invoke macro 4 times for a total of 32 butterflies 114 | xorl %ebx,%ebx # clear decisions 115 | butterfly GROUP=0 116 | butterfly GROUP=1 117 | movl %ebx,(%edx) # stash first 32 decisions 118 | xorl %ebx,%ebx 119 | butterfly GROUP=2 120 | butterfly GROUP=3 121 | movl %ebx,4(%edx) # stash second 32 decisions 122 | 123 | addl $8,%edx # bump decision pointer 124 | 125 | # see if we have to normalize 126 | movl (%edi),%eax # extract first output metric 127 | andl $255,%eax 128 | cmpl $150,%eax # is it greater than 150? 
129 | movl $0,%eax 130 | jle done # No, no need to normalize 131 | 132 | # Normalize by finding smallest metric and subtracting it 133 | # from all metrics 134 | movq (%edi),%mm0 135 | pminub 8(%edi),%mm0 136 | pminub 16(%edi),%mm0 137 | pminub 24(%edi),%mm0 138 | pminub 32(%edi),%mm0 139 | pminub 40(%edi),%mm0 140 | pminub 48(%edi),%mm0 141 | pminub 56(%edi),%mm0 142 | # mm0 contains 8 smallest metrics 143 | # crunch down to single lowest metric 144 | movq %mm0,%mm1 145 | psrlq $32,%mm0 146 | pminub %mm1,%mm0 147 | movq %mm0,%mm1 148 | psrlq $16,%mm0 149 | pminub %mm1,%mm0 150 | movq %mm0,%mm1 151 | psrlq $8,%mm0 152 | pminub %mm1,%mm0 153 | punpcklbw %mm0,%mm0 # expand to all 8 bytes 154 | pshufw $0,%mm0,%mm0 155 | 156 | # mm0 now contains lowest metric in all 8 bytes 157 | # subtract it from every output metric 158 | # Trashes %mm7 159 | .macro PSUBUSBM REG,MEM 160 | movq \MEM,%mm7 161 | psubusb \REG,%mm7 162 | movq %mm7,\MEM 163 | .endm 164 | 165 | PSUBUSBM %mm0,(%edi) 166 | PSUBUSBM %mm0,8(%edi) 167 | PSUBUSBM %mm0,16(%edi) 168 | PSUBUSBM %mm0,24(%edi) 169 | PSUBUSBM %mm0,32(%edi) 170 | PSUBUSBM %mm0,40(%edi) 171 | PSUBUSBM %mm0,48(%edi) 172 | PSUBUSBM %mm0,56(%edi) 173 | 174 | movd %mm0,%eax 175 | and $0xff,%eax 176 | 177 | done: # swap metrics 178 | movl %esi,%eax 179 | movl %edi,%esi 180 | movl %eax,%edi 181 | jmp 1b 182 | 183 | 2: emms 184 | movl 8(%ebp),%ebx # ebx = vp 185 | # stash metric pointers 186 | movl %esi,OLDMETRICS(%ebx) 187 | movl %edi,NEWMETRICS(%ebx) 188 | movl %edx,DP(%ebx) # stash incremented value of vp->dp 189 | xorl %eax,%eax 190 | err: popl %ebx 191 | popl %edx 192 | popl %edi 193 | popl %esi 194 | popl %ebp 195 | 196 | ret 197 | 198 | .data 199 | 200 | .align 16 201 | thirtyones: 202 | .byte 31,31,31,31,31,31,31,31 203 | 204 | 205 | 206 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/sumsq.c: -------------------------------------------------------------------------------- 1 | 
/* Compute the sum of the squares of a vector of signed shorts 2 | 3 | * Copyright 2004 Phil Karn, KA9Q 4 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 5 | */ 6 | 7 | #include 8 | #include "fec.h" 9 | 10 | unsigned long long sumsq_port(signed short *,int); 11 | 12 | #ifdef __i386__ 13 | unsigned long long sumsq_mmx(signed short *,int); 14 | unsigned long long sumsq_sse(signed short *,int); 15 | unsigned long long sumsq_sse2(signed short *,int); 16 | #endif 17 | 18 | #ifdef __VEC__ 19 | unsigned long long sumsq_av(signed short *,int); 20 | #endif 21 | /* Return the sum of the squares of the cnt samples at in, using the routine selected by Cpu_mode */ 22 | unsigned long long sumsq(signed short *in,int cnt){ 23 | switch(Cpu_mode){ 24 | case PORT: 25 | default: /* any mode without a case below falls back to the portable C routine */ 26 | return sumsq_port(in,cnt); 27 | #ifdef __i386__ 28 | case SSE: /* no separate SSE routine is called here; SSE shares the MMX one */ 29 | case MMX: 30 | return sumsq_mmx(in,cnt); 31 | case SSE2: 32 | return sumsq_sse2(in,cnt); 33 | #endif 34 | 35 | #ifdef __VEC__ 36 | case ALTIVEC: 37 | return sumsq_av(in,cnt); 38 | #endif 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/sumsq_av.c: -------------------------------------------------------------------------------- 1 | /* Compute the sum of the squares of a vector of signed shorts 2 | 3 | * This is the Altivec SIMD version. 
It's a little hairy because Altivec 4 | * does not do 64-bit operations directly, so we have to accumulate separate 5 | * 32-bit sums and carries 6 | 7 | * Copyright 2004 Phil Karn, KA9Q 8 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 9 | */ 10 | 11 | #include "fec.h" 12 | 13 | unsigned long long sumsq_av(signed short *in,int cnt){ 14 | long long sum; 15 | vector signed short x; 16 | vector unsigned int sums,carries,s1,s2; 17 | int pad; 18 | union { vector unsigned char cv; vector unsigned int iv; unsigned int w[4]; unsigned char c[16];} s; 19 | 20 | carries = sums = (vector unsigned int)(0); 21 | if((pad = (int)in & 15)!=0){ 22 | /* Load unaligned leading word */ 23 | x = vec_perm(vec_ld(0,in),(vector signed short)(0),vec_lvsl(0,in)); 24 | if(cnt < 8){ /* Shift right to chop stuff beyond end of short block */ 25 | s.c[15] = (8-cnt)<<4; 26 | x = vec_sro(x,s.cv); 27 | } 28 | sums = (vector unsigned int)vec_msum(x,x,(vector signed int)(0)); 29 | in += 8-pad/2; 30 | cnt -= 8-pad/2; 31 | } 32 | /* Everything is now aligned, rip through most of the block */ 33 | while(cnt >= 8){ 34 | x = vec_ld(0,in); 35 | /* A single vec_msum cannot overflow, but we have to sum it with 36 | * the earlier terms separately to handle the carries 37 | * The cast to unsigned is OK because squares are always positive 38 | */ 39 | s1 = (vector unsigned int)vec_msum(x,x,(vector signed int)(0)); 40 | carries = vec_add(carries,vec_addc(sums,s1)); 41 | sums = vec_add(sums,s1); 42 | in += 8; 43 | cnt -= 8; 44 | } 45 | /* Handle trailing fragment, if any */ 46 | if(cnt > 0){ 47 | x = vec_ld(0,in); 48 | s.c[15] = (8-cnt)<<4; 49 | x = vec_sro(x,s.cv); 50 | s1 = (vector unsigned int)vec_msum(x,x,(vector signed int)(0)); 51 | carries = vec_add(carries,vec_addc(sums,s1)); 52 | sums = vec_add(sums,s1); 53 | } 54 | /* Combine 4 sub-sums and carries */ 55 | s.c[15] = 64; /* Shift right two 32-bit words */ 56 | s1 = vec_sro(sums,s.cv); 57 | s2 = vec_sro(carries,s.cv); 58 | 
carries = vec_add(carries,vec_addc(sums,s1)); 59 | sums = vec_add(sums,s1); 60 | carries = vec_add(carries,s2); 61 | 62 | s.c[15] = 32; /* Shift right one 32-bit word */ 63 | s1 = vec_sro(sums,s.cv); 64 | s2 = vec_sro(carries,s.cv); 65 | carries = vec_add(carries,vec_addc(sums,s1)); 66 | sums = vec_add(sums,s1); 67 | carries = vec_add(carries,s2); 68 | 69 | /* Extract sum and carries from right-hand words and combine into result */ 70 | s.iv = sums; 71 | sum = s.w[3]; 72 | 73 | s.iv = carries; 74 | sum += (long long)s.w[3] << 32; 75 | 76 | return sum; 77 | } 78 | 79 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/sumsq_mmx.c: -------------------------------------------------------------------------------- 1 | /* Compute the sum of the squares of a vector of signed shorts 2 | 3 | * MMX-assisted version (also used on SSE) 4 | 5 | * The SSE2 and MMX assist routines both operate on multiples of 6 | * 8 words; they differ only in their alignment requirements (8 bytes 7 | * for MMX, 16 bytes for SSE2) 8 | 9 | * Copyright 2004 Phil Karn, KA9Q 10 | * May be used under the terms of the GNU Lesser Public License (LGPL) 11 | */ 12 | 13 | long long sumsq_mmx_assist(signed short *,int); 14 | 15 | long long sumsq_mmx(signed short *in,int cnt){ 16 | long long sum = 0; 17 | 18 | /* Handle stuff before the next 8-byte boundary */ 19 | while(((int)in & 7) != 0 && cnt != 0){ 20 | sum += (long)in[0] * in[0]; 21 | in++; 22 | cnt--; 23 | } 24 | sum += sumsq_mmx_assist(in,cnt); 25 | in += cnt & ~7; 26 | cnt &= 7; 27 | 28 | /* Handle up to 7 words at end */ 29 | while(cnt != 0){ 30 | sum += (long)in[0] * in[0]; 31 | in++; 32 | cnt--; 33 | } 34 | return sum; 35 | } 36 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/sumsq_mmx_assist.s: -------------------------------------------------------------------------------- 1 | # MMX assist routines for sumsq 2 | # Copyright 2001 
Phil Karn, KA9Q 3 | # May be used under the terms of the GNU Public License (GPL) 4 | 5 | .text 6 | 7 | # Evaluate sum of squares of signed 16-bit input samples 8 | # long long sumsq_mmx_assist(signed short *in,int cnt); 9 | .global sumsq_mmx_assist 10 | .type sumsq_mmx_assist,@function 11 | .align 16 12 | sumsq_mmx_assist: 13 | pushl %ebp 14 | movl %esp,%ebp 15 | pushl %esi 16 | pushl %ecx 17 | pushl %ebx 18 | 19 | movl 8(%ebp),%esi 20 | movl 12(%ebp),%ecx 21 | xor %eax,%eax 22 | xor %edx,%edx 23 | 24 | # Since 4 * 32767**2 < 2**32, we can accumulate two at a time 25 | 1: subl $8,%ecx 26 | jl 2f 27 | movq (%esi),%mm0 # S0 S1 S2 S3 28 | pmaddwd %mm0,%mm0 # (S0^2+S1^2) (S2^2+S3^2) 29 | movq 8(%esi),%mm6 # S4 S5 S6 S7 30 | pmaddwd %mm6,%mm6 # (S4^2+S5^2) (S6^2+S7^2) 31 | paddd %mm6,%mm0 # (S0^2+S1^2+S4^2+S5^2)(S2^2+S3^2+S6^2+S7^2) 32 | movd %mm0,%ebx 33 | addl %ebx,%eax 34 | adcl $0,%edx 35 | psrlq $32,%mm0 36 | movd %mm0,%ebx 37 | addl %ebx,%eax 38 | adcl $0,%edx 39 | addl $16,%esi 40 | jmp 1b 41 | 42 | 2: emms 43 | popl %ebx 44 | popl %ecx 45 | popl %esi 46 | popl %ebp 47 | ret 48 | 49 | # Evaluate sum of squares of signed 16-bit input samples 50 | # long sumsq_wd_mmx_assist(signed short *in,int cnt); 51 | # Quick version, only safe for small numbers of small input values... 
52 | .global sumsq_wd_mmx_assist 53 | .type sumsq_wd_mmx_assist,@function 54 | .align 16 55 | sumsq_wd_mmx_assist: 56 | pushl %ebp 57 | movl %esp,%ebp 58 | pushl %esi 59 | 60 | movl 8(%ebp),%esi 61 | movl 12(%ebp),%ecx 62 | pxor %mm2,%mm2 # zero sum 63 | 64 | 1: subl $8,%ecx 65 | jl 2f 66 | movq (%esi),%mm0 # S0 S1 S2 S3 67 | pmaddwd %mm0,%mm0 # (S0*S0+S1*S1) (S2*S2+S3*S3) 68 | movq 8(%esi),%mm1 69 | pmaddwd %mm1,%mm1 70 | paddd %mm1,%mm2 71 | paddd %mm0,%mm2 # accumulate 72 | 73 | addl $16,%esi 74 | jmp 1b 75 | 76 | 2: movd %mm2,%eax # even sum 77 | psrlq $32,%mm2 78 | movd %mm2,%edx # odd sum 79 | addl %edx,%eax 80 | emms 81 | popl %esi 82 | popl %ebp 83 | ret 84 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/sumsq_port.c: -------------------------------------------------------------------------------- 1 | /* Compute the sum of the squares of a vector of signed shorts 2 | 3 | * Portable C version 4 | * Copyright 2004 Phil Karn, KA9Q 5 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 6 | */ 7 | 8 | unsigned long long sumsq_port(signed short *in,int cnt){ 9 | long long sum = 0; 10 | int i; 11 | 12 | for(i=0;i 2 | #include 3 | #include 4 | #include 5 | #include "config.h" 6 | #ifdef HAVE_GETOPT_H 7 | #include 8 | #endif 9 | #include "fec.h" 10 | 11 | #if HAVE_GETOPT_LONG 12 | struct option Options[] = { 13 | {"frame-length",1,NULL,'l'}, 14 | {"frame-count",1,NULL,'n'}, 15 | {"verbose",0,NULL,'v'}, 16 | {"force-altivec",0,NULL,'a'}, 17 | {"force-port",0,NULL,'p'}, 18 | {"force-mmx",0,NULL,'m'}, 19 | {"force-sse",0,NULL,'s'}, 20 | {"force-sse2",0,NULL,'t'}, 21 | {NULL}, 22 | }; 23 | #endif 24 | 25 | int Verbose = 0; 26 | 27 | int main(int argc,char *argv[]){ 28 | signed short *buf; 29 | int i,d,trial,trials=10000; 30 | int bufsize = 2048; 31 | long long port_sum,simd_sum; 32 | time_t t; 33 | int timetrials=0; 34 | 35 | find_cpu_mode(); 36 | time(&t); 37 | srandom(t); 38 | 39 | 
#if HAVE_GETOPT_LONG 40 | while((d = getopt_long(argc,argv,"vapmstl:n:T",Options,NULL)) != EOF){ 41 | #else 42 | while((d = getopt(argc,argv,"vapmstl:n:T")) != EOF){ 43 | #endif 44 | switch(d){ 45 | case 'a': 46 | Cpu_mode = ALTIVEC; 47 | break; 48 | case 'p': 49 | Cpu_mode = PORT; 50 | break; 51 | case 'm': 52 | Cpu_mode = MMX; 53 | break; 54 | case 's': 55 | Cpu_mode = SSE; 56 | break; 57 | case 't': 58 | Cpu_mode = SSE2; 59 | break; 60 | case 'l': 61 | bufsize = atoi(optarg); 62 | break; 63 | case 'n': 64 | trials = atoi(optarg); 65 | break; 66 | case 'v': 67 | Verbose++; 68 | break; 69 | case 'T': 70 | timetrials++; 71 | break; 72 | } 73 | } 74 | 75 | buf = (signed short *)calloc(bufsize,sizeof(signed short)); 76 | if(timetrials){ 77 | for(trial=0;trial 5 | #include 6 | #include 7 | #include "fec.h" 8 | 9 | /* Create a new instance of a Viterbi decoder */ 10 | void *create_viterbi27(int len){ 11 | find_cpu_mode(); 12 | 13 | switch(Cpu_mode){ 14 | case PORT: 15 | default: 16 | return create_viterbi27_port(len); 17 | #ifdef __VEC__ 18 | case ALTIVEC: 19 | return create_viterbi27_av(len); 20 | #endif 21 | #ifdef __i386__ 22 | case MMX: 23 | return create_viterbi27_mmx(len); 24 | case SSE: 25 | return create_viterbi27_sse(len); 26 | case SSE2: 27 | return create_viterbi27_sse2(len); 28 | #endif 29 | } 30 | } 31 | 32 | void set_viterbi27_polynomial(int polys[2]){ 33 | switch(Cpu_mode){ 34 | case PORT: 35 | default: 36 | set_viterbi27_polynomial_port(polys); 37 | break; 38 | #ifdef __VEC__ 39 | case ALTIVEC: 40 | set_viterbi27_polynomial_av(polys); 41 | break; 42 | #endif 43 | #ifdef __i386__ 44 | case MMX: 45 | set_viterbi27_polynomial_mmx(polys); 46 | break; 47 | case SSE: 48 | set_viterbi27_polynomial_sse(polys); 49 | break; 50 | case SSE2: 51 | set_viterbi27_polynomial_sse2(polys); 52 | break; 53 | #endif 54 | } 55 | } 56 | 57 | /* Initialize Viterbi decoder for start of new frame */ 58 | int init_viterbi27(void *p,int starting_state){ 59 | switch(Cpu_mode){ 60 | 
case PORT: 61 | default: 62 | return init_viterbi27_port(p,starting_state); 63 | #ifdef __VEC__ 64 | case ALTIVEC: 65 | return init_viterbi27_av(p,starting_state); 66 | #endif 67 | #ifdef __i386__ 68 | case MMX: 69 | return init_viterbi27_mmx(p,starting_state); 70 | case SSE: 71 | return init_viterbi27_sse(p,starting_state); 72 | case SSE2: 73 | return init_viterbi27_sse2(p,starting_state); 74 | #endif 75 | } 76 | } 77 | 78 | /* Viterbi chainback */ 79 | int chainback_viterbi27( 80 | void *p, 81 | unsigned char *data, /* Decoded output data */ 82 | unsigned int nbits, /* Number of data bits */ 83 | unsigned int endstate){ /* Terminal encoder state */ 84 | 85 | switch(Cpu_mode){ 86 | case PORT: 87 | default: 88 | return chainback_viterbi27_port(p,data,nbits,endstate); 89 | #ifdef __VEC__ 90 | case ALTIVEC: 91 | return chainback_viterbi27_av(p,data,nbits,endstate); 92 | #endif 93 | #ifdef __i386__ 94 | case MMX: 95 | return chainback_viterbi27_mmx(p,data,nbits,endstate); 96 | case SSE: 97 | return chainback_viterbi27_sse(p,data,nbits,endstate); 98 | case SSE2: 99 | return chainback_viterbi27_sse2(p,data,nbits,endstate); 100 | #endif 101 | } 102 | } 103 | 104 | /* Delete instance of a Viterbi decoder */ 105 | void delete_viterbi27(void *p){ 106 | switch(Cpu_mode){ 107 | case PORT: 108 | default: 109 | delete_viterbi27_port(p); 110 | break; 111 | #ifdef __VEC__ 112 | case ALTIVEC: 113 | delete_viterbi27_av(p); 114 | break; 115 | #endif 116 | #ifdef __i386__ 117 | case MMX: 118 | delete_viterbi27_mmx(p); 119 | break; 120 | case SSE: 121 | delete_viterbi27_sse(p); 122 | break; 123 | case SSE2: 124 | delete_viterbi27_sse2(p); 125 | break; 126 | #endif 127 | } 128 | } 129 | 130 | /* Update decoder with a block of demodulated symbols 131 | * Note that nbits is the number of decoded data bits, not the number 132 | * of symbols! 
133 | */ 134 | int update_viterbi27_blk(void *p,unsigned char syms[],int nbits){ 135 | if(p == NULL) 136 | return -1; 137 | 138 | switch(Cpu_mode){ 139 | case PORT: 140 | default: 141 | update_viterbi27_blk_port(p,syms,nbits); 142 | break; 143 | #ifdef __VEC__ 144 | case ALTIVEC: 145 | update_viterbi27_blk_av(p,syms,nbits); 146 | break; 147 | #endif 148 | #ifdef __i386__ 149 | case MMX: 150 | update_viterbi27_blk_mmx(p,syms,nbits); 151 | break; 152 | case SSE: 153 | update_viterbi27_blk_sse(p,syms,nbits); 154 | break; 155 | case SSE2: 156 | update_viterbi27_blk_sse2(p,syms,nbits); 157 | break; 158 | #endif 159 | } 160 | return 0; 161 | } 162 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/viterbi27_mmx.c: -------------------------------------------------------------------------------- 1 | /* K=7 r=1/2 Viterbi decoder for MMX 2 | * Copyright Feb 2004, Phil Karn, KA9Q 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "fec.h" 9 | 10 | typedef union { char c[64]; __m64 v[8];} decision_t; 11 | typedef union { unsigned char c[64]; __m64 v[8];} metric_t; 12 | 13 | unsigned char Mettab27_1[256][32] __attribute__ ((aligned(16))); 14 | unsigned char Mettab27_2[256][32] __attribute__ ((aligned(16))); 15 | static int Init = 0; 16 | 17 | /* State info for instance of Viterbi decoder 18 | * Don't change this without also changing references in mmxbfly27.s! 
19 | */ 20 | struct v27 { 21 | metric_t metrics1; /* path metric buffer 1 */ 22 | metric_t metrics2; /* path metric buffer 2 */ 23 | decision_t *dp; /* Pointer to current decision */ 24 | metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ 25 | decision_t *decisions; /* Beginning of decisions for block */ 26 | }; 27 | 28 | /* Initialize Viterbi decoder for start of new frame */ 29 | int init_viterbi27_mmx(void *p,int starting_state){ 30 | struct v27 *vp = (struct v27 *)p; 31 | int i; 32 | 33 | if(p == NULL) 34 | return -1; 35 | for(i=0;i<64;i++) 36 | vp->metrics1.c[i] = 63; 37 | 38 | vp->old_metrics = &vp->metrics1; 39 | vp->new_metrics = &vp->metrics2; 40 | vp->dp = vp->decisions; 41 | vp->old_metrics->c[starting_state & 63] = 0; /* Bias known start state */ 42 | return 0; 43 | } 44 | 45 | void set_viterbi27_polynomial_mmx(int polys[2]){ 46 | int state; 47 | 48 | for(state=0;state < 32;state++){ 49 | int symbol; 50 | for(symbol = 0;symbol < 256;symbol++){ 51 | int sym; 52 | 53 | sym = parity((2*state) & abs(polys[0])) ^ (polys[0] < 0); 54 | Mettab27_1[symbol][state] = (sym ? (255-symbol):symbol) / 16; 55 | 56 | sym = parity((2*state) & abs(polys[1])) ^ (polys[1] < 0); 57 | Mettab27_2[symbol][state] = (sym ? 
(255-symbol):symbol) / 16; 58 | } 59 | } 60 | Init++; 61 | } 62 | 63 | 64 | /* Create a new instance of a Viterbi decoder */ 65 | void *create_viterbi27_mmx(int len){ 66 | struct v27 *vp; 67 | int polys[2] = { V27POLYA, V27POLYB }; 68 | 69 | if(Init == 0){ 70 | set_viterbi27_polynomial_mmx(polys); 71 | } 72 | if((vp = (struct v27 *)malloc(sizeof(struct v27))) == NULL) 73 | return NULL; 74 | 75 | if((vp->decisions = (decision_t *)malloc((len+6)*sizeof(decision_t))) == NULL){ 76 | free(vp); 77 | return NULL; 78 | } 79 | init_viterbi27_mmx(vp,0); 80 | return vp; 81 | } 82 | 83 | /* Viterbi chainback: walk the stored decisions backwards from endstate, writing decoded bytes into data[]. Returns 0, or -1 if p is NULL */ 84 | int chainback_viterbi27_mmx( 85 | void *p, 86 | unsigned char *data, /* Decoded output data */ 87 | unsigned int nbits, /* Number of data bits */ 88 | unsigned int endstate){ /* Terminal encoder state */ 89 | 90 | struct v27 *vp = (struct v27 *)p; 91 | decision_t *d; 92 | 93 | if(p == NULL) 94 | return -1; 95 | d = (decision_t *)vp->decisions; 96 | endstate = (endstate & 63) << 2; /* Keep the 6-bit state in bits 7..2 (the loop indexes with endstate>>2); the two low bits make room to accumulate a full decoded byte */ 97 | d += 6; /* Look past tail */ 98 | while(nbits-- != 0){ 99 | int k; 100 | 101 | k = d[nbits].c[endstate>>2] & 1; 102 | data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); 103 | } 104 | return 0; 105 | } 106 | 107 | /* Delete instance of a Viterbi decoder */ 108 | void delete_viterbi27_mmx(void *p){ 109 | struct v27 *vp = p; 110 | 111 | if(vp != NULL){ 112 | free(vp->decisions); 113 | free(vp); 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/viterbi27_port.c: -------------------------------------------------------------------------------- 1 | /* K=7 r=1/2 Viterbi decoder in portable C 2 | * Copyright Feb 2004, Phil Karn, KA9Q 3 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | */ 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "fec.h" 10 | 11 | 12 | typedef union { unsigned int w[64]; } metric_t; 13 | typedef union { unsigned long w[2];} decision_t; 14 | static union 
branchtab27 { unsigned char c[32]; } Branchtab27[2] __attribute__ ((aligned(16))); 15 | static int Init = 0; 16 | 17 | /* State info for instance of Viterbi decoder 18 | * Don't change this without also changing references in [mmx|sse|sse2]bfly29.s! 19 | */ 20 | struct v27 { 21 | metric_t metrics1; /* path metric buffer 1 */ 22 | metric_t metrics2; /* path metric buffer 2 */ 23 | decision_t *dp; /* Pointer to current decision */ 24 | metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ 25 | decision_t *decisions; /* Beginning of decisions for block */ 26 | }; 27 | 28 | /* Initialize Viterbi decoder for start of new frame */ 29 | int init_viterbi27_port(void *p,int starting_state){ 30 | struct v27 *vp = p; 31 | int i; 32 | 33 | if(p == NULL) 34 | return -1; 35 | for(i=0;i<64;i++) 36 | vp->metrics1.w[i] = 63; 37 | 38 | vp->old_metrics = &vp->metrics1; 39 | vp->new_metrics = &vp->metrics2; 40 | vp->dp = vp->decisions; 41 | vp->old_metrics->w[starting_state & 63] = 0; /* Bias known start state */ 42 | return 0; 43 | } 44 | 45 | void set_viterbi27_polynomial_port(int polys[2]){ 46 | int state; 47 | 48 | for(state=0;state < 32;state++){ 49 | Branchtab27[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; 50 | Branchtab27[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 
255 : 0; 51 | } 52 | Init++; 53 | } 54 | 55 | /* Create a new instance of a Viterbi decoder */ 56 | void *create_viterbi27_port(int len){ 57 | struct v27 *vp; 58 | 59 | if(!Init){ 60 | int polys[2] = { V27POLYA, V27POLYB }; 61 | set_viterbi27_polynomial_port(polys); 62 | } 63 | if((vp = malloc(sizeof(struct v27))) == NULL) 64 | return NULL; 65 | if((vp->decisions = malloc((len+6)*sizeof(decision_t))) == NULL){ 66 | free(vp); 67 | return NULL; 68 | } 69 | init_viterbi27_port(vp,0); 70 | 71 | return vp; 72 | } 73 | 74 | /* Viterbi chainback */ 75 | int chainback_viterbi27_port( 76 | void *p, 77 | unsigned char *data, /* Decoded output data */ 78 | unsigned int nbits, /* Number of data bits */ 79 | unsigned int endstate){ /* Terminal encoder state */ 80 | struct v27 *vp = p; 81 | decision_t *d; 82 | 83 | if(p == NULL) 84 | return -1; 85 | d = vp->decisions; 86 | /* Make room beyond the end of the encoder register so we can 87 | * accumulate a full byte of decoded data 88 | */ 89 | endstate %= 64; 90 | endstate <<= 2; 91 | 92 | /* The store into data[] only needs to be done every 8 bits. 
93 | * But this avoids a conditional branch, and the writes will 94 | * combine in the cache anyway 95 | */ 96 | d += 6; /* Look past tail */ 97 | while(nbits-- != 0){ 98 | int k; 99 | 100 | k = (d[nbits].w[(endstate>>2)/32] >> ((endstate>>2)%32)) & 1; 101 | data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); 102 | } 103 | return 0; 104 | } 105 | 106 | /* Delete instance of a Viterbi decoder */ 107 | void delete_viterbi27_port(void *p){ 108 | struct v27 *vp = p; 109 | 110 | if(vp != NULL){ 111 | free(vp->decisions); 112 | free(vp); 113 | } 114 | } 115 | 116 | /* C-language butterfly */ 117 | #define BFLY(i) {\ 118 | unsigned int metric,m0,m1,decision;\ 119 | metric = (Branchtab27[0].c[i] ^ sym0) + (Branchtab27[1].c[i] ^ sym1);\ 120 | m0 = vp->old_metrics->w[i] + metric;\ 121 | m1 = vp->old_metrics->w[i+32] + (510 - metric);\ 122 | decision = (signed int)(m0-m1) > 0;\ 123 | vp->new_metrics->w[2*i] = decision ? m1 : m0;\ 124 | d->w[i/16] |= decision << ((2*i)&31);\ 125 | m0 -= (metric+metric-510);\ 126 | m1 += (metric+metric-510);\ 127 | decision = (signed int)(m0-m1) > 0;\ 128 | vp->new_metrics->w[2*i+1] = decision ? m1 : m0;\ 129 | d->w[i/16] |= decision << ((2*i+1)&31);\ 130 | } 131 | 132 | /* Update decoder with a block of demodulated symbols 133 | * Note that nbits is the number of decoded data bits, not the number 134 | * of symbols! 
135 | */ 136 | int update_viterbi27_blk_port(void *p,unsigned char *syms,int nbits){ 137 | struct v27 *vp = p; 138 | void *tmp; 139 | decision_t *d; 140 | 141 | if(p == NULL) 142 | return -1; 143 | d = (decision_t *)vp->dp; 144 | while(nbits--){ 145 | unsigned char sym0,sym1; 146 | 147 | d->w[0] = d->w[1] = 0; 148 | sym0 = *syms++; 149 | sym1 = *syms++; 150 | 151 | BFLY(0); 152 | BFLY(1); 153 | BFLY(2); 154 | BFLY(3); 155 | BFLY(4); 156 | BFLY(5); 157 | BFLY(6); 158 | BFLY(7); 159 | BFLY(8); 160 | BFLY(9); 161 | BFLY(10); 162 | BFLY(11); 163 | BFLY(12); 164 | BFLY(13); 165 | BFLY(14); 166 | BFLY(15); 167 | BFLY(16); 168 | BFLY(17); 169 | BFLY(18); 170 | BFLY(19); 171 | BFLY(20); 172 | BFLY(21); 173 | BFLY(22); 174 | BFLY(23); 175 | BFLY(24); 176 | BFLY(25); 177 | BFLY(26); 178 | BFLY(27); 179 | BFLY(28); 180 | BFLY(29); 181 | BFLY(30); 182 | BFLY(31); 183 | d++; 184 | /* Swap pointers to old and new metrics */ 185 | tmp = vp->old_metrics; 186 | vp->old_metrics = vp->new_metrics; 187 | vp->new_metrics = tmp; 188 | } 189 | vp->dp = d; 190 | return 0; 191 | } 192 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/viterbi27_sse.c: -------------------------------------------------------------------------------- 1 | /* K=7 r=1/2 Viterbi decoder for SSE 2 | * Feb 2004, Phil Karn, KA9Q 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "fec.h" 9 | 10 | typedef union { unsigned char c[64]; } metric_t; 11 | typedef union { unsigned long w[2]; unsigned char c[8]; __m64 v[1];} decision_t; 12 | union branchtab27 { unsigned char c[32]; __m64 v[4];} Branchtab27_sse[2]; 13 | static int Init = 0; 14 | 15 | /* State info for instance of Viterbi decoder 16 | * Don't change this without also changing references in ssebfly27.s! 
17 | */ 18 | struct v27 { 19 | metric_t metrics1; /* path metric buffer 1 */ 20 | metric_t metrics2; /* path metric buffer 2 */ 21 | decision_t *dp; /* Pointer to current decision */ 22 | metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ 23 | decision_t *decisions; /* Beginning of decisions for block */ 24 | }; 25 | 26 | /* Create a new instance of a Viterbi decoder with room for len+6 decisions; returns NULL if an allocation fails */ 27 | void *create_viterbi27_sse(int len){ 28 | struct v27 *vp; 29 | 30 | if(!Init){ 31 | int polys[2] = { V27POLYA, V27POLYB }; 32 | 33 | set_viterbi27_polynomial_sse(polys); 34 | } 35 | if((vp = malloc(sizeof(struct v27))) == NULL) 36 | return NULL; 37 | if((vp->decisions = malloc((len+6)*sizeof(decision_t))) == NULL){ 38 | free(vp); 39 | return NULL; 40 | } 41 | init_viterbi27_sse(vp,0); /* Call the SSE initializer directly: the generic init_viterbi27() dispatches on Cpu_mode and could run another variant's initializer on this struct */ 42 | return vp; 43 | } 44 | 45 | void set_viterbi27_polynomial_sse(int polys[2]){ 46 | int state; 47 | 48 | for(state=0;state < 32;state++){ 49 | Branchtab27_sse[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; 50 | Branchtab27_sse[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 
255 : 0; 51 | } 52 | Init++; 53 | } 54 | 55 | /* Initialize Viterbi decoder for start of new frame */ 56 | int init_viterbi27_sse(void *p,int starting_state){ 57 | struct v27 *vp = p; 58 | int i; 59 | 60 | if(p == NULL) 61 | return -1; 62 | for(i=0;i<64;i++) 63 | vp->metrics1.c[i] = 63; 64 | 65 | vp->old_metrics = &vp->metrics1; 66 | vp->new_metrics = &vp->metrics2; 67 | vp->dp = vp->decisions; 68 | vp->old_metrics->c[starting_state & 63] = 0; /* Bias known start state */ 69 | return 0; 70 | } 71 | 72 | /* Viterbi chainback */ 73 | int chainback_viterbi27_sse( 74 | void *p, 75 | unsigned char *data, /* Decoded output data */ 76 | unsigned int nbits, /* Number of data bits */ 77 | unsigned int endstate){ /* Terminal encoder state */ 78 | struct v27 *vp = p; 79 | decision_t *d; 80 | 81 | if(p == NULL) 82 | return -1; 83 | 84 | d = vp->decisions; 85 | /* Make room beyond the end of the encoder register so we can 86 | * accumulate a full byte of decoded data 87 | */ 88 | endstate %= 64; 89 | endstate <<= 2; 90 | 91 | /* The store into data[] only needs to be done every 8 bits. 
92 | * But this avoids a conditional branch, and the writes will 93 | * combine in the cache anyway 94 | */ 95 | d += 6; /* Look past tail */ 96 | while(nbits-- != 0){ 97 | int k; 98 | 99 | k = (d[nbits].c[(endstate>>2)/8] >> ((endstate>>2)%8)) & 1; 100 | data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); 101 | } 102 | return 0; 103 | } 104 | 105 | /* Delete instance of a Viterbi decoder */ 106 | void delete_viterbi27_sse(void *p){ 107 | struct v27 *vp = p; 108 | 109 | if(vp != NULL){ 110 | free(vp->decisions); 111 | free(vp); 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/viterbi27_sse2.c: -------------------------------------------------------------------------------- 1 | /* K=7 r=1/2 Viterbi decoder for SSE2 2 | * Feb 2004, Phil Karn, KA9Q 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "fec.h" 9 | 10 | typedef union { unsigned char c[64]; __m128i v[4]; } metric_t; 11 | typedef union { unsigned long w[2]; unsigned char c[8]; unsigned short s[4]; __m64 v[1];} decision_t; 12 | union branchtab27 { unsigned char c[32]; __m128i v[2];} Branchtab27_sse2[2]; 13 | static int Init = 0; 14 | 15 | /* State info for instance of Viterbi decoder 16 | * Don't change this without also changing references in sse2bfly27.s! 
17 | */ 18 | struct v27 { 19 | metric_t metrics1; /* path metric buffer 1 */ 20 | metric_t metrics2; /* path metric buffer 2 */ 21 | decision_t *dp; /* Pointer to current decision */ 22 | metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ 23 | decision_t *decisions; /* Beginning of decisions for block */ 24 | }; 25 | 26 | /* Initialize Viterbi decoder for start of new frame */ 27 | int init_viterbi27_sse2(void *p,int starting_state){ 28 | struct v27 *vp = p; 29 | int i; 30 | 31 | if(p == NULL) 32 | return -1; 33 | for(i=0;i<64;i++) 34 | vp->metrics1.c[i] = 63; 35 | 36 | vp->old_metrics = &vp->metrics1; 37 | vp->new_metrics = &vp->metrics2; 38 | vp->dp = vp->decisions; 39 | vp->old_metrics->c[starting_state & 63] = 0; /* Bias known start state */ 40 | return 0; 41 | } 42 | 43 | void set_viterbi27_polynomial_sse2(int polys[2]){ 44 | int state; 45 | 46 | for(state=0;state < 32;state++){ 47 | Branchtab27_sse2[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; 48 | Branchtab27_sse2[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 
255 : 0; 49 | } 50 | Init++; 51 | } 52 | 53 | 54 | /* Create a new instance of a Viterbi decoder */ 55 | void *create_viterbi27_sse2(int len){ 56 | void *p; 57 | struct v27 *vp; 58 | 59 | if(!Init){ 60 | int polys[2] = { V27POLYA, V27POLYB }; 61 | set_viterbi27_polynomial_sse2(polys); 62 | } 63 | /* Ordinary malloc() only returns 8-byte alignment, we need 16 */ 64 | if(posix_memalign(&p, sizeof(__m128i),sizeof(struct v27))) 65 | return NULL; 66 | vp = (struct v27 *)p; 67 | 68 | if((p = malloc((len+6)*sizeof(decision_t))) == NULL){ 69 | free(vp); 70 | return NULL; 71 | } 72 | vp->decisions = (decision_t *)p; 73 | init_viterbi27_sse2(vp,0); 74 | 75 | return vp; 76 | } 77 | 78 | /* Viterbi chainback */ 79 | int chainback_viterbi27_sse2( 80 | void *p, 81 | unsigned char *data, /* Decoded output data */ 82 | unsigned int nbits, /* Number of data bits */ 83 | unsigned int endstate){ /* Terminal encoder state */ 84 | struct v27 *vp = p; 85 | decision_t *d; 86 | 87 | if(p == NULL) 88 | return -1; 89 | d = vp->decisions; 90 | /* Make room beyond the end of the encoder register so we can 91 | * accumulate a full byte of decoded data 92 | */ 93 | endstate %= 64; 94 | endstate <<= 2; 95 | 96 | /* The store into data[] only needs to be done every 8 bits. 97 | * But this avoids a conditional branch, and the writes will 98 | * combine in the cache anyway 99 | */ 100 | d += 6; /* Look past tail */ 101 | while(nbits-- != 0){ 102 | int k; 103 | 104 | k = (d[nbits].c[(endstate>>2)/8] >> ((endstate>>2)%8)) & 1; 105 | data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); 106 | } 107 | return 0; 108 | } 109 | 110 | /* Delete instance of a Viterbi decoder */ 111 | void delete_viterbi27_sse2(void *p){ 112 | struct v27 *vp = p; 113 | 114 | if(vp != NULL){ 115 | free(vp->decisions); 116 | free(vp); 117 | } 118 | } 119 | 120 | 121 | #if 0 122 | /* This code is turned off because it's slower than my hand-crafted assembler in sse2bfly27.s. But it does work. 
*/ 123 | void update_viterbi27_blk_sse2(void *p,unsigned char *syms,int nbits){ 124 | struct v27 *vp = p; 125 | decision_t *d; 126 | 127 | if(p == NULL) 128 | return; 129 | d = (decision_t *)vp->dp; 130 | while(nbits--){ 131 | __m128i sym0v,sym1v; 132 | void *tmp; 133 | int i; 134 | 135 | /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */ 136 | sym0v = _mm_set1_epi8(syms[0]); 137 | sym1v = _mm_set1_epi8(syms[1]); 138 | syms += 2; 139 | 140 | for(i=0;i<2;i++){ 141 | __m128i decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1; 142 | 143 | /* Form branch metrics */ 144 | metric = _mm_avg_epu8(_mm_xor_si128(Branchtab27_sse2[0].v[i],sym0v),_mm_xor_si128(Branchtab27_sse2[1].v[i],sym1v)); 145 | /* There's no packed bytes right shift in SSE2, so we use the word version and mask 146 | * (I'm *really* starting to like Altivec...) 147 | */ 148 | metric = _mm_srli_epi16(metric,3); 149 | metric = _mm_and_si128(metric,_mm_set1_epi8(31)); 150 | m_metric = _mm_sub_epi8(_mm_set1_epi8(31),metric); 151 | 152 | /* Add branch metrics to path metrics */ 153 | m0 = _mm_add_epi8(vp->old_metrics->v[i],metric); 154 | m3 = _mm_add_epi8(vp->old_metrics->v[2+i],metric); 155 | m1 = _mm_add_epi8(vp->old_metrics->v[2+i],m_metric); 156 | m2 = _mm_add_epi8(vp->old_metrics->v[i],m_metric); 157 | 158 | /* Compare and select, using modulo arithmetic */ 159 | decision0 = _mm_cmpgt_epi8(_mm_sub_epi8(m0,m1),_mm_setzero_si128()); 160 | decision1 = _mm_cmpgt_epi8(_mm_sub_epi8(m2,m3),_mm_setzero_si128()); 161 | survivor0 = _mm_or_si128(_mm_and_si128(decision0,m1),_mm_andnot_si128(decision0,m0)); 162 | survivor1 = _mm_or_si128(_mm_and_si128(decision1,m3),_mm_andnot_si128(decision1,m2)); 163 | 164 | /* Pack each set of decisions into 16 bits */ 165 | d->s[2*i] = _mm_movemask_epi8(_mm_unpacklo_epi8(decision0,decision1)); 166 | d->s[2*i+1] = _mm_movemask_epi8(_mm_unpackhi_epi8(decision0,decision1)); 167 | 168 | /* Store surviving metrics */ 169 | vp->new_metrics->v[2*i] = 
_mm_unpacklo_epi8(survivor0,survivor1); 170 | vp->new_metrics->v[2*i+1] = _mm_unpackhi_epi8(survivor0,survivor1); 171 | } 172 | d++; 173 | /* Swap pointers to old and new metrics */ 174 | tmp = vp->old_metrics; 175 | vp->old_metrics = vp->new_metrics; 176 | vp->new_metrics = tmp; 177 | } 178 | vp->dp = d; 179 | } 180 | #endif 181 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/viterbi29.c: -------------------------------------------------------------------------------- 1 | /* Switch to K=9 r=1/2 Viterbi decoder with optional Intel or PowerPC SIMD 2 | * Copyright Feb 2004, Phil Karn, KA9Q 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include "fec.h" 8 | 9 | /* Create a new instance of a Viterbi decoder */ 10 | void *create_viterbi29(int len){ 11 | find_cpu_mode(); 12 | 13 | switch(Cpu_mode){ 14 | case PORT: 15 | default: 16 | return create_viterbi29_port(len); 17 | #ifdef __VEC__ 18 | case ALTIVEC: 19 | return create_viterbi29_av(len); 20 | #endif 21 | #ifdef __i386__ 22 | case MMX: 23 | return create_viterbi29_mmx(len); 24 | case SSE: 25 | return create_viterbi29_sse(len); 26 | case SSE2: 27 | return create_viterbi29_sse2(len); 28 | #endif 29 | } 30 | } 31 | 32 | void set_viterbi29_polynomial(int polys[2]){ 33 | switch(Cpu_mode){ 34 | case PORT: 35 | default: 36 | set_viterbi29_polynomial_port(polys); 37 | break; 38 | #ifdef __VEC__ 39 | case ALTIVEC: 40 | set_viterbi29_polynomial_av(polys); 41 | break; 42 | #endif 43 | #ifdef __i386__ 44 | case MMX: 45 | set_viterbi29_polynomial_mmx(polys); 46 | break; 47 | case SSE: 48 | set_viterbi29_polynomial_sse(polys); 49 | break; 50 | case SSE2: 51 | set_viterbi29_polynomial_sse2(polys); 52 | break; 53 | #endif 54 | } 55 | } 56 | 57 | /* Initialize Viterbi decoder for start of new frame */ 58 | int init_viterbi29(void *p,int starting_state){ 59 | switch(Cpu_mode){ 60 | case PORT: 61 | default: 62 | return init_viterbi29_port(p,starting_state); 63 | #ifdef 
__VEC__ 64 | case ALTIVEC: 65 | return init_viterbi29_av(p,starting_state); 66 | #endif 67 | #ifdef __i386__ 68 | case MMX: 69 | return init_viterbi29_mmx(p,starting_state); 70 | case SSE: 71 | return init_viterbi29_sse(p,starting_state); 72 | case SSE2: 73 | return init_viterbi29_sse2(p,starting_state); 74 | #endif 75 | } 76 | } 77 | 78 | /* Viterbi chainback */ 79 | int chainback_viterbi29( 80 | void *p, 81 | unsigned char *data, /* Decoded output data */ 82 | unsigned int nbits, /* Number of data bits */ 83 | unsigned int endstate){ /* Terminal encoder state */ 84 | 85 | switch(Cpu_mode){ 86 | case PORT: 87 | default: 88 | return chainback_viterbi29_port(p,data,nbits,endstate); 89 | #ifdef __VEC__ 90 | case ALTIVEC: 91 | return chainback_viterbi29_av(p,data,nbits,endstate); 92 | #endif 93 | #ifdef __i386__ 94 | case MMX: 95 | return chainback_viterbi29_mmx(p,data,nbits,endstate); 96 | case SSE: 97 | return chainback_viterbi29_sse(p,data,nbits,endstate); 98 | case SSE2: 99 | return chainback_viterbi29_sse2(p,data,nbits,endstate); 100 | #endif 101 | } 102 | } 103 | 104 | /* Delete instance of a Viterbi decoder */ 105 | void delete_viterbi29(void *p){ 106 | switch(Cpu_mode){ 107 | case PORT: 108 | default: 109 | delete_viterbi29_port(p); 110 | break; 111 | #ifdef __VEC__ 112 | case ALTIVEC: 113 | delete_viterbi29_av(p); 114 | break; 115 | #endif 116 | #ifdef __i386__ 117 | case MMX: 118 | delete_viterbi29_mmx(p); 119 | break; 120 | case SSE: 121 | delete_viterbi29_sse(p); 122 | break; 123 | case SSE2: 124 | delete_viterbi29_sse2(p); 125 | break; 126 | #endif 127 | } 128 | } 129 | 130 | /* Update decoder with a block of demodulated symbols 131 | * Note that nbits is the number of decoded data bits, not the number 132 | * of symbols! 
133 | */ 134 | int update_viterbi29_blk(void *p,unsigned char syms[],int nbits){ 135 | switch(Cpu_mode){ 136 | case PORT: 137 | default: 138 | return update_viterbi29_blk_port(p,syms,nbits); 139 | #ifdef __VEC__ 140 | case ALTIVEC: 141 | return update_viterbi29_blk_av(p,syms,nbits); 142 | #endif 143 | #ifdef __i386__ 144 | case MMX: 145 | return update_viterbi29_blk_mmx(p,syms,nbits); 146 | case SSE: 147 | return update_viterbi29_blk_sse(p,syms,nbits); 148 | case SSE2: 149 | return update_viterbi29_blk_sse2(p,syms,nbits); 150 | #endif 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/viterbi29_mmx.c: -------------------------------------------------------------------------------- 1 | /* K=9 r=1/2 Viterbi decoder for MMX 2 | * Copyright Feb 2004, Phil Karn, KA9Q 3 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | */ 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "fec.h" 10 | 11 | typedef union { char c[256]; __m64 v[32];} decision_t; 12 | typedef union { unsigned char c[256]; __m64 v[32];} metric_t; 13 | 14 | unsigned char Mettab29_1[256][128] __attribute__ ((aligned(8))); 15 | unsigned char Mettab29_2[256][128] __attribute__ ((aligned(8))); 16 | static int Init = 0; 17 | 18 | /* State info for instance of Viterbi decoder 19 | * Don't change this without also changing references in mmxbfly29.s! 
20 | */ 21 | struct v29 { 22 | metric_t metrics1; /* path metric buffer 1 */ 23 | metric_t metrics2; /* path metric buffer 2 */ 24 | decision_t *dp; /* Pointer to current decision */ 25 | metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ 26 | decision_t *decisions; /* Beginning of decisions for block */ 27 | }; 28 | 29 | /* Create a new instance of a Viterbi decoder: allocates the state struct plus len+8 decision records and resets it to starting state 0. Returns the instance, or NULL if either allocation fails. */ 30 | void *create_viterbi29_mmx(int len){ 31 | struct v29 *vp; 32 | 33 | if(Init == 0){ /* Build the default metric tables on first use */ 34 | int polys[2] = {V29POLYA,V29POLYB}; 35 | 36 | set_viterbi29_polynomial_mmx(polys); 37 | } 38 | if((vp = (struct v29 *)malloc(sizeof(struct v29))) == NULL) 39 | return NULL; 40 | 41 | if((vp->decisions = (decision_t *)malloc((len+8)*sizeof(decision_t))) == NULL){ 42 | free(vp); 43 | return NULL; 44 | } 45 | init_viterbi29_mmx(vp,0); /* Use the MMX initializer directly: the generic init_viterbi29() dispatches on Cpu_mode and could run another variant against this struct layout */ 46 | return vp; 47 | } 48 | 49 | void set_viterbi29_polynomial_mmx(int polys[2]){ 50 | int state; 51 | 52 | for(state=0;state < 128;state++){ 53 | int symbol; 54 | 55 | for(symbol = 0;symbol < 256;symbol++){ 56 | int sym; 57 | 58 | sym = parity((2*state) & abs(polys[0])) ^ (polys[0] < 0); 59 | Mettab29_1[symbol][state] = (sym ? (255-symbol):symbol) / 16; 60 | 61 | sym = parity((2*state) & abs(polys[1])) ^ (polys[1] < 0); 62 | Mettab29_2[symbol][state] = (sym ? 
(255-symbol):symbol) / 16; 63 | } 64 | } 65 | Init++; 66 | } 67 | 68 | /* Initialize Viterbi decoder for start of new frame */ 69 | int init_viterbi29_mmx(void *p,int starting_state){ 70 | struct v29 *vp = p; 71 | int i; 72 | 73 | if(p == NULL) 74 | return -1; 75 | for(i=0;i<256;i++) 76 | vp->metrics1.c[i] = 63; 77 | 78 | vp->old_metrics = &vp->metrics1; 79 | vp->new_metrics = &vp->metrics2; 80 | vp->dp = vp->decisions; 81 | vp->old_metrics->c[starting_state & 255] = 0; /* Bias known start state */ 82 | return 0; 83 | } 84 | 85 | /* Viterbi chainback */ 86 | int chainback_viterbi29_mmx( 87 | void *p, 88 | unsigned char *data, /* Decoded output data */ 89 | unsigned int nbits, /* Number of data bits */ 90 | unsigned int endstate){ /* Terminal encoder state */ 91 | 92 | struct v29 *vp = (struct v29 *)p; 93 | decision_t *d; 94 | 95 | if(p == NULL) 96 | return -1; 97 | 98 | d = (decision_t *)vp->decisions; 99 | endstate &= 255; 100 | d += 8; /* Look past tail */ 101 | while(nbits-- != 0){ 102 | int k; 103 | 104 | k = d[nbits].c[endstate] & 1; 105 | data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); 106 | } 107 | return 0; 108 | } 109 | 110 | /* Delete instance of a Viterbi decoder */ 111 | void delete_viterbi29_mmx(void *p){ 112 | struct v29 *vp = p; 113 | 114 | if(vp != NULL){ 115 | free(vp->decisions); 116 | free(vp); 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/viterbi29_port.c: -------------------------------------------------------------------------------- 1 | /* K=9 r=1/2 Viterbi decoder in portable C 2 | * Copyright Feb 2004, Phil Karn, KA9Q 3 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | */ 5 | #include 6 | #include 7 | #include 8 | #include "fec.h" 9 | 10 | typedef union { unsigned int w[256]; } metric_t; 11 | typedef union { unsigned long w[8];} decision_t; 12 | 13 | static union { unsigned char c[128]; } Branchtab29[2]; 14 | static int 
Init = 0; 15 | 16 | /* State info for instance of Viterbi decoder */ 17 | struct v29 { 18 | metric_t metrics1; /* path metric buffer 1 */ 19 | metric_t metrics2; /* path metric buffer 2 */ 20 | decision_t *dp; /* Pointer to current decision */ 21 | metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ 22 | decision_t *decisions; /* Beginning of decisions for block */ 23 | }; 24 | 25 | /* Initialize Viterbi decoder for start of new frame */ 26 | int init_viterbi29_port(void *p,int starting_state){ 27 | struct v29 *vp = p; 28 | int i; 29 | 30 | if(p == NULL) 31 | return -1; 32 | for(i=0;i<256;i++) 33 | vp->metrics1.w[i] = 63; 34 | 35 | vp->old_metrics = &vp->metrics1; 36 | vp->new_metrics = &vp->metrics2; 37 | vp->dp = vp->decisions; 38 | vp->old_metrics->w[starting_state & 255] = 0; /* Bias known start state */ 39 | return 0; 40 | } 41 | 42 | void set_viterbi29_polynomial_port(int polys[2]){ 43 | int state; 44 | 45 | for(state=0;state < 128;state++){ 46 | Branchtab29[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; 47 | Branchtab29[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 
255 : 0; 48 | } 49 | Init++; 50 | } 51 | 52 | 53 | /* Create a new instance of a Viterbi decoder */ 54 | void *create_viterbi29_port(int len){ 55 | struct v29 *vp; 56 | 57 | if(!Init){ 58 | int polys[2] = {V29POLYA,V29POLYB}; 59 | set_viterbi29_polynomial_port(polys); 60 | } 61 | if((vp = (struct v29 *)malloc(sizeof(struct v29))) == NULL) 62 | return NULL; 63 | 64 | if((vp->decisions = (decision_t *)malloc((len+8)*sizeof(decision_t))) == NULL){ 65 | free(vp); 66 | return NULL; 67 | } 68 | init_viterbi29_port(vp,0); 69 | 70 | return vp; 71 | } 72 | 73 | 74 | /* Viterbi chainback */ 75 | int chainback_viterbi29_port( 76 | void *p, 77 | unsigned char *data, /* Decoded output data */ 78 | unsigned int nbits, /* Number of data bits */ 79 | unsigned int endstate){ /* Terminal encoder state */ 80 | struct v29 *vp = p; 81 | decision_t *d; 82 | 83 | if(p == NULL) 84 | return -1; 85 | 86 | d = vp->decisions; 87 | /* Make room beyond the end of the encoder register so we can 88 | * accumulate a full byte of decoded data 89 | */ 90 | endstate %= 256; 91 | 92 | /* The store into data[] only needs to be done every 8 bits. 
93 | * But this avoids a conditional branch, and the writes will 94 | * combine in the cache anyway 95 | */ 96 | d += 8; /* Look past tail */ 97 | while(nbits-- != 0){ 98 | int k; 99 | 100 | k = (d[nbits].w[(endstate)/32] >> (endstate%32)) & 1; 101 | data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); 102 | } 103 | return 0; 104 | } 105 | 106 | 107 | /* Delete instance of a Viterbi decoder */ 108 | void delete_viterbi29_port(void *p){ 109 | struct v29 *vp = p; 110 | 111 | if(vp != NULL){ 112 | free(vp->decisions); 113 | free(vp); 114 | } 115 | } 116 | 117 | /* C-language butterfly */ 118 | #define BFLY(i) {\ 119 | unsigned int metric,m0,m1,decision;\ 120 | metric = (Branchtab29[0].c[i] ^ sym0) + (Branchtab29[1].c[i] ^ sym1);\ 121 | m0 = vp->old_metrics->w[i] + metric;\ 122 | m1 = vp->old_metrics->w[i+128] + (510 - metric);\ 123 | decision = (signed int)(m0-m1) > 0;\ 124 | vp->new_metrics->w[2*i] = decision ? m1 : m0;\ 125 | d->w[i/16] |= decision << ((2*i)&31);\ 126 | m0 -= (metric+metric-510);\ 127 | m1 += (metric+metric-510);\ 128 | decision = (signed int)(m0-m1) > 0;\ 129 | vp->new_metrics->w[2*i+1] = decision ? m1 : m0;\ 130 | d->w[i/16] |= decision << ((2*i+1)&31);\ 131 | } 132 | 133 | /* Update decoder with a block of demodulated symbols 134 | * Note that nbits is the number of decoded data bits, not the number 135 | * of symbols! 
136 | */ 137 | 138 | int update_viterbi29_blk_port(void *p,unsigned char *syms,int nbits){ 139 | struct v29 *vp = p; 140 | decision_t *d; 141 | 142 | if(p == NULL) 143 | return -1; 144 | 145 | d = (decision_t *)vp->dp; 146 | while(nbits--){ 147 | void *tmp; 148 | unsigned char sym0,sym1; 149 | int i; 150 | 151 | for(i=0;i<8;i++) 152 | d->w[i] = 0; 153 | sym0 = *syms++; 154 | sym1 = *syms++; 155 | 156 | for(i=0;i<128;i++) 157 | BFLY(i); 158 | 159 | d++; 160 | tmp = vp->old_metrics; 161 | vp->old_metrics = vp->new_metrics; 162 | vp->new_metrics = tmp; 163 | } 164 | vp->dp = d; 165 | return 0; 166 | } 167 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/viterbi29_sse.c: -------------------------------------------------------------------------------- 1 | /* K=9 r=1/2 Viterbi decoder for SSE 2 | * Copyright Feb 2004, Phil Karn, KA9Q 3 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | */ 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "fec.h" 10 | 11 | typedef union { unsigned char w[256]; __m64 v[32];} metric_t; 12 | typedef union { unsigned long w[8]; unsigned char c[32]; __m64 v[4];} decision_t; 13 | 14 | union branchtab29 { unsigned char c[128]; } Branchtab29_sse[2]; 15 | static int Init = 0; 16 | 17 | /* State info for instance of Viterbi decoder 18 | * Don't change this without also changing references in [mmx|sse|sse2]bfly29.s! 
19 | */ 20 | struct v29 { 21 | metric_t metrics1; /* path metric buffer 1 */ 22 | metric_t metrics2; /* path metric buffer 2 */ 23 | decision_t *dp; /* Pointer to current decision */ 24 | metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ 25 | decision_t *decisions; /* Beginning of decisions for block */ 26 | }; 27 | 28 | /* Create a new instance of a Viterbi decoder: allocates the state struct plus len+8 decision records and resets it to starting state 0. Returns the instance, or NULL if either allocation fails. */ 29 | void *create_viterbi29_sse(int len){ 30 | struct v29 *vp; 31 | 32 | if(!Init){ /* Build the default branch table on first use */ 33 | int polys[2] = { V29POLYA,V29POLYB }; 34 | 35 | set_viterbi29_polynomial_sse(polys); 36 | } 37 | if((vp = (struct v29 *)malloc(sizeof(struct v29))) == NULL) 38 | return NULL; 39 | if((vp->decisions = (decision_t *)malloc((len+8)*sizeof(decision_t))) == NULL){ 40 | free(vp); 41 | return NULL; 42 | } 43 | init_viterbi29_sse(vp,0); /* Use the SSE initializer directly: the generic init_viterbi29() dispatches on Cpu_mode and could run another variant against this struct layout */ 44 | return vp; 45 | } 46 | 47 | void set_viterbi29_polynomial_sse(int polys[2]){ 48 | int state; 49 | 50 | for(state=0;state < 128;state++){ 51 | Branchtab29_sse[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; 52 | Branchtab29_sse[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 
255 : 0; 53 | } 54 | Init++; 55 | } 56 | 57 | /* Initialize Viterbi decoder for start of new frame */ 58 | int init_viterbi29_sse(void *p,int starting_state){ 59 | struct v29 *vp = p; 60 | int i; 61 | 62 | if(p == NULL) 63 | return -1; 64 | for(i=0;i<256;i++) 65 | vp->metrics1.w[i] = 200; 66 | 67 | vp->old_metrics = &vp->metrics1; 68 | vp->new_metrics = &vp->metrics2; 69 | vp->dp = vp->decisions; 70 | vp->old_metrics->w[starting_state & 255] = 0; /* Bias known start state */ 71 | return 0; 72 | } 73 | 74 | /* Viterbi chainback */ 75 | int chainback_viterbi29_sse( 76 | void *p, 77 | unsigned char *data, /* Decoded output data */ 78 | unsigned int nbits, /* Number of data bits */ 79 | unsigned int endstate){ /* Terminal encoder state */ 80 | struct v29 *vp = p; 81 | decision_t *d; 82 | 83 | if(p == NULL) 84 | return -1; 85 | d = vp->decisions; 86 | /* Make room beyond the end of the encoder register so we can 87 | * accumulate a full byte of decoded data 88 | */ 89 | endstate %= 256; 90 | 91 | /* The store into data[] only needs to be done every 8 bits. 
92 | * But this avoids a conditional branch, and the writes will 93 | * combine in the cache anyway 94 | */ 95 | d += 8; /* Look past tail */ 96 | while(nbits-- != 0){ 97 | int k; 98 | 99 | k = (d[nbits].c[endstate/8] >> (endstate%8)) & 1; 100 | data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); 101 | } 102 | return 0; 103 | } 104 | 105 | 106 | /* Delete instance of a Viterbi decoder */ 107 | void delete_viterbi29_sse(void *p){ 108 | struct v29 *vp = p; 109 | 110 | if(vp != NULL){ 111 | free(vp->decisions); 112 | free(vp); 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/viterbi29_sse2.c: -------------------------------------------------------------------------------- 1 | /* K=9 r=1/2 Viterbi decoder for SSE2 2 | * Copyright Feb 2004, Phil Karn, KA9Q 3 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | */ 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "fec.h" 10 | 11 | typedef union { unsigned char c[256]; __m128i v[16];} metric_t; 12 | typedef union { unsigned long w[8]; unsigned char c[32];} decision_t; 13 | 14 | union branchtab29 { unsigned char c[128]; } Branchtab29_sse2[2]; 15 | static int Init = 0; 16 | 17 | /* State info for instance of Viterbi decoder 18 | * Don't change this without also changing references in sse2bfly29.s! 
19 | */ 20 | struct v29 { 21 | metric_t metrics1; /* path metric buffer 1 */ 22 | metric_t metrics2; /* path metric buffer 2 */ 23 | decision_t *dp; /* Pointer to current decision */ 24 | metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ 25 | decision_t *decisions; /* Beginning of decisions for block */ 26 | }; 27 | 28 | /* Initialize Viterbi decoder for start of new frame: sets every path metric to 63, zeroes the metric of starting_state (mod 256) and rewinds the decision pointer. Returns 0, or -1 if p is NULL. */ 29 | int init_viterbi29_sse2(void *p,int starting_state){ 30 | struct v29 *vp = p; 31 | int i; 32 | if(p == NULL) return -1; /* Same NULL guard as the other init_viterbi29_* variants */ 33 | for(i=0;i<256;i++) 34 | vp->metrics1.c[i] = 63; 35 | 36 | vp->old_metrics = &vp->metrics1; 37 | vp->new_metrics = &vp->metrics2; 38 | vp->dp = vp->decisions; 39 | vp->old_metrics->c[starting_state & 255] = 0; /* Bias known start state */ 40 | return 0; 41 | } 42 | 43 | void set_viterbi29_polynomial_sse2(int polys[2]){ 44 | int state; 45 | 46 | for(state=0;state < 128;state++){ 47 | Branchtab29_sse2[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; 48 | Branchtab29_sse2[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 
255 : 0; 49 | } 50 | Init++; 51 | } 52 | 53 | 54 | /* Create a new instance of a Viterbi decoder: allocates a 16-byte-aligned state struct plus len+8 decision records and resets it to starting state 0. Returns the instance, or NULL if either allocation fails. */ 55 | void *create_viterbi29_sse2(int len){ 56 | void *p; 57 | struct v29 *vp; 58 | 59 | if(!Init){ /* Build the default branch table on first use */ 60 | int polys[2] = {V29POLYA,V29POLYB}; 61 | 62 | set_viterbi29_polynomial_sse2(polys); /* Fill this file's table directly: the generic set_viterbi29_polynomial() dispatches on Cpu_mode and may fill another variant's table, leaving Init here at 0 */ 63 | } 64 | /* Ordinary malloc() only returns 8-byte alignment, we need 16 */ 65 | if(posix_memalign(&p, sizeof(__m128i),sizeof(struct v29))) 66 | return NULL; 67 | vp = (struct v29 *)p; 68 | if((p = malloc((len+8)*sizeof(decision_t))) == NULL){ 69 | free(vp); 70 | return NULL; 71 | } 72 | vp->decisions = (decision_t *)p; 73 | init_viterbi29_sse2(vp,0); 74 | return vp; 75 | } 76 | 77 | 78 | /* Viterbi chainback */ 79 | int chainback_viterbi29_sse2( 80 | void *p, 81 | unsigned char *data, /* Decoded output data */ 82 | unsigned int nbits, /* Number of data bits */ 83 | unsigned int endstate){ /* Terminal encoder state */ 84 | struct v29 *vp = p; 85 | decision_t *d; 86 | 87 | if(p == NULL) 88 | return -1; 89 | d = vp->decisions; 90 | 91 | /* Make room beyond the end of the encoder register so we can 92 | * accumulate a full byte of decoded data 93 | */ 94 | endstate %= 256; 95 | 96 | /* The store into data[] only needs to be done every 8 bits. 
97 | * But this avoids a conditional branch, and the writes will 98 | * combine in the cache anyway 99 | */ 100 | d += 8; /* Look past tail */ 101 | while(nbits-- != 0){ 102 | int k; 103 | 104 | k = (d[nbits].c[endstate/8] >> (endstate%8)) & 1; 105 | data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); 106 | } 107 | return 0; 108 | } 109 | 110 | 111 | /* Delete instance of a Viterbi decoder */ 112 | void delete_viterbi29_sse2(void *p){ 113 | struct v29 *vp = p; 114 | 115 | if(vp != NULL){ 116 | free(vp->decisions); 117 | free(vp); 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/viterbi39.c: -------------------------------------------------------------------------------- 1 | /* Switch to K=9 r=1/3 Viterbi decoder with optional Intel or PowerPC SIMD 2 | * Copyright Aug 2006, Phil Karn, KA9Q 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include "fec.h" 8 | 9 | /* Create a new instance of a Viterbi decoder */ 10 | void *create_viterbi39(int len){ 11 | find_cpu_mode(); 12 | 13 | switch(Cpu_mode){ 14 | case PORT: 15 | default: 16 | return create_viterbi39_port(len); 17 | #ifdef __VEC__ 18 | case ALTIVEC: 19 | return create_viterbi39_av(len); 20 | #endif 21 | #ifdef __i386__ 22 | case MMX: 23 | return create_viterbi39_mmx(len); 24 | case SSE: 25 | return create_viterbi39_sse(len); 26 | case SSE2: 27 | return create_viterbi39_sse2(len); 28 | #endif 29 | } 30 | } 31 | 32 | void set_viterbi39_polynomial(int polys[3]){ 33 | switch(Cpu_mode){ 34 | case PORT: 35 | default: 36 | set_viterbi39_polynomial_port(polys); 37 | break; 38 | #ifdef __VEC__ 39 | case ALTIVEC: 40 | set_viterbi39_polynomial_av(polys); 41 | break; 42 | #endif 43 | #ifdef __i386__ 44 | case MMX: 45 | set_viterbi39_polynomial_mmx(polys); 46 | break; 47 | case SSE: 48 | set_viterbi39_polynomial_sse(polys); 49 | break; 50 | case SSE2: 51 | set_viterbi39_polynomial_sse2(polys); 52 | break; 53 | #endif 54 | } 55 | } 56 | 57 | 58 
| /* Initialize Viterbi decoder for start of new frame */ 59 | int init_viterbi39(void *p,int starting_state){ 60 | switch(Cpu_mode){ 61 | case PORT: 62 | default: 63 | return init_viterbi39_port(p,starting_state); 64 | #ifdef __VEC__ 65 | case ALTIVEC: 66 | return init_viterbi39_av(p,starting_state); 67 | #endif 68 | #ifdef __i386__ 69 | case MMX: 70 | return init_viterbi39_mmx(p,starting_state); 71 | case SSE: 72 | return init_viterbi39_sse(p,starting_state); 73 | case SSE2: 74 | return init_viterbi39_sse2(p,starting_state); 75 | #endif 76 | } 77 | } 78 | 79 | /* Viterbi chainback */ 80 | int chainback_viterbi39( 81 | void *p, 82 | unsigned char *data, /* Decoded output data */ 83 | unsigned int nbits, /* Number of data bits */ 84 | unsigned int endstate){ /* Terminal encoder state */ 85 | 86 | switch(Cpu_mode){ 87 | case PORT: 88 | default: 89 | return chainback_viterbi39_port(p,data,nbits,endstate); 90 | #ifdef __VEC__ 91 | case ALTIVEC: 92 | return chainback_viterbi39_av(p,data,nbits,endstate); 93 | #endif 94 | #ifdef __i386__ 95 | case MMX: 96 | return chainback_viterbi39_mmx(p,data,nbits,endstate); 97 | case SSE: 98 | return chainback_viterbi39_sse(p,data,nbits,endstate); 99 | case SSE2: 100 | return chainback_viterbi39_sse2(p,data,nbits,endstate); 101 | #endif 102 | } 103 | } 104 | 105 | /* Delete instance of a Viterbi decoder */ 106 | void delete_viterbi39(void *p){ 107 | switch(Cpu_mode){ 108 | case PORT: 109 | default: 110 | delete_viterbi39_port(p); 111 | break; 112 | #ifdef __VEC__ 113 | case ALTIVEC: 114 | delete_viterbi39_av(p); 115 | break; 116 | #endif 117 | #ifdef __i386__ 118 | case MMX: 119 | delete_viterbi39_mmx(p); 120 | break; 121 | case SSE: 122 | delete_viterbi39_sse(p); 123 | break; 124 | case SSE2: 125 | delete_viterbi39_sse2(p); 126 | break; 127 | #endif 128 | } 129 | } 130 | 131 | /* Update decoder with a block of demodulated symbols 132 | * Note that nbits is the number of decoded data bits, not the number 133 | * of symbols! 
134 | */ 135 | int update_viterbi39_blk(void *p,unsigned char syms[],int nbits){ 136 | switch(Cpu_mode){ 137 | case PORT: 138 | default: 139 | return update_viterbi39_blk_port(p,syms,nbits); 140 | #ifdef __VEC__ 141 | case ALTIVEC: 142 | return update_viterbi39_blk_av(p,syms,nbits); 143 | #endif 144 | #ifdef __i386__ 145 | case MMX: 146 | return update_viterbi39_blk_mmx(p,syms,nbits); 147 | case SSE: 148 | return update_viterbi39_blk_sse(p,syms,nbits); 149 | case SSE2: 150 | return update_viterbi39_blk_sse2(p,syms,nbits); 151 | #endif 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/viterbi39_mmx.c: -------------------------------------------------------------------------------- 1 | /* K=9 r=1/3 Viterbi decoder for x86 MMX 2 | * Aug 2006, Phil Karn, KA9Q 3 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | */ 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "fec.h" 10 | 11 | typedef union { unsigned char c[256]; __m64 v[32];} decision_t; 12 | typedef union { unsigned short s[256]; __m64 v[64];} metric_t; 13 | 14 | static union branchtab39 { unsigned short s[128]; __m64 v[32];} Branchtab39[3]; 15 | static int Init = 0; 16 | 17 | /* State info for instance of Viterbi decoder */ 18 | struct v39 { 19 | metric_t metrics1; /* path metric buffer 1 */ 20 | metric_t metrics2; /* path metric buffer 2 */ 21 | void *dp; /* Pointer to current decision */ 22 | metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ 23 | void *decisions; /* Beginning of decisions for block */ 24 | }; 25 | 26 | /* Initialize Viterbi decoder for start of new frame */ 27 | int init_viterbi39_mmx(void *p,int starting_state){ 28 | struct v39 *vp = p; 29 | int i; 30 | 31 | if(p == NULL) 32 | return -1; 33 | for(i=0;i<256;i++) 34 | vp->metrics1.s[i] = 1000; 35 | 36 | vp->old_metrics = &vp->metrics1; 37 | vp->new_metrics = &vp->metrics2; 38 | 
vp->dp = vp->decisions; 39 | vp->old_metrics->s[starting_state & 255] = 0; /* Bias known start state */ 40 | return 0; 41 | } 42 | /* Build the three branch tables from the generator polynomials. The magnitude of each polynomial is the tap mask and a negative sign inverts that output symbol, so the taps are masked with abs() as in the portable and r=1/2 variants. */ 43 | void set_viterbi39_polynomial_mmx(int polys[3]){ 44 | int state; 45 | 46 | for(state=0;state < 128;state++){ 47 | Branchtab39[0].s[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255:0; 48 | Branchtab39[1].s[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 255:0; 49 | Branchtab39[2].s[state] = (polys[2] < 0) ^ parity((2*state) & abs(polys[2])) ? 255:0; 50 | } 51 | Init++; 52 | } 53 | 54 | /* Create a new instance of a Viterbi decoder */ 55 | void *create_viterbi39_mmx(int len){ 56 | struct v39 *vp; 57 | 58 | if(!Init){ 59 | int polys[3] = { V39POLYA,V39POLYB,V39POLYC }; 60 | set_viterbi39_polynomial_mmx(polys); 61 | } 62 | if((vp = (struct v39 *)malloc(sizeof(struct v39))) == NULL) 63 | return NULL; 64 | if((vp->decisions = malloc((len+8)*sizeof(decision_t))) == NULL){ 65 | free(vp); 66 | return NULL; 67 | } 68 | init_viterbi39_mmx(vp,0); 69 | return vp; 70 | } 71 | 72 | 73 | 74 | /* Viterbi chainback */ 75 | int chainback_viterbi39_mmx( 76 | void *p, 77 | unsigned char *data, /* Decoded output data */ 78 | unsigned int nbits, /* Number of data bits */ 79 | unsigned int endstate){ /* Terminal encoder state */ 80 | struct v39 *vp = p; 81 | decision_t *d; 82 | int path_metric; 83 | 84 | if(p == NULL) 85 | return -1; 86 | 87 | d = (decision_t *)vp->decisions; 88 | 89 | endstate %= 256; 90 | 91 | path_metric = vp->old_metrics->s[endstate]; 92 | 93 | /* The store into data[] only needs to be done every 8 bits. 
94 | * But this avoids a conditional branch, and the writes will 95 | * combine in the cache anyway 96 | */ 97 | d += 8; /* Look past tail */ 98 | while(nbits-- != 0){ 99 | int k; 100 | 101 | k = d[nbits].c[endstate] & 1; 102 | endstate = (k << 7) | (endstate >> 1); 103 | data[nbits>>3] = endstate; 104 | } 105 | return path_metric; 106 | } 107 | 108 | /* Delete instance of a Viterbi decoder */ 109 | void delete_viterbi39_mmx(void *p){ 110 | struct v39 *vp = p; 111 | 112 | if(vp != NULL){ 113 | free(vp->decisions); 114 | free(vp); 115 | } 116 | } 117 | 118 | 119 | int update_viterbi39_blk_mmx(void *p,unsigned char *syms,int nbits){ 120 | struct v39 *vp = p; 121 | decision_t *d; 122 | int path_metric = 0; 123 | 124 | if(p == NULL) 125 | return -1; 126 | 127 | d = (decision_t *)vp->dp; 128 | 129 | while(nbits--){ 130 | __m64 sym0v,sym1v,sym2v; 131 | void *tmp; 132 | int i; 133 | 134 | /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */ 135 | sym0v = _mm_set1_pi16(syms[0]); 136 | sym1v = _mm_set1_pi16(syms[1]); 137 | sym2v = _mm_set1_pi16(syms[2]); 138 | syms += 3; 139 | 140 | for(i=0;i<32;i++){ 141 | __m64 decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1; 142 | 143 | /* Form branch metrics 144 | * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255, 145 | * the XOR operations constitute conditional negation. 
146 | * metric and m_metric (765-metric) are in the range 0-765 147 | */ 148 | m0 = _mm_add_pi16(_mm_xor_si64(Branchtab39[0].v[i],sym0v),_mm_xor_si64(Branchtab39[1].v[i],sym1v)); 149 | metric = _mm_add_pi16(_mm_xor_si64(Branchtab39[2].v[i],sym2v),m0); 150 | m_metric = _mm_sub_pi16(_mm_set1_pi16(765),metric); 151 | 152 | /* Add branch metrics to path metrics */ 153 | m0 = _mm_add_pi16(vp->old_metrics->v[i],metric); 154 | m3 = _mm_add_pi16(vp->old_metrics->v[32+i],metric); 155 | m1 = _mm_add_pi16(vp->old_metrics->v[32+i],m_metric); 156 | m2 = _mm_add_pi16(vp->old_metrics->v[i],m_metric); 157 | 158 | /* Compare and select 159 | * There's no packed min instruction in MMX, so we use modulo arithmetic 160 | * to form the decisions and then do the select the hard way 161 | */ 162 | decision0 = _mm_cmpgt_pi16(_mm_sub_pi16(m0,m1),_mm_setzero_si64()); 163 | decision1 = _mm_cmpgt_pi16(_mm_sub_pi16(m2,m3),_mm_setzero_si64()); 164 | survivor0 = _mm_or_si64(_mm_and_si64(decision0,m1),_mm_andnot_si64(decision0,m0)); 165 | survivor1 = _mm_or_si64(_mm_and_si64(decision1,m3),_mm_andnot_si64(decision1,m2)); 166 | 167 | /* Merge decisions and store as bytes */ 168 | d->v[i] = _mm_unpacklo_pi8(_mm_packs_pi16(decision0,_mm_setzero_si64()),_mm_packs_pi16(decision1,_mm_setzero_si64())); 169 | 170 | /* Store surviving metrics */ 171 | vp->new_metrics->v[2*i] = _mm_unpacklo_pi16(survivor0,survivor1); 172 | vp->new_metrics->v[2*i+1] = _mm_unpackhi_pi16(survivor0,survivor1); 173 | } 174 | if(vp->new_metrics->s[0] < vp->old_metrics->s[0]) 175 | path_metric += 65536; /* Hack: wraparound probably occurred */ 176 | d++; 177 | /* Swap pointers to old and new metrics */ 178 | tmp = vp->old_metrics; 179 | vp->old_metrics = vp->new_metrics; 180 | vp->new_metrics = tmp; 181 | } 182 | vp->dp = d; 183 | _mm_empty(); 184 | return path_metric; 185 | } 186 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/viterbi39_port.c: 
-------------------------------------------------------------------------------- 1 | /* K=9 r=1/3 Viterbi decoder in portable C 2 | * Copyright Aug 2006, Phil Karn, KA9Q 3 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | */ 5 | #include 6 | #include 7 | #include 8 | #include "fec.h" 9 | 10 | typedef union { unsigned int w[256]; } metric_t; 11 | typedef union { unsigned long w[8];} decision_t; 12 | 13 | static union { unsigned char c[128]; } Branchtab39[3]; 14 | static int Init = 0; 15 | 16 | /* State info for instance of Viterbi decoder */ 17 | struct v39 { 18 | metric_t metrics1; /* path metric buffer 1 */ 19 | metric_t metrics2; /* path metric buffer 2 */ 20 | decision_t *dp; /* Pointer to current decision */ 21 | metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ 22 | decision_t *decisions; /* Beginning of decisions for block */ 23 | }; 24 | 25 | /* Initialize Viterbi decoder for start of new frame */ 26 | int init_viterbi39_port(void *p,int starting_state){ 27 | struct v39 *vp = p; 28 | int i; 29 | 30 | if(p == NULL) 31 | return -1; 32 | for(i=0;i<256;i++) 33 | vp->metrics1.w[i] = 63; 34 | 35 | vp->old_metrics = &vp->metrics1; 36 | vp->new_metrics = &vp->metrics2; 37 | vp->dp = vp->decisions; 38 | vp->old_metrics->w[starting_state & 255] = 0; /* Bias known start state */ 39 | return 0; 40 | } 41 | 42 | void set_viterbi39_polynomial_port(int polys[3]){ 43 | int state; 44 | 45 | for(state=0;state < 128;state++){ 46 | Branchtab39[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; 47 | Branchtab39[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 255 : 0; 48 | Branchtab39[2].c[state] = (polys[2] < 0) ^ parity((2*state) & abs(polys[2])) ? 
255 : 0; 49 | } 50 | Init++; 51 | } 52 | 53 | /* Create a new instance of a Viterbi decoder */ 54 | void *create_viterbi39_port(int len){ 55 | struct v39 *vp; 56 | 57 | if(!Init){ 58 | int polys[3] = {V39POLYA,V39POLYB,V39POLYC}; 59 | set_viterbi39_polynomial_port(polys); 60 | } 61 | if((vp = (struct v39 *)malloc(sizeof(struct v39))) == NULL) 62 | return NULL; 63 | 64 | if((vp->decisions = (decision_t *)malloc((len+8)*sizeof(decision_t))) == NULL){ 65 | free(vp); 66 | return NULL; 67 | } 68 | init_viterbi39_port(vp,0); 69 | 70 | return vp; 71 | } 72 | 73 | 74 | /* Viterbi chainback */ 75 | int chainback_viterbi39_port( 76 | void *p, 77 | unsigned char *data, /* Decoded output data */ 78 | unsigned int nbits, /* Number of data bits */ 79 | unsigned int endstate){ /* Terminal encoder state */ 80 | struct v39 *vp = p; 81 | decision_t *d; 82 | 83 | if(p == NULL) 84 | return -1; 85 | 86 | d = vp->decisions; 87 | /* Make room beyond the end of the encoder register so we can 88 | * accumulate a full byte of decoded data 89 | */ 90 | endstate %= 256; 91 | 92 | /* The store into data[] only needs to be done every 8 bits. 
93 | * But this avoids a conditional branch, and the writes will 94 | * combine in the cache anyway 95 | */ 96 | d += 8; /* Look past tail */ 97 | while(nbits-- != 0){ 98 | int k; 99 | 100 | k = (d[nbits].w[(endstate)/32] >> (endstate%32)) & 1; 101 | data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); 102 | } 103 | return 0; 104 | } 105 | 106 | 107 | /* Delete instance of a Viterbi decoder */ 108 | void delete_viterbi39_port(void *p){ 109 | struct v39 *vp = p; 110 | 111 | if(vp != NULL){ 112 | free(vp->decisions); 113 | free(vp); 114 | } 115 | } 116 | 117 | /* C-language butterfly */ 118 | #define BFLY(i) {\ 119 | unsigned int metric,m0,m1,decision;\ 120 | metric = (Branchtab39[0].c[i] ^ sym0) + (Branchtab39[1].c[i] ^ sym1) + \ 121 | (Branchtab39[2].c[i] ^ sym2);\ 122 | m0 = vp->old_metrics->w[i] + metric;\ 123 | m1 = vp->old_metrics->w[i+128] + (765 - metric);\ 124 | decision = (signed int)(m0-m1) > 0;\ 125 | vp->new_metrics->w[2*i] = decision ? m1 : m0;\ 126 | d->w[i/16] |= decision << ((2*i)&31);\ 127 | m0 -= (metric+metric-765);\ 128 | m1 += (metric+metric-765);\ 129 | decision = (signed int)(m0-m1) > 0;\ 130 | vp->new_metrics->w[2*i+1] = decision ? m1 : m0;\ 131 | d->w[i/16] |= decision << ((2*i+1)&31);\ 132 | } 133 | 134 | /* Update decoder with a block of demodulated symbols 135 | * Note that nbits is the number of decoded data bits, not the number 136 | * of symbols! 
137 | */ 138 | 139 | int update_viterbi39_blk_port(void *p,unsigned char *syms,int nbits){ 140 | struct v39 *vp = p; 141 | decision_t *d; 142 | 143 | if(p == NULL) 144 | return -1; 145 | 146 | d = (decision_t *)vp->dp; 147 | while(nbits--){ 148 | void *tmp; 149 | unsigned char sym0,sym1,sym2; 150 | int i; 151 | 152 | for(i=0;i<8;i++) 153 | d->w[i] = 0; 154 | sym0 = *syms++; 155 | sym1 = *syms++; 156 | sym2 = *syms++; 157 | 158 | for(i=0;i<128;i++) 159 | BFLY(i); 160 | 161 | d++; 162 | tmp = vp->old_metrics; 163 | vp->old_metrics = vp->new_metrics; 164 | vp->new_metrics = tmp; 165 | } 166 | vp->dp = d; 167 | return 0; 168 | } 169 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/viterbi615.c: -------------------------------------------------------------------------------- 1 | /* K=15 r=1/6 Viterbi decoder with optional Intel or PowerPC SIMD 2 | * Copyright Feb 2004, Phil Karn, KA9Q 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include "fec.h" 8 | 9 | /* Create a new instance of a Viterbi decoder */ 10 | void *create_viterbi615(int len){ 11 | 12 | find_cpu_mode(); 13 | 14 | switch(Cpu_mode){ 15 | case PORT: 16 | default: 17 | return create_viterbi615_port(len); 18 | #ifdef __VEC__ 19 | case ALTIVEC: 20 | return create_viterbi615_av(len); 21 | #endif 22 | #ifdef __i386__ 23 | case MMX: 24 | return create_viterbi615_mmx(len); 25 | case SSE: 26 | return create_viterbi615_sse(len); 27 | case SSE2: 28 | return create_viterbi615_sse2(len); 29 | #endif 30 | } 31 | } 32 | 33 | void set_viterbi615_polynomial(int polys[6]){ 34 | 35 | switch(Cpu_mode){ 36 | case PORT: 37 | default: 38 | set_viterbi615_polynomial_port(polys); 39 | break; 40 | #ifdef __VEC__ 41 | case ALTIVEC: 42 | set_viterbi615_polynomial_av(polys); 43 | break; 44 | #endif 45 | #ifdef __i386__ 46 | case MMX: 47 | set_viterbi615_polynomial_mmx(polys); 48 | break; 49 | case SSE: 50 | set_viterbi615_polynomial_sse(polys); 51 | break; 52 | case SSE2: 53 
| set_viterbi615_polynomial_sse2(polys); 54 | break; 55 | #endif 56 | } 57 | } 58 | 59 | /* Initialize Viterbi decoder for start of new frame */ 60 | int init_viterbi615(void *p,int starting_state){ 61 | switch(Cpu_mode){ 62 | case PORT: 63 | default: 64 | return init_viterbi615_port(p,starting_state); 65 | #ifdef __VEC__ 66 | case ALTIVEC: 67 | return init_viterbi615_av(p,starting_state); 68 | #endif 69 | #ifdef __i386__ 70 | case MMX: 71 | return init_viterbi615_mmx(p,starting_state); 72 | case SSE: 73 | return init_viterbi615_sse(p,starting_state); 74 | case SSE2: 75 | return init_viterbi615_sse2(p,starting_state); 76 | #endif 77 | } 78 | } 79 | 80 | /* Viterbi chainback */ 81 | int chainback_viterbi615( 82 | void *p, 83 | unsigned char *data, /* Decoded output data */ 84 | unsigned int nbits, /* Number of data bits */ 85 | unsigned int endstate){ /* Terminal encoder state */ 86 | 87 | switch(Cpu_mode){ 88 | case PORT: 89 | default: 90 | return chainback_viterbi615_port(p,data,nbits,endstate); 91 | #ifdef __VEC__ 92 | case ALTIVEC: 93 | return chainback_viterbi615_av(p,data,nbits,endstate); 94 | #endif 95 | #ifdef __i386__ 96 | case MMX: 97 | return chainback_viterbi615_mmx(p,data,nbits,endstate); 98 | case SSE: 99 | return chainback_viterbi615_sse(p,data,nbits,endstate); 100 | case SSE2: 101 | return chainback_viterbi615_sse2(p,data,nbits,endstate); 102 | #endif 103 | } 104 | } 105 | 106 | /* Delete instance of a Viterbi decoder */ 107 | void delete_viterbi615(void *p){ 108 | switch(Cpu_mode){ 109 | case PORT: 110 | default: 111 | delete_viterbi615_port(p); 112 | break; 113 | #ifdef __VEC__ 114 | case ALTIVEC: 115 | delete_viterbi615_av(p); 116 | break; 117 | #endif 118 | #ifdef __i386__ 119 | case MMX: 120 | delete_viterbi615_mmx(p); 121 | break; 122 | case SSE: 123 | delete_viterbi615_sse(p); 124 | break; 125 | case SSE2: 126 | delete_viterbi615_sse2(p); 127 | break; 128 | #endif 129 | } 130 | } 131 | 132 | /* Update decoder with a block of demodulated 
symbols 133 | * Note that nbits is the number of decoded data bits, not the number 134 | * of symbols! 135 | */ 136 | int update_viterbi615_blk(void *p,unsigned char syms[],int nbits){ 137 | switch(Cpu_mode){ 138 | case PORT: 139 | default: 140 | return update_viterbi615_blk_port(p,syms,nbits); 141 | #ifdef __VEC__ 142 | case ALTIVEC: 143 | return update_viterbi615_blk_av(p,syms,nbits); 144 | #endif 145 | #ifdef __i386__ 146 | case MMX: 147 | return update_viterbi615_blk_mmx(p,syms,nbits); 148 | case SSE: 149 | return update_viterbi615_blk_sse(p,syms,nbits); 150 | case SSE2: 151 | return update_viterbi615_blk_sse2(p,syms,nbits); 152 | #endif 153 | } 154 | } 155 | 156 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/viterbi615_mmx.c: -------------------------------------------------------------------------------- 1 | /* K=15 r=1/6 Viterbi decoder for x86 MMX 2 | * Mar 2004, Phil Karn, KA9Q 3 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | */ 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "fec.h" 10 | 11 | typedef union { unsigned char c[16384]; __m64 v[2048];} decision_t; 12 | typedef union { unsigned short s[16384]; __m64 v[4096];} metric_t; 13 | 14 | static union branchtab615 { unsigned short s[8192]; __m64 v[2048];} Branchtab615[6]; 15 | static int Init = 0; 16 | 17 | /* State info for instance of Viterbi decoder */ 18 | struct v615 { 19 | metric_t metrics1; /* path metric buffer 1 */ 20 | metric_t metrics2; /* path metric buffer 2 */ 21 | void *dp; /* Pointer to current decision */ 22 | metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ 23 | void *decisions; /* Beginning of decisions for block */ 24 | }; 25 | 26 | /* Initialize Viterbi decoder for start of new frame */ 27 | int init_viterbi615_mmx(void *p,int starting_state){ 28 | struct v615 *vp = p; 29 | int i; 30 | 31 | if(p == NULL) 32 | return -1; 33 | 
for(i=0;i<16384;i++) 34 | vp->metrics1.s[i] = 5000; 35 | 36 | vp->old_metrics = &vp->metrics1; 37 | vp->new_metrics = &vp->metrics2; 38 | vp->dp = vp->decisions; 39 | vp->old_metrics->s[starting_state & 16383] = 0; /* Bias known start state */ 40 | return 0; 41 | } 42 | 43 | /* Create a new instance of a Viterbi decoder */ 44 | void *create_viterbi615_mmx(int len){ 45 | struct v615 *vp; 46 | 47 | if(!Init){ 48 | int polys[6] = { V615POLYA,V615POLYB,V615POLYC,V615POLYD,V615POLYE,V615POLYF }; 49 | set_viterbi615_polynomial_mmx(polys); 50 | } 51 | 52 | if((vp = (struct v615 *)malloc(sizeof(struct v615))) == NULL) 53 | return NULL; 54 | if((vp->decisions = malloc((len+14)*sizeof(decision_t))) == NULL){ 55 | free(vp); 56 | return NULL; 57 | } 58 | init_viterbi615_mmx(vp,0); 59 | return vp; 60 | } 61 | 62 | void set_viterbi615_polynomial_mmx(int polys[6]){ 63 | int state; 64 | int i; 65 | 66 | for(state=0;state < 8192;state++){ 67 | for(i=0;i<6;i++) 68 | Branchtab615[i].s[state] = (polys[i] < 0) ^ parity((2*state) & abs(polys[i])) ? 255 : 0; 69 | } 70 | Init++; 71 | } 72 | 73 | /* Viterbi chainback */ 74 | int chainback_viterbi615_mmx( 75 | void *p, 76 | unsigned char *data, /* Decoded output data */ 77 | unsigned int nbits, /* Number of data bits */ 78 | unsigned int endstate){ /* Terminal encoder state */ 79 | struct v615 *vp = p; 80 | decision_t *d; 81 | 82 | if(p == NULL) 83 | return -1; 84 | 85 | d = (decision_t *)vp->decisions; 86 | 87 | endstate %= 16384; 88 | 89 | /* The store into data[] only needs to be done every 8 bits. 
90 | * But this avoids a conditional branch, and the writes will 91 | * combine in the cache anyway 92 | */ 93 | d += 14; /* Look past tail */ 94 | while(nbits-- != 0){ 95 | int k; 96 | 97 | k = d[nbits].c[endstate] & 1; 98 | endstate = (k << 13) | (endstate >> 1); 99 | data[nbits>>3] = endstate >> 6; 100 | } 101 | return 0; 102 | } 103 | 104 | /* Delete instance of a Viterbi decoder */ 105 | void delete_viterbi615_mmx(void *p){ 106 | struct v615 *vp = p; 107 | 108 | if(vp != NULL){ 109 | free(vp->decisions); 110 | free(vp); 111 | } 112 | } 113 | 114 | 115 | int update_viterbi615_blk_mmx(void *p,unsigned char *syms,int nbits){ 116 | struct v615 *vp = p; 117 | decision_t *d; 118 | 119 | if(p == NULL) 120 | return -1; 121 | 122 | d = (decision_t *)vp->dp; 123 | 124 | while(nbits--){ 125 | __m64 sym0v,sym1v,sym2v,sym3v,sym4v,sym5v; 126 | void *tmp; 127 | int i; 128 | 129 | /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */ 130 | sym0v = _mm_set1_pi16(syms[0]); 131 | sym1v = _mm_set1_pi16(syms[1]); 132 | sym2v = _mm_set1_pi16(syms[2]); 133 | sym3v = _mm_set1_pi16(syms[3]); 134 | sym4v = _mm_set1_pi16(syms[4]); 135 | sym5v = _mm_set1_pi16(syms[5]); 136 | syms += 6; 137 | 138 | for(i=0;i<2048;i++){ 139 | __m64 decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1; 140 | 141 | /* Form branch metrics 142 | * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255, 143 | * the XOR operations constitute conditional negation. 
144 | * metric and m_metric (-metric) are in the range 0-1530 145 | */ 146 | m0 = _mm_add_pi16(_mm_xor_si64(Branchtab615[0].v[i],sym0v),_mm_xor_si64(Branchtab615[1].v[i],sym1v)); 147 | m1 = _mm_add_pi16(_mm_xor_si64(Branchtab615[2].v[i],sym2v),_mm_xor_si64(Branchtab615[3].v[i],sym3v)); 148 | m2 = _mm_add_pi16(_mm_xor_si64(Branchtab615[4].v[i],sym4v),_mm_xor_si64(Branchtab615[5].v[i],sym5v)); 149 | metric = _mm_add_pi16(m0,_mm_add_pi16(m1,m2)); 150 | m_metric = _mm_sub_pi16(_mm_set1_pi16(1530),metric); 151 | 152 | /* Add branch metrics to path metrics */ 153 | m0 = _mm_add_pi16(vp->old_metrics->v[i],metric); 154 | m3 = _mm_add_pi16(vp->old_metrics->v[2048+i],metric); 155 | m1 = _mm_add_pi16(vp->old_metrics->v[2048+i],m_metric); 156 | m2 = _mm_add_pi16(vp->old_metrics->v[i],m_metric); 157 | 158 | /* Compare and select 159 | * There's no packed min instruction in MMX, so we use modulo arithmetic 160 | * to form the decisions and then do the select the hard way 161 | */ 162 | decision0 = _mm_cmpgt_pi16(_mm_sub_pi16(m0,m1),_mm_setzero_si64()); 163 | decision1 = _mm_cmpgt_pi16(_mm_sub_pi16(m2,m3),_mm_setzero_si64()); 164 | survivor0 = _mm_or_si64(_mm_and_si64(decision0,m1),_mm_andnot_si64(decision0,m0)); 165 | survivor1 = _mm_or_si64(_mm_and_si64(decision1,m3),_mm_andnot_si64(decision1,m2)); 166 | 167 | /* Merge decisions and store as bytes */ 168 | d->v[i] = _mm_unpacklo_pi8(_mm_packs_pi16(decision0,_mm_setzero_si64()),_mm_packs_pi16(decision1,_mm_setzero_si64())); 169 | 170 | /* Store surviving metrics */ 171 | vp->new_metrics->v[2*i] = _mm_unpacklo_pi16(survivor0,survivor1); 172 | vp->new_metrics->v[2*i+1] = _mm_unpackhi_pi16(survivor0,survivor1); 173 | } 174 | d++; 175 | /* Swap pointers to old and new metrics */ 176 | tmp = vp->old_metrics; 177 | vp->old_metrics = vp->new_metrics; 178 | vp->new_metrics = tmp; 179 | } 180 | vp->dp = d; 181 | _mm_empty(); 182 | return 0; 183 | } 184 | -------------------------------------------------------------------------------- 
/uat-decode/fec/fec-3.0/viterbi615_port.c: -------------------------------------------------------------------------------- 1 | /* K=15 r=1/6 Viterbi decoder in portable C 2 | * Copyright Mar 2004, Phil Karn, KA9Q 3 | * May be used under the terms of the GNU Lesser General Public License (LGPL) 4 | */ 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "fec.h" 10 | 11 | typedef union { unsigned long w[512]; unsigned char c[2048];} decision_t; 12 | typedef union { unsigned long w[16384]; } metric_t; 13 | 14 | static union branchtab615 { unsigned long w[8192]; } Branchtab615[6] __attribute__ ((aligned(16))); 15 | static int Init = 0; 16 | 17 | /* State info for instance of Viterbi decoder */ 18 | struct v615 { 19 | metric_t metrics1; /* path metric buffer 1 */ 20 | metric_t metrics2; /* path metric buffer 2 */ 21 | decision_t *dp; /* Pointer to current decision */ 22 | metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ 23 | decision_t *decisions; /* Beginning of decisions for block */ 24 | }; 25 | 26 | /* Create a new instance of a Viterbi decoder with room for len data bits plus 14 tail bits of decisions. Installs the default polynomials on first use. Returns NULL if either allocation fails. */ 27 | void *create_viterbi615_port(int len){ 28 | struct v615 *vp; 29 | 30 | if(!Init){ 31 | int polys[6] = { V615POLYA,V615POLYB,V615POLYC,V615POLYD,V615POLYE,V615POLYF }; 32 | set_viterbi615_polynomial_port(polys); 33 | } 34 | if((vp = (struct v615 *)malloc(sizeof(struct v615))) == NULL) 35 | return NULL; 36 | if((vp->decisions = malloc((len+14)*sizeof(decision_t))) == NULL){ 37 | free(vp); 38 | return NULL; 39 | } 40 | /* Call the portable initializer directly rather than the Cpu_mode dispatcher, so this struct is never initialized by a SIMD variant with a different layout */ init_viterbi615_port(vp,0); 41 | return vp; 42 | } 43 | 44 | void set_viterbi615_polynomial_port(int polys[6]){ 45 | int state; 46 | int i; 47 | 48 | for(state=0;state < 8192;state++){ 49 | for(i=0;i<6;i++) 50 | Branchtab615[i].w[state] = (polys[i] < 0) ^ parity((2*state) & abs(polys[i])) ? 
255 : 0; 51 | } 52 | Init++; 53 | } 54 | 55 | /* Initialize Viterbi decoder for start of new frame */ 56 | int init_viterbi615_port(void *p,int starting_state){ 57 | struct v615 *vp = p; 58 | int i; 59 | 60 | if(p == NULL) 61 | return -1; 62 | for(i=0;i<16384;i++) 63 | vp->metrics1.w[i] = 1000; 64 | 65 | vp->old_metrics = &vp->metrics1; 66 | vp->new_metrics = &vp->metrics2; 67 | vp->dp = vp->decisions; 68 | vp->old_metrics->w[starting_state & 16383] = 0; /* Bias known start state */ 69 | return 0; 70 | } 71 | 72 | /* Viterbi chainback */ 73 | int chainback_viterbi615_port( 74 | void *p, 75 | unsigned char *data, /* Decoded output data */ 76 | unsigned int nbits, /* Number of data bits */ 77 | unsigned int endstate){ /* Terminal encoder state */ 78 | struct v615 *vp = p; 79 | decision_t *d; 80 | 81 | if(p == NULL) 82 | return -1; 83 | d = (decision_t *)vp->decisions; 84 | endstate %= 16384; 85 | 86 | /* The store into data[] only needs to be done every 8 bits. 87 | * But this avoids a conditional branch, and the writes will 88 | * combine in the cache anyway 89 | */ 90 | d += 14; /* Look past tail */ 91 | while(nbits-- != 0){ 92 | int k; 93 | 94 | k = (d[nbits].c[endstate/8] >> (endstate%8)) & 1; 95 | endstate = (k << 13) | (endstate >> 1); 96 | data[nbits>>3] = endstate >> 6; 97 | } 98 | return 0; 99 | } 100 | 101 | /* Delete instance of a Viterbi decoder */ 102 | void delete_viterbi615_port(void *p){ 103 | struct v615 *vp = p; 104 | 105 | if(vp != NULL){ 106 | free(vp->decisions); 107 | free(vp); 108 | } 109 | } 110 | 111 | /* C-language butterfly */ 112 | #define BFLY(i) {\ 113 | unsigned long metric,m0,m1,m2,m3,decision0,decision1;\ 114 | metric = ((Branchtab615[0].w[i] ^ syms[0]) + (Branchtab615[1].w[i] ^ syms[1])\ 115 | +(Branchtab615[2].w[i] ^ syms[2]) + (Branchtab615[3].w[i] ^ syms[3])\ 116 | +(Branchtab615[4].w[i] ^ syms[4]) + (Branchtab615[5].w[i] ^ syms[5]));\ 117 | m0 = vp->old_metrics->w[i] + metric;\ 118 | m1 = vp->old_metrics->w[i+8192] + (1530 - 
metric);\ 119 | m2 = vp->old_metrics->w[i] + (1530-metric);\ 120 | m3 = vp->old_metrics->w[i+8192] + metric;\ 121 | decision0 = (signed long)(m0-m1) >= 0;\ 122 | decision1 = (signed long)(m2-m3) >= 0;\ 123 | vp->new_metrics->w[2*i] = decision0 ? m1 : m0;\ 124 | vp->new_metrics->w[2*i+1] = decision1 ? m3 : m2;\ 125 | d->c[i/4] |= ((decision0|(decision1<<1)) << ((2*i)&7));\ 126 | } 127 | /* Update decoder with a block of demodulated symbols 128 | * Note that nbits is the number of decoded data bits, not the number 129 | * of symbols! 130 | */ 131 | 132 | int update_viterbi615_blk_port(void *p,unsigned char *syms,int nbits){ 133 | struct v615 *vp = p; 134 | void *tmp; 135 | decision_t *d; 136 | int i; 137 | 138 | if(p == NULL) 139 | return -1; 140 | d = (decision_t *)vp->dp; 141 | while(nbits--){ 142 | memset(d,0,sizeof(decision_t)); 143 | for(i=0;i<8192;i++) 144 | BFLY(i); 145 | 146 | syms += 6; 147 | d++; 148 | /* Swap pointers to old and new metrics */ 149 | tmp = vp->old_metrics; 150 | vp->old_metrics = vp->new_metrics; 151 | vp->new_metrics = tmp; 152 | } 153 | vp->dp = d; 154 | return 0; 155 | } 156 | 157 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/vtest27.c: -------------------------------------------------------------------------------- 1 | /* Test viterbi decoder speeds */ 2 | #include "config.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #ifdef HAVE_GETOPT_H 12 | #include 13 | #endif 14 | #include "fec.h" 15 | 16 | #if HAVE_GETOPT_LONG 17 | struct option Options[] = { 18 | {"frame-length",1,NULL,'l'}, 19 | {"frame-count",1,NULL,'n'}, 20 | {"ebn0",1,NULL,'e'}, 21 | {"gain",1,NULL,'g'}, 22 | {"verbose",0,NULL,'v'}, 23 | {"force-altivec",0,NULL,'a'}, 24 | {"force-port",0,NULL,'p'}, 25 | {"force-mmx",0,NULL,'m'}, 26 | {"force-sse",0,NULL,'s'}, 27 | {"force-sse2",0,NULL,'t'}, 28 | {NULL}, 29 | }; 30 | #endif 31 | 32 | #define RATE 
(1./2.) 33 | #define MAXBYTES 10000 34 | 35 | double Gain = 32.0; 36 | int Verbose = 0; 37 | 38 | int main(int argc,char *argv[]){ 39 | int i,d,tr; 40 | int sr=0,trials = 10000,errcnt,framebits=2048; 41 | long long int tot_errs=0; 42 | unsigned char bits[MAXBYTES]; 43 | unsigned char data[MAXBYTES]; 44 | unsigned char xordata[MAXBYTES]; 45 | unsigned char symbols[8*2*(MAXBYTES+6)]; 46 | void *vp; 47 | extern char *optarg; 48 | struct rusage start,finish; 49 | double extime; 50 | double gain,esn0,ebn0; 51 | time_t t; 52 | int badframes=0; 53 | 54 | time(&t); 55 | srandom(t); 56 | ebn0 = -100; 57 | #if HAVE_GETOPT_LONG 58 | while((d = getopt_long(argc,argv,"l:n:te:g:vapmst",Options,NULL)) != EOF){ 59 | #else 60 | while((d = getopt(argc,argv,"l:n:te:g:vapmst")) != EOF){ 61 | #endif 62 | switch(d){ 63 | case 'a': 64 | Cpu_mode = ALTIVEC; 65 | break; 66 | case 'p': 67 | Cpu_mode = PORT; 68 | break; 69 | case 'm': 70 | Cpu_mode = MMX; 71 | break; 72 | case 's': 73 | Cpu_mode = SSE; 74 | break; 75 | case 't': 76 | Cpu_mode = SSE2; 77 | break; 78 | case 'l': 79 | framebits = atoi(optarg); 80 | break; 81 | case 'n': 82 | trials = atoi(optarg); 83 | break; 84 | case 'e': 85 | ebn0 = atof(optarg); 86 | break; 87 | case 'g': 88 | Gain = atof(optarg); 89 | break; 90 | case 'v': 91 | Verbose++; 92 | break; 93 | } 94 | } 95 | if(framebits > 8*MAXBYTES){ 96 | fprintf(stderr,"Frame limited to %d bits\n",MAXBYTES*8); 97 | framebits = MAXBYTES*8; 98 | } 99 | if((vp = create_viterbi27(framebits)) == NULL){ 100 | printf("create_viterbi27 failed\n"); 101 | exit(1); 102 | } 103 | if(ebn0 != -100){ 104 | esn0 = ebn0 + 10*log10((double)RATE); /* Es/No in dB */ 105 | /* Compute noise voltage. The 0.5 factor accounts for BPSK seeing 106 | * only half the noise power, and the sqrt() converts power to 107 | * voltage. 
108 | */ 109 | gain = 1./sqrt(0.5/pow(10.,esn0/10.)); 110 | 111 | printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain); 112 | 113 | for(tr=0;tr 1 && errcnt != 0){ 141 | printf("frame %d, %d errors: ",tr,errcnt); 142 | for(i=0;i 1) 154 | printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain); 155 | else if(Verbose == 0) 156 | printf("BER %lld/%lld (%.3g) FER %d/%d (%.3g)\n", 157 | tot_errs,(long long)framebits*trials,tot_errs/((double)framebits*trials), 158 | badframes,tr+1,(double)badframes/(tr+1)); 159 | else 160 | printf("\n"); 161 | 162 | } else { 163 | /* Do time trials */ 164 | memset(symbols,127,sizeof(symbols)); 165 | printf("Starting time trials\n"); 166 | getrusage(RUSAGE_SELF,&start); 167 | for(tr=0;tr < trials;tr++){ 168 | /* Initialize Viterbi decoder */ 169 | init_viterbi27(vp,0); 170 | 171 | /* Decode block plus the 6 tail bits the symbols buffer is sized for, so chainback does not read decisions left unwritten in this pass (the K=15 test likewise passes framebits+14) */ 172 | update_viterbi27_blk(vp,symbols,framebits+6); 173 | 174 | /* Do Viterbi chainback */ 175 | chainback_viterbi27(vp,data,framebits,0); 176 | } 177 | getrusage(RUSAGE_SELF,&finish); 178 | extime = finish.ru_utime.tv_sec - start.ru_utime.tv_sec + 1e-6*(finish.ru_utime.tv_usec - start.ru_utime.tv_usec); 179 | printf("Execution time for %d %d-bit frames: %.2f sec\n",trials, 180 | framebits,extime); 181 | printf("decoder speed: %g bits/s\n",trials*framebits/extime); 182 | } 183 | exit(0); 184 | } 185 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/vtest29.c: -------------------------------------------------------------------------------- 1 | /* Test viterbi decoder speeds */ 2 | #include "config.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #ifdef HAVE_GETOPT_H 12 | #include 13 | #endif 14 | #include "fec.h" 15 | 16 | #if HAVE_GETOPT_LONG 17 | struct option Options[] = { 18 | {"frame-length",1,NULL,'l'}, 19 | {"frame-count",1,NULL,'n'}, 20 | 
{"ebn0",1,NULL,'e'}, 21 | {"gain",1,NULL,'g'}, 22 | {"verbose",0,NULL,'v'}, 23 | {"force-altivec",0,NULL,'a'}, 24 | {"force-port",0,NULL,'p'}, 25 | {"force-mmx",0,NULL,'m'}, 26 | {"force-sse",0,NULL,'s'}, 27 | {"force-sse2",0,NULL,'t'}, 28 | {NULL}, 29 | }; 30 | #endif 31 | 32 | #define RATE (1./2.) 33 | #define MAXBYTES 10000 34 | 35 | double Gain = 32.0; 36 | int Verbose = 0; 37 | 38 | int main(int argc,char *argv[]){ 39 | int i,d,tr; 40 | int sr=0,trials = 10000,errcnt,framebits=2048; 41 | long long tot_errs=0; 42 | unsigned char bits[MAXBYTES]; 43 | unsigned char data[MAXBYTES]; 44 | unsigned char xordata[MAXBYTES]; 45 | unsigned char symbols[8*2*(MAXBYTES+8)]; 46 | void *vp; 47 | extern char *optarg; 48 | struct rusage start,finish; 49 | double extime; 50 | double gain,esn0,ebn0; 51 | time_t t; 52 | int badframes=0; 53 | 54 | time(&t); 55 | srandom(t); 56 | ebn0 = -100; 57 | #if HAVE_GETOPT_LONG 58 | while((d = getopt_long(argc,argv,"l:n:te:g:vapmst",Options,NULL)) != EOF){ 59 | #else 60 | while((d = getopt(argc,argv,"l:n:te:g:vapmst")) != EOF){ 61 | #endif 62 | switch(d){ 63 | case 'a': 64 | Cpu_mode = ALTIVEC; 65 | break; 66 | case 'p': 67 | Cpu_mode = PORT; 68 | break; 69 | case 'm': 70 | Cpu_mode = MMX; 71 | break; 72 | case 's': 73 | Cpu_mode = SSE; 74 | break; 75 | case 't': 76 | Cpu_mode = SSE2; 77 | break; 78 | case 'l': 79 | framebits = atoi(optarg); 80 | break; 81 | case 'n': 82 | trials = atoi(optarg); 83 | break; 84 | case 'e': 85 | ebn0 = atof(optarg); 86 | break; 87 | case 'g': 88 | Gain = atof(optarg); 89 | break; 90 | case 'v': 91 | Verbose++; 92 | break; 93 | } 94 | } 95 | if(framebits > 8*MAXBYTES){ 96 | fprintf(stderr,"Frame limited to %d bits\n",MAXBYTES*8); 97 | framebits = MAXBYTES*8; 98 | } 99 | if((vp = create_viterbi29(framebits)) == NULL){ 100 | printf("create_viterbi29 failed\n"); 101 | exit(1); 102 | } 103 | if(ebn0 != -100){ 104 | esn0 = ebn0 + 10*log10((double)RATE); /* Es/No in dB */ 105 | /* Compute noise voltage. 
The 0.5 factor accounts for BPSK seeing 106 | * only half the noise power, and the sqrt() converts power to 107 | * voltage. 108 | */ 109 | gain = 1./sqrt(0.5/pow(10.,esn0/10.)); 110 | 111 | printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain); 112 | 113 | for(tr=0;tr 1 && errcnt != 0){ 141 | printf("frame %d, %d errors: ",tr,errcnt); 142 | for(i=0;i 1) 154 | printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain); 155 | else if(Verbose == 0) 156 | printf("BER %lld/%lld (%.3g) FER %d/%d (%.3g)\n", 157 | tot_errs,(long long)framebits*trials,tot_errs/((double)framebits*trials), 158 | badframes,tr+1,(double)badframes/(tr+1)); 159 | else 160 | printf("\n"); 161 | } else { 162 | /* Do time trials */ 163 | memset(symbols,127,sizeof(symbols)); 164 | printf("Starting time trials\n"); 165 | getrusage(RUSAGE_SELF,&start); 166 | for(tr=0;tr < trials;tr++){ 167 | /* Initialize Viterbi decoder */ 168 | init_viterbi29(vp,0); 169 | 170 | /* Decode block plus the 8 tail bits the symbols buffer is sized for, so chainback does not read decisions left unwritten in this pass (the K=15 test likewise passes framebits+14) */ 171 | update_viterbi29_blk(vp,symbols,framebits+8); 172 | 173 | /* Do Viterbi chainback */ 174 | chainback_viterbi29(vp,data,framebits,0); 175 | } 176 | getrusage(RUSAGE_SELF,&finish); 177 | extime = finish.ru_utime.tv_sec - start.ru_utime.tv_sec + 1e-6*(finish.ru_utime.tv_usec - start.ru_utime.tv_usec); 178 | printf("Execution time for %d %d-bit frames: %.2f sec\n",trials, 179 | framebits,extime); 180 | printf("decoder speed: %g bits/s\n",trials*framebits/extime); 181 | } 182 | exit(0); 183 | } 184 | 185 | 186 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/vtest39.c: -------------------------------------------------------------------------------- 1 | /* Test viterbi decoder speeds */ 2 | #include "config.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #ifdef HAVE_GETOPT_H 12 | #include 13 | #endif 14 | #include "fec.h" 15 | 16 | #if 
HAVE_GETOPT_LONG 17 | struct option Options[] = { 18 | {"frame-length",1,NULL,'l'}, 19 | {"frame-count",1,NULL,'n'}, 20 | {"ebn0",1,NULL,'e'}, 21 | {"gain",1,NULL,'g'}, 22 | {"verbose",0,NULL,'v'}, 23 | {"force-altivec",0,NULL,'a'}, 24 | {"force-port",0,NULL,'p'}, 25 | {"force-mmx",0,NULL,'m'}, 26 | {"force-sse",0,NULL,'s'}, 27 | {"force-sse2",0,NULL,'t'}, 28 | {NULL}, 29 | }; 30 | #endif 31 | 32 | #define RATE (1./3.) 33 | #define MAXBYTES 10000 34 | 35 | double Gain = 32.0; 36 | int Verbose = 0; 37 | 38 | int main(int argc,char *argv[]){ 39 | int i,d,tr; 40 | int sr=0,trials = 10000,errcnt,framebits=2048; 41 | long long tot_errs=0; 42 | unsigned char bits[MAXBYTES]; 43 | unsigned char data[MAXBYTES]; 44 | unsigned char xordata[MAXBYTES]; 45 | unsigned char symbols[8*3*(MAXBYTES+8)]; 46 | void *vp; 47 | extern char *optarg; 48 | struct rusage start,finish; 49 | double extime; 50 | double gain,esn0,ebn0; 51 | time_t t; 52 | int badframes=0; 53 | 54 | time(&t); 55 | srandom(t); 56 | ebn0 = -100; 57 | #if HAVE_GETOPT_LONG 58 | while((d = getopt_long(argc,argv,"l:n:te:g:vapmst",Options,NULL)) != EOF){ 59 | #else 60 | while((d = getopt(argc,argv,"l:n:te:g:vapmst")) != EOF){ 61 | #endif 62 | switch(d){ 63 | case 'a': 64 | Cpu_mode = ALTIVEC; 65 | break; 66 | case 'p': 67 | Cpu_mode = PORT; 68 | break; 69 | case 'm': 70 | Cpu_mode = MMX; 71 | break; 72 | case 's': 73 | Cpu_mode = SSE; 74 | break; 75 | case 't': 76 | Cpu_mode = SSE2; 77 | break; 78 | case 'l': 79 | framebits = atoi(optarg); 80 | break; 81 | case 'n': 82 | trials = atoi(optarg); 83 | break; 84 | case 'e': 85 | ebn0 = atof(optarg); 86 | break; 87 | case 'g': 88 | Gain = atof(optarg); 89 | break; 90 | case 'v': 91 | Verbose++; 92 | break; 93 | } 94 | } 95 | if(framebits > 8*MAXBYTES){ 96 | fprintf(stderr,"Frame limited to %d bits\n",MAXBYTES*8); 97 | framebits = MAXBYTES*8; 98 | } 99 | if((vp = create_viterbi39(framebits)) == NULL){ 100 | printf("create_viterbi39 failed\n"); 101 | exit(1); 102 | } 103 | 
if(ebn0 != -100){ 104 | esn0 = ebn0 + 10*log10((double)RATE); /* Es/No in dB */ 105 | /* Compute noise voltage. The 0.5 factor accounts for BPSK seeing 106 | * only half the noise power, and the sqrt() converts power to 107 | * voltage. 108 | */ 109 | gain = 1./sqrt(0.5/pow(10.,esn0/10.)); 110 | 111 | printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain); 112 | 113 | for(tr=0;tr 1 && errcnt != 0){ 142 | printf("frame %d, %d errors: ",tr,errcnt); 143 | for(i=0;i 1) 155 | printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain); 156 | else if(Verbose == 0) 157 | printf("BER %lld/%lld (%.3g) FER %d/%d (%.3g)\n", 158 | tot_errs,(long long)framebits*trials,tot_errs/((double)framebits*trials), 159 | badframes,tr+1,(double)badframes/(tr+1)); 160 | else 161 | printf("\n"); 162 | } else { 163 | /* Do time trials */ 164 | memset(symbols,127,sizeof(symbols)); 165 | printf("Starting time trials\n"); 166 | getrusage(RUSAGE_SELF,&start); 167 | for(tr=0;tr < trials;tr++){ 168 | /* Initialize Viterbi decoder */ 169 | init_viterbi39(vp,0); 170 | 171 | /* Decode block plus the 8 tail bits: the K=9 chainback skips 8 decisions ("Look past tail"), so decoding only framebits would leave the last 8 decisions it reads unwritten in this pass */ 172 | update_viterbi39_blk(vp,symbols,framebits+8); 173 | 174 | /* Do Viterbi chainback */ 175 | chainback_viterbi39(vp,data,framebits,0); 176 | } 177 | getrusage(RUSAGE_SELF,&finish); 178 | extime = finish.ru_utime.tv_sec - start.ru_utime.tv_sec + 1e-6*(finish.ru_utime.tv_usec - start.ru_utime.tv_usec); 179 | printf("Execution time for %d %d-bit frames: %.2f sec\n",trials, 180 | framebits,extime); 181 | printf("decoder speed: %g bits/s\n",trials*framebits/extime); 182 | } 183 | exit(0); 184 | } 185 | 186 | 187 | -------------------------------------------------------------------------------- /uat-decode/fec/fec-3.0/vtest615.c: -------------------------------------------------------------------------------- 1 | /* Test viterbi decoder speeds */ 2 | #include "config.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 
| #include 10 | #include 11 | #ifdef HAVE_GETOPT_H 12 | #include 13 | #endif 14 | #include "fec.h" 15 | 16 | #if HAVE_GETOPT_LONG 17 | struct option Options[] = { 18 | {"frame-length",1,NULL,'l'}, 19 | {"frame-count",1,NULL,'n'}, 20 | {"ebn0",1,NULL,'e'}, 21 | {"gain",1,NULL,'g'}, 22 | {"verbose",0,NULL,'v'}, 23 | {"force-altivec",0,NULL,'a'}, 24 | {"force-port",0,NULL,'p'}, 25 | {"force-mmx",0,NULL,'m'}, 26 | {"force-sse",0,NULL,'s'}, 27 | {"force-sse2",0,NULL,'t'}, 28 | {NULL}, 29 | }; 30 | #endif 31 | 32 | #define RATE (1./6.) 33 | #define MAXBYTES 10000 34 | #define OFFSET (127.5) 35 | #define CLIP 255 36 | 37 | double Gain = 24.0; 38 | int Verbose = 0; 39 | 40 | int main(int argc,char *argv[]){ 41 | int i,d,tr; 42 | int sr=0,trials = 10,errcnt,framebits=2048; 43 | int tot_errs=0; 44 | unsigned char bits[MAXBYTES]; 45 | unsigned char data[MAXBYTES]; 46 | unsigned char xordata[MAXBYTES]; 47 | unsigned char symbols[8*6*(MAXBYTES+14)]; 48 | void *vp; 49 | extern char *optarg; 50 | struct rusage start,finish; 51 | double extime; 52 | double gain,esn0,ebn0; 53 | time_t t; 54 | int badframes=0; 55 | 56 | time(&t); 57 | srandom(t); 58 | ebn0 = -100; 59 | #if HAVE_GETOPT_LONG 60 | while((d = getopt_long(argc,argv,"l:n:te:g:vapmst",Options,NULL)) != EOF){ 61 | #else 62 | while((d = getopt(argc,argv,"l:n:te:g:vapmst")) != EOF){ 63 | #endif 64 | switch(d){ 65 | case 'a': 66 | Cpu_mode = ALTIVEC; 67 | break; 68 | case 'p': 69 | Cpu_mode = PORT; 70 | break; 71 | case 'm': 72 | Cpu_mode = MMX; 73 | break; 74 | case 's': 75 | Cpu_mode = SSE; 76 | break; 77 | case 't': 78 | Cpu_mode = SSE2; 79 | break; 80 | case 'l': 81 | framebits = atoi(optarg); 82 | break; 83 | case 'n': 84 | trials = atoi(optarg); 85 | break; 86 | case 'e': 87 | ebn0 = atof(optarg); 88 | break; 89 | case 'g': 90 | Gain = atof(optarg); 91 | break; 92 | case 'v': 93 | Verbose++; 94 | break; 95 | } 96 | } 97 | if(framebits > 8*MAXBYTES){ 98 | fprintf(stderr,"Frame limited to %d bits\n",MAXBYTES*8); 99 | 
framebits = MAXBYTES*8; 100 | } 101 | if((vp = create_viterbi615(framebits)) == NULL){ 102 | printf("create_viterbi615 failed\n"); 103 | exit(1); 104 | } 105 | if(ebn0 != -100){ 106 | esn0 = ebn0 + 10*log10((double)RATE); /* Es/No in dB */ 107 | /* Compute noise voltage. The 0.5 factor accounts for BPSK seeing 108 | * only half the noise power, and the sqrt() converts power to 109 | * voltage. 110 | */ 111 | gain = 1./sqrt(0.5/pow(10.,esn0/10.)); 112 | 113 | printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain); 114 | 115 | for(tr=0;tr 1 && errcnt != 0){ 147 | printf("frame %d, %d errors: ",tr,errcnt); 148 | for(i=0;i 1) 162 | printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain); 163 | else if(Verbose == 0) 164 | printf("BER %d/%d (%.3g) FER %d/%d (%.3g)\n", 165 | tot_errs,framebits*(tr+1),tot_errs/((double)framebits*(tr+1)), 166 | badframes,(tr+1),(double)badframes/(tr+1)); 167 | else 168 | printf("\n"); 169 | } else { 170 | /* Do time trials */ 171 | memset(symbols,127,sizeof(symbols)); 172 | printf("Starting time trials\n"); 173 | getrusage(RUSAGE_SELF,&start); 174 | for(tr=0;tr < trials;tr++){ 175 | /* Initialize Viterbi decoder */ 176 | init_viterbi615(vp,0); 177 | 178 | /* Decode block */ 179 | update_viterbi615_blk(vp,symbols,framebits+14); 180 | 181 | /* Do Viterbi chainback */ 182 | chainback_viterbi615(vp,data,framebits,0); 183 | } 184 | getrusage(RUSAGE_SELF,&finish); 185 | extime = finish.ru_utime.tv_sec - start.ru_utime.tv_sec + 1e-6*(finish.ru_utime.tv_usec - start.ru_utime.tv_usec); 186 | printf("Execution time for %d %d-bit frames: %.2f sec\n",trials, 187 | framebits,extime); 188 | printf("decoder speed: %g bits/s\n",trials*framebits/extime); 189 | } 190 | exit(0); 191 | } 192 | -------------------------------------------------------------------------------- /uat-decode/radar.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/python 2 | 3 | # This file (C) David Carr 2012. 4 | # All rights reserved. 5 | 6 | import cairo 7 | import math 8 | import re 9 | import sys 10 |
def draw(blocks_m, blocks_h, out_path="test.png"):
    """Render decoded radar blocks to a PNG image.

    blocks_m -- iterable of (block_num, runs, scale) tuples, painted first.
    blocks_h -- iterable of (block_num, runs, scale) tuples, painted second
                so that they end up on top of anything in blocks_m.
    out_path -- file name of the PNG to write (defaults to "test.png").

    The canvas is a fixed window: 35 degrees of latitude starting at 20 and
    70 degrees of longitude starting at 230, at 60 pixels per degree.
    """
    PPD = 60.0  # pixels per degree
    lat_start = 20
    lat_range = 35
    lon_start = 230
    lon_range = 70

    # ImageSurface takes integer pixel dimensions; PPD is a float, so the
    # products have to be converted explicitly.
    pixels_x = int(round(lon_range * PPD))
    pixels_y = int(round(lat_range * PPD))

    surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, pixels_x, pixels_y)
    ctx = cairo.Context(surface)

    # Work in degrees from here on. The negative y factor flips the axis so
    # that larger latitudes are drawn higher up in the image.
    ctx.scale(PPD, -PPD)
    ctx.translate(-lon_start, -(lat_start + lat_range))

    # Black background over the whole window.
    ctx.set_source_rgb(0, 0, 0)
    ctx.rectangle(lon_start, lat_start, lon_range, lat_range)
    ctx.fill()

    # Draw in ascending resolution: coarser blocks first, finer ones on top.
    for block, runs, scale in blocks_m:
        draw_block(ctx, block, runs, scale)

    for block, runs, scale in blocks_h:
        draw_block(ctx, block, runs, scale)

    surface.write_to_png(out_path)  # Output to PNG


def draw_block(ctx, block_num, runs, scale):
    """Paint one 128-bin radar block onto ctx.

    ctx       -- drawing context; set_source_rgba(), rectangle() and fill()
                 are the only methods used.
    block_num -- integer block number; the column is block_num % 450 and the
                 row is block_num // 450.
    runs      -- sequence of (length, intensity) pairs whose lengths must add
                 up to exactly 128.
    scale     -- multiplier applied to the size of every bin.

    Raises ValueError when the runs do not expand to exactly 128 bins.
    """
    bins_per_block = 128
    bins_per_row = 32
    bin_width = 0.025
    bin_height = 0.0166666667

    # Floor division keeps the row index an integer regardless of which
    # division semantics the interpreter applies to "/".
    blon = (block_num % 450) * 0.8
    blat = (block_num // 450) * 0.0666666667

    # Decode runs into bins.
    bins = []
    for run in runs:
        bins.extend([run[1]] * run[0])
    if len(bins) != bins_per_block:
        raise ValueError(
            "block %d: runs expand to %d bins, expected %d"
            % (block_num, len(bins), bins_per_block))

    # Draw bins; bins with no intensity are left as background.
    for i, intensity in enumerate(bins):
        if not intensity > 0:
            continue
        lon = blon + (i % bins_per_row) * bin_width * scale
        # NOTE(review): row 0 starts at blat + 0.0666666667*scale and the
        # rectangle extends toward larger latitudes from there -- confirm
        # that this one-cell offset is intended.
        lat = blat + 0.0666666667 * scale - ((i // bins_per_row) * bin_height * scale)
        if intensity >= 4:
            ctx.set_source_rgba(1, 0, 0, 1)
        elif intensity >= 3:
            ctx.set_source_rgba(1, 1, 0, 1)
        elif intensity >= 2:
            ctx.set_source_rgba(0, 1, 0, 1)
        else:
            ctx.set_source_rgba(0, 0, 1, 1)
        ctx.rectangle(lon, lat, bin_width * scale, bin_height * scale)
        ctx.fill()

### MAIN 69 | filename = sys.argv[1] 70 | print "Opening file %s" % filename 71 | infile = open(filename) 72 | 73 | #Parse output 74 | lines = 
infile.readlines() 75 | blocks_m = [] 76 | blocks_h = [] 77 | i=0 78 | while i[\d]+)", lines[i]) 98 | if m == None: 99 | print "Wierd" 100 | continue 101 | block_num = int(m.group("num")) 102 | 103 | #ignore lat/lon 104 | i+=1 105 | 106 | #Grab runs 107 | i+=1 108 | runs = [] 109 | for j in xrange(0,128): 110 | if lines[i][0:11] == "[End block]": 111 | #print "END" 112 | if scale == 1: 113 | blocks_h.append((block_num, runs, scale)) 114 | elif scale == 5: 115 | blocks_m.append((block_num, runs, scale)) 116 | else: 117 | print "Hmm. Low resolution blocks not supported." 118 | break 119 | 120 | #print lines[i], 121 | 122 | s = "Run: (?P[\d]+)[\s]+intensity: (?P[\d]+)" 123 | m = re.search(s, lines[i]) 124 | if m: 125 | runs.append((int(m.group("rl")), int(m.group("intensity")))) 126 | else: 127 | print "Strange" 128 | pass 129 | i+=1 130 | #Bitmap encoding "empty" block 131 | else: 132 | while True: 133 | #Block number 134 | i+=1 135 | if lines[i][0:11] == "[End block]": 136 | break 137 | else: 138 | m = re.search("Block (?P[\d]+)", lines[i]) 139 | if m == None: 140 | continue 141 | block_num = int(m.group("num")) 142 | #print block_num 143 | 144 | if scale == 1: 145 | blocks_h.append((block_num, [(128, 1)], scale)) 146 | elif scale == 5: 147 | blocks_m.append((block_num, [(128, 1)], scale)) 148 | else: 149 | print "Hmm. Low resolution blocks not supported." 150 | 151 | i+=1 152 | 153 | draw(blocks_m, blocks_h) 154 | --------------------------------------------------------------------------------