├── .gitignore ├── LICENSE ├── README.md ├── borip ├── client.go ├── example │ └── rx.go └── packet.go ├── boripserver ├── devices.go └── main.go ├── dsp ├── ax25 │ ├── constants.go │ ├── crc.go │ └── hdlc.go ├── biquad.go ├── conversion.go ├── conversion_386.s ├── conversion_amd64.s ├── conversion_arm.s ├── conversion_arm64.s ├── conversion_avo_amd64.go ├── conversion_avo_amd64.s ├── conversion_test.go ├── cpu_amd64.go ├── cpu_arm.go ├── cpu_arm64.go ├── cpu_arm64_test.go ├── cpu_arm_test.go ├── cpu_x86_test.go ├── demod.go ├── demod_386.s ├── demod_amd64.s ├── demod_arm.s ├── demod_arm64.s ├── demod_test.go ├── downsample.go ├── downsample_386.s ├── downsample_amd64.s ├── downsample_arm.s ├── downsample_arm64.s ├── downsample_test.go ├── dtmf │ └── dtmf.go ├── filter.go ├── fuzz.go ├── goertzel.go ├── goertzel_test.go ├── iaca.h ├── internal │ └── cpu │ │ ├── cpu.go │ │ ├── cpu.s │ │ ├── cpu_386.go │ │ ├── cpu_amd64.go │ │ ├── cpu_arm.go │ │ ├── cpu_arm64.go │ │ ├── cpu_arm64.s │ │ ├── cpu_arm64_android.go │ │ ├── cpu_arm64_darwin.go │ │ ├── cpu_arm64_freebsd.go │ │ ├── cpu_arm64_hwcap.go │ │ ├── cpu_arm64_linux.go │ │ ├── cpu_arm64_other.go │ │ ├── cpu_mips.go │ │ ├── cpu_mips64x.go │ │ ├── cpu_mipsle.go │ │ ├── cpu_no_name.go │ │ ├── cpu_ppc64x.go │ │ ├── cpu_ppc64x_aix.go │ │ ├── cpu_ppc64x_linux.go │ │ ├── cpu_riscv64.go │ │ ├── cpu_s390x.go │ │ ├── cpu_s390x.s │ │ ├── cpu_s390x_test.go │ │ ├── cpu_test.go │ │ ├── cpu_wasm.go │ │ ├── cpu_x86.go │ │ ├── cpu_x86.s │ │ ├── cpu_x86_test.go │ │ └── export_test.go ├── interpolate.go ├── math32.go ├── math32_386.s ├── math32_amd64.s ├── math32_arm.s ├── math32_arm64.s ├── math32_test.go ├── mathfixed.go ├── mathfixed_arm.s ├── mathfixed_test.go ├── sdft.go ├── stub_windows.go ├── util.go ├── util_386.s ├── util_amd64.s ├── util_arm.s ├── util_arm64.s ├── util_test.go └── window.go └── examples ├── ax25.go ├── dtmf_file.go └── dtmf_live.go /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Samuel Stauffer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
// ErrUnexpectedResponse is returned when a server reply does not have the
// shape expected for the command that was sent.
var ErrUnexpectedResponse = errors.New("borip: unexpected response from server")

// ErrResponse is a failure reported by the server in place of "OK".
type ErrResponse struct {
	errorType string // first word following the command echo
	msg       string // remaining words of the reply; may be empty
}

// Error implements the error interface.
func (e ErrResponse) Error() string {
	if e.msg == "" {
		return "borip: " + e.errorType
	}
	return fmt.Sprintf("borip: %s %s", e.errorType, e.msg)
}

// makeErrorResponse builds an ErrResponse from the words of a reply that
// follow the command echo. The first word becomes the error type and the
// rest are joined into the message.
func makeErrorResponse(parts []string) ErrResponse {
	if len(parts) == 0 {
		return ErrResponse{}
	}
	return ErrResponse{errorType: parts[0], msg: strings.Join(parts[1:], " ")}
}

// Device describes a device as reported by the server in a DEVICE reply.
type Device struct {
	Name, Serial               string
	MinGain, MaxGain, GainStep float64
	FPGAFreq                   float64 // Hz
	SamplesPerPacket           int     // complex 4-byte samples (16-bit I/Q) per packet
	ValidAntennas              []string
}

// parseDeviceString parses the "|"-separated device description of a DEVICE
// reply, for example:
//
//	Terratec NOXON (rev 3)|-5.000000|30.000000|1.000000|3200000.000000|16256|(Default)|Terratec NOXON (rev 3)
//
// It returns ErrUnexpectedResponse when fewer than eight fields are present
// and the strconv error when a numeric field cannot be parsed.
func parseDeviceString(st string) (*Device, error) {
	fields := strings.Split(st, "|")
	if len(fields) < 8 {
		return nil, ErrUnexpectedResponse
	}
	d := &Device{
		Name:          fields[0],
		Serial:        fields[7],
		ValidAntennas: strings.Split(fields[6], ","),
	}
	// Fields 1..4 are floats, in this order.
	floats := [...]*float64{&d.MinGain, &d.MaxGain, &d.GainStep, &d.FPGAFreq}
	for i, dst := range floats {
		v, err := strconv.ParseFloat(fields[i+1], 64)
		if err != nil {
			return nil, err
		}
		*dst = v
	}
	n, err := strconv.ParseInt(fields[5], 10, 32)
	if err != nil {
		return nil, err
	}
	d.SamplesPerPacket = int(n)
	return d, nil
}

// BorIP is a client for the line-based control connection of a BorIP server.
type BorIP struct {
	conn    net.Conn
	rd      *bufio.Reader
	wr      *bufio.Writer
	running bool // After a successful "GO" is sent to the server

	device *Device
}

// Dial connects to the server at addr over TCP and consumes its greeting
// line. An unexpected greeting is logged but is not treated as an error.
func Dial(addr string) (*BorIP, error) {
	conn, err := net.Dial("tcp", addr)
	if err != nil {
		return nil, err
	}
	bor := &BorIP{
		conn: conn,
		rd:   bufio.NewReader(conn),
		wr:   bufio.NewWriter(conn),
	}
	line, err := bor.rd.ReadString('\n')
	if err != nil {
		// Do not leak the socket when the greeting cannot be read.
		conn.Close()
		return nil, err
	}
	line = strings.TrimSpace(line)
	if line != "DEVICE -" {
		log.Printf("Unexpected hello from server: %s", line)
	}
	return bor, nil
}

// SelectDevice sends "DEVICE <hint>" and returns the device the server
// selected. A reply of "DEVICE -" (as seen after selecting "!") deselects the
// current device and yields (nil, nil). A reply of "DEVICE -<text>" is
// returned as an error carrying <text>.
func (bor *BorIP) SelectDevice(hint string) (*Device, error) {
	res, err := bor.command("DEVICE", hint)
	if err != nil {
		return nil, err
	}
	resParts := strings.SplitN(res, " ", 2)
	if len(resParts) < 2 || resParts[0] != "DEVICE" || resParts[1] == "" {
		return nil, ErrUnexpectedResponse
	}
	info := resParts[1]
	if strings.HasPrefix(info, "-") {
		if len(info) == 1 {
			// Probably selected ! which is not an error. Just deselects the device.
			bor.device = nil
			return nil, nil
		}
		return nil, errors.New("borip: " + strings.TrimSpace(info[1:]))
	}
	dev, err := parseDeviceString(info)
	if err != nil {
		return nil, err
	}
	bor.device = dev
	return dev, nil
}

// Device returns the most recently selected device, or nil if none is
// selected.
func (bor *BorIP) Device() *Device {
	return bor.device
}

// SetFrequency sends "FREQ <freq>". The four returned values are taken from
// the reply words following "OK", in order; words that are absent or fail to
// parse leave the corresponding result at whatever strconv.ParseFloat
// returned (zero for absent words).
func (bor *BorIP) SetFrequency(freq float64) (targetIF, actualIF, targetDDC, actualDDC float64, err error) {
	res, err := bor.command("FREQ", strconv.FormatFloat(freq, 'f', -1, 64))
	if err != nil {
		return 0, 0, 0, 0, err
	}
	parts, err := parseAndCheck(res, 1)
	if err != nil {
		return 0, 0, 0, 0, err
	}
	results := [...]*float64{&targetIF, &actualIF, &targetDDC, &actualDDC}
	for i, dst := range results {
		if len(parts) <= i+2 {
			break
		}
		// Parse errors are deliberately ignored: these values are optional.
		*dst, _ = strconv.ParseFloat(parts[i+2], 64)
	}
	return targetIF, actualIF, targetDDC, actualDDC, nil
}

// Frequency sends "FREQ" and parses everything after the first five bytes of
// the reply ("FREQ ") as a float.
func (bor *BorIP) Frequency() (float64, error) {
	res, err := bor.command("FREQ")
	if err != nil {
		return 0.0, err
	}
	if len(res) < 6 {
		return 0.0, ErrUnexpectedResponse
	}
	return strconv.ParseFloat(res[5:], 64)
}

// SetAntenna sends "ANTENNA <ant>" and reports whether the server answered OK.
func (bor *BorIP) SetAntenna(ant string) error {
	res, err := bor.command("ANTENNA", ant)
	if err != nil {
		return err
	}
	_, err = parseAndCheck(res, 1)
	return err
}

// Antenna sends "ANTENNA" and returns everything after the first eight bytes
// of the reply ("ANTENNA ").
func (bor *BorIP) Antenna() (string, error) {
	res, err := bor.command("ANTENNA")
	if err != nil {
		return "", err
	}
	if len(res) < 8 {
		return "", ErrUnexpectedResponse
	}
	return res[8:], nil
}

// SetRate sends "RATE <rate>" and returns the rate reported back by the
// server (the word following "OK"). A reply without that word yields
// ErrUnexpectedResponse.
func (bor *BorIP) SetRate(rate float64) (float64, error) {
	res, err := bor.command("RATE", strconv.FormatFloat(rate, 'f', -1, 64))
	if err != nil {
		return 0.0, err
	}
	// Two words are required after the command echo: "OK" and the rate.
	parts, err := parseAndCheck(res, 2)
	if err != nil {
		return 0.0, err
	}
	return strconv.ParseFloat(parts[2], 64)
}

// Rate sends "RATE" and parses everything after the first five bytes of the
// reply ("RATE ") as a float.
func (bor *BorIP) Rate() (float64, error) {
	res, err := bor.command("RATE")
	if err != nil {
		return 0.0, err
	}
	if len(res) < 6 {
		return 0.0, ErrUnexpectedResponse
	}
	return strconv.ParseFloat(res[5:], 64)
}

// SetGain sends "GAIN <gain>" and reports whether the server answered OK.
func (bor *BorIP) SetGain(gain float64) error {
	res, err := bor.command("GAIN", strconv.FormatFloat(gain, 'f', -1, 64))
	if err != nil {
		return err
	}
	_, err = parseAndCheck(res, 1)
	return err
}

// Gain sends "GAIN" and parses everything after the first five bytes of the
// reply ("GAIN ") as a float.
func (bor *BorIP) Gain() (float64, error) {
	res, err := bor.command("GAIN")
	if err != nil {
		return 0.0, err
	}
	if len(res) < 6 {
		return 0.0, ErrUnexpectedResponse
	}
	return strconv.ParseFloat(res[5:], 64)
}

// SetDestination sends "DEST <dest>" and returns the destination reported
// back by the server (the word following "OK").
func (bor *BorIP) SetDestination(dest string) (string, error) {
	res, err := bor.command("DEST", dest)
	if err != nil {
		return "", err
	}
	parts, err := parseAndCheck(res, 2)
	if err != nil {
		return "", err
	}
	return parts[2], nil
}

// Destination sends "DEST" and returns everything after the first five bytes
// of the reply ("DEST ").
func (bor *BorIP) Destination() (string, error) {
	res, err := bor.command("DEST")
	if err != nil {
		return "", err
	}
	if len(res) < 5 {
		return "", ErrUnexpectedResponse
	}
	return res[5:], nil
}

// SetHeaderEnabled sends "HEADER ON" or "HEADER OFF" and reports whether the
// server answered OK.
func (bor *BorIP) SetHeaderEnabled(enabled bool) error {
	enabledStr := "OFF"
	if enabled {
		enabledStr = "ON"
	}
	res, err := bor.command("HEADER", enabledStr)
	if err != nil {
		return err
	}
	_, err = parseAndCheck(res, 1)
	return err
}

// HeaderEnabled sends "HEADER" and maps a reply ending in "ON" or "OFF" to
// true or false. Any other reply yields ErrUnexpectedResponse.
func (bor *BorIP) HeaderEnabled() (bool, error) {
	res, err := bor.command("HEADER")
	if err != nil {
		return false, err
	}
	if len(res) < 8 {
		return false, ErrUnexpectedResponse
	}
	switch res[7:] {
	case "ON":
		return true, nil
	case "OFF":
		return false, nil
	}
	return false, ErrUnexpectedResponse
}

// Go sends "GO" and, on an OK reply, records that the stream is running so
// that Close knows to stop it.
func (bor *BorIP) Go() error {
	res, err := bor.command("GO")
	if err != nil {
		return err
	}
	if _, err = parseAndCheck(res, 1); err != nil {
		return err
	}
	bor.running = true
	return nil
}

// Stop sends "STOP" and, on an OK reply, clears the running flag.
func (bor *BorIP) Stop() error {
	res, err := bor.command("STOP")
	if err != nil {
		return err
	}
	if _, err = parseAndCheck(res, 1); err != nil {
		return err
	}
	bor.running = false
	return nil
}

// parseAndCheck splits a reply of the form "<CMD> OK [args...]" on spaces and
// returns all of its words. minArgs is the number of words required after the
// command echo, counting "OK" itself. A second word other than "OK" is
// returned as an ErrResponse; too few words yields ErrUnexpectedResponse.
func parseAndCheck(res string, minArgs int) ([]string, error) {
	resParts := strings.Split(res, " ")
	if len(resParts) < 2 {
		return nil, ErrUnexpectedResponse
	}
	if resParts[1] != "OK" {
		return nil, makeErrorResponse(resParts[1:])
	}
	if len(resParts) < 1+minArgs {
		return nil, ErrUnexpectedResponse
	}
	return resParts, nil
}

// command writes cmd and its space-separated args as one line, flushes it,
// and returns the next line from the server with surrounding whitespace
// removed.
func (bor *BorIP) command(cmd string, args ...string) (string, error) {
	line := cmd
	if len(args) > 0 {
		line += " " + strings.Join(args, " ")
	}
	if _, err := bor.wr.WriteString(line + "\n"); err != nil {
		return "", err
	}
	if err := bor.wr.Flush(); err != nil {
		return "", err
	}
	reply, err := bor.rd.ReadString('\n')
	return strings.TrimSpace(reply), err
}

// Close stops a running stream, deselects the current device and closes the
// connection. Every step is best effort: errors are ignored so that the
// connection is always closed.
func (bor *BorIP) Close() {
	if bor.running {
		_ = bor.Stop()
	}
	if bor.device != nil {
		_, _ = bor.SelectDevice("!")
	}
	_ = bor.conn.Close()
}
%f\n", actualRate) 67 | 68 | // err = bor.SetGain(-1.0) 69 | // if err != nil { 70 | // log.Fatal(err) 71 | // } 72 | // gain, err := bor.Gain() 73 | // if err != nil { 74 | // log.Fatal(err) 75 | // } 76 | // fmt.Printf("Gain: %f\n", gain) 77 | // dest, err := bor.Destination() 78 | // if err != nil { 79 | // log.Fatal(err) 80 | // } 81 | // fmt.Printf("Destination: %s\n", dest) 82 | // if err := bor.SetHeaderEnabled(true); err != nil { 83 | // log.Fatal(err) 84 | // } 85 | headers, err := bor.HeaderEnabled() 86 | if err != nil { 87 | log.Fatal(err) 88 | } 89 | fmt.Fprintf(os.Stderr, "Header enabled: %+v\n", headers) 90 | 91 | // 92 | 93 | dest, err := bor.SetDestination("127.0.0.1:2288") 94 | if err != nil { 95 | log.Fatal(err) 96 | } 97 | fmt.Fprintf(os.Stderr, "Destination: %s\n", dest) 98 | 99 | addr, err := net.ResolveUDPAddr("udp", dest) 100 | if err != nil { 101 | log.Fatal(err) 102 | } 103 | conn, err := net.ListenUDP("udp", addr) 104 | if err != nil { 105 | log.Fatal(err) 106 | } 107 | defer conn.Close() 108 | 109 | // rd := borip.NewPacketReader(conn, headers) 110 | // go func() { 111 | // samples := make([]complex128, 65536) 112 | // // lastIQ := complex(float64(0.0), float64(0.0)) 113 | // lastT := time.Now() 114 | // for { 115 | // n, err := rd.ReadSamples(samples) 116 | // if err != nil { 117 | // log.Fatal(err) 118 | // } 119 | // if headers { 120 | // fmt.Fprintf(os.Stderr, "%+v\n", rd.Header()) 121 | // } 122 | // t := time.Now() 123 | // rate := float64(n) / (float64(t.Sub(lastT).Nanoseconds()) / 1e9) 124 | // fmt.Printf("Actual rate: %f\n", rate) 125 | // lastT = t 126 | 127 | // // for _, iq := range samples { 128 | // // pcm := polarDiscriminant(iq, lastIQ) 129 | // // lastIQ = iq 130 | // // _ = pcm 131 | // // // fmt.Printf(" %f", pcm) 132 | // // pcm16 := int16(pcm * 16384) 133 | // // binary.Write(os.Stdout, binary.LittleEndian, pcm16) 134 | // // } 135 | // // fmt.Println() 136 | 137 | // // fm->pre_r = fm->signal[fm->signal_len - 2]; 138 
const (
	defaultBufferSize = 256 * 1024
	packetHeaderSize  = 4
	bytesPerSample    = 4 // one 16-bit I value followed by one 16-bit Q value
)

// ErrShortPacket is returned when headers are enabled and a packet is too
// small to contain one.
var ErrShortPacket = errors.New("borip: short packet")

// Flag bits carried in PacketHeader.Flags.
const (
	FlagNone            = 0x00
	FlagHardwareOverrun = 0x01 // Used at hardware interface
	FlagNetworkOverrun  = 0x02 // Used at client (network too slow)
	FlagBufferOverrun   = 0x04 // Used at client (client consumer too slow)
	FlagEmptyPayload    = 0x08 // Reserved
	FlagStreamStart     = 0x10 // Used for first packet of newly started stream
	FlagStreamEnd       = 0x20 // Reserved (TO DO: Server sends BF_EMPTY_PAYLOAD | BF_STREAM_END)
	FlagBufferUnderrun  = 0x40 // Used at hardware interface
	FlagHardwareTimeout = 0x80 // Used at hardware interface

	// FlagStremEnd is the original, misspelled name of FlagStreamEnd.
	//
	// Deprecated: use FlagStreamEnd instead.
	FlagStremEnd = FlagStreamEnd
)

// PacketHeader is the optional four-byte header at the start of each packet.
type PacketHeader struct {
	Flags        byte
	Notification byte   // Reserved (currently 0)
	Idx          uint16 // Sequence number (incremented each time a packet is sent, used by client to count dropped packets)
}

// PacketReader decodes 16-bit little-endian I/Q samples from the packets
// received on a net.PacketConn.
type PacketReader struct {
	conn        net.PacketConn
	buf         []byte
	bufI, bufN  int // next unread byte and end of valid sample data in buf
	withHeaders bool
	header      PacketHeader
}

// NewPacketReader returns a reader for conn. withHeaders must be true when
// every packet starts with a PacketHeader.
func NewPacketReader(conn net.PacketConn, withHeaders bool) *PacketReader {
	return &PacketReader{
		conn:        conn,
		buf:         make([]byte, defaultBufferSize),
		withHeaders: withHeaders,
	}
}

// Header returns the header of the most recently received packet. It is the
// zero value until a packet with a header has been read.
func (rd *PacketReader) Header() PacketHeader {
	return rd.header
}

// ReadSamples fills samples with decoded I/Q pairs and returns how many were
// written. A new packet is read from the connection only once the previous
// one has been fully consumed, so a packet holding more samples than
// len(samples) is delivered across several calls. Trailing bytes that do not
// form a whole sample are discarded.
//
// It returns the connection's error when reading fails and ErrShortPacket
// when headers are enabled but the packet is smaller than a header; in both
// cases the packet is dropped and the next call reads a fresh one.
func (rd *PacketReader) ReadSamples(samples []complex128) (int, error) {
	if rd.bufI >= rd.bufN {
		// Mark the buffer empty first so that an early return below can
		// never leave stale bytes to be decoded by the next call.
		rd.bufI, rd.bufN = 0, 0
		n, _, err := rd.conn.ReadFrom(rd.buf)
		if err != nil {
			return 0, err
		}
		start := 0
		if rd.withHeaders {
			if n < packetHeaderSize {
				return 0, ErrShortPacket
			}
			rd.header = PacketHeader{
				Flags:        rd.buf[0],
				Notification: rd.buf[1],
				Idx:          binary.LittleEndian.Uint16(rd.buf[2:4]),
			}
			start = packetHeaderSize
		}
		rd.bufI = start
		// Keep only whole samples; a partial trailing sample is dropped.
		rd.bufN = n - (n-start)%bytesPerSample
	}
	count := 0
	for count < len(samples) && rd.bufI < rd.bufN {
		i := int16(binary.LittleEndian.Uint16(rd.buf[rd.bufI:]))
		q := int16(binary.LittleEndian.Uint16(rd.buf[rd.bufI+2:]))
		samples[count] = complex(float64(i), float64(q))
		count++
		rd.bufI += bytesPerSample
	}
	return count, nil
}
| import ( 7 | "errors" 8 | "log" 9 | "sync" 10 | 11 | "github.com/samuel/go-rtlsdr/rtl" 12 | ) 13 | 14 | var ( 15 | ErrDeviceNotAvailable = errors.New("device not available") 16 | ) 17 | 18 | type device struct { 19 | name string 20 | rtlIndex int 21 | 22 | mutex sync.RWMutex 23 | rtlDev *rtl.Device 24 | inUse bool 25 | sendCloseChan chan bool 26 | } 27 | 28 | var ( 29 | defaultDevice string 30 | 31 | devicesMutex sync.RWMutex 32 | devices map[string]*device 33 | ) 34 | 35 | func init() { 36 | devices = make(map[string]*device) 37 | 38 | count := rtl.GetDeviceCount() 39 | for i := 0; i < count; i++ { 40 | name := rtl.GetDeviceName(i) 41 | if name == "" { 42 | log.Printf("RTL returned a blank name for index %d", i) 43 | } else { 44 | // TODO: handle non-unique device names 45 | if defaultDevice == "" { 46 | defaultDevice = name 47 | } 48 | devices[name] = &device{ 49 | name: name, 50 | rtlIndex: i, 51 | } 52 | } 53 | } 54 | } 55 | 56 | func deviceList() []*device { 57 | devicesMutex.RLock() 58 | defer devicesMutex.RUnlock() 59 | 60 | devs := make([]*device, 0, len(devices)) 61 | for _, dev := range devices { 62 | dev.mutex.Lock() 63 | if !dev.inUse { 64 | devs = append(devs, dev) 65 | } 66 | dev.mutex.Unlock() 67 | } 68 | return devs 69 | } 70 | 71 | func (dev *device) open() error { 72 | dev.mutex.Lock() 73 | defer dev.mutex.Unlock() 74 | if dev.inUse { 75 | return ErrDeviceNotAvailable 76 | } 77 | rdev, err := rtl.Open(dev.rtlIndex) 78 | if err != nil { 79 | return err 80 | } 81 | dev.inUse = true 82 | dev.rtlDev = rdev 83 | return nil 84 | } 85 | 86 | func (dev *device) close() { 87 | dev.mutex.Lock() 88 | defer dev.mutex.Unlock() 89 | if dev.rtlDev == nil { 90 | return 91 | } 92 | dev.rtlDev.Close() 93 | dev.rtlDev = nil 94 | dev.inUse = false 95 | } 96 | -------------------------------------------------------------------------------- /dsp/ax25/constants.go: -------------------------------------------------------------------------------- 1 | package ax25 2 | 3 
// PID is the protocol identifier byte of an AX.25 frame.
type PID byte

const (
	ISO8208CCITTX25PLP     PID = 0x01 // ISO 8208/CCITT X.25 PLP
	CompressedTCPIP        PID = 0x06 // Compressed TCP/IP. RFC 1144
	UncompressedTCPIP      PID = 0x07 // Uncompressed TCP/IP
	SegmentationFragment   PID = 0x08 // Segmentation fragment
	TEXNETDatagramProtocol PID = 0xc3 // TEXNET datagram protocol
	LinkQualityProtocol    PID = 0xc4 // Link Quality Protocol
	AppleTalk              PID = 0xca // Appletalk
	AppletalkARP           PID = 0xcb // Appletalk ARP
	ARPAInternetProtocol   PID = 0xcc // ARPA Internet Protocol
	ARPAAddressResolution  PID = 0xcd // ARPA Address Resolution
	FlexNet                PID = 0xce // FlexNet
	NETROM                 PID = 0xcf // NET/ROM
	NoLayer3Protocol       PID = 0xf0 // No Layer 3 Protocol Implemented
)

// pidToString maps each known PID to its human-readable name.
var pidToString = map[PID]string{
	ISO8208CCITTX25PLP:     "ISO 8208/CCITT X.25 PLP",
	CompressedTCPIP:        "Compressed TCP/IP. RFC 1144",
	UncompressedTCPIP:      "Uncompressed TCP/IP",
	SegmentationFragment:   "Segmentation fragment",
	TEXNETDatagramProtocol: "TEXNET datagram protocol",
	LinkQualityProtocol:    "Link Quality Protocol",
	AppleTalk:              "Appletalk",
	AppletalkARP:           "Appletalk ARP",
	ARPAInternetProtocol:   "ARPA Internet Protocol",
	ARPAAddressResolution:  "ARPA Address Resolution",
	FlexNet:                "FlexNet",
	NETROM:                 "NET/ROM",
	NoLayer3Protocol:       "No Layer 3 Protocol Implemented",
}

// String returns the name of a known PID, or its value as two hex digits.
func (pid PID) String() string {
	if s := pidToString[pid]; s != "" {
		return s
	}
	return fmt.Sprintf("%02x", int(pid))
}

// FrameType is the class of an AX.25 frame as derived from its control field.
type FrameType byte

const (
	IFrame FrameType = 0 // Information frame
	SFrame FrameType = 1 // Supervisory frame
	UFrame FrameType = 2 // Unnumbered frame
)

// String returns "I", "S" or "U", or the value as two hex digits for an
// unknown frame type.
func (t FrameType) String() string {
	switch t {
	case IFrame:
		return "I"
	case SFrame:
		return "S"
	case UFrame:
		return "U"
	}
	return fmt.Sprintf("%02x", int(t))
}

// UnnumberedType is the control field of an unnumbered frame with the
// poll/final bit (0x10) cleared.
type UnnumberedType byte

const (
	SABME UnnumberedType = 0x6f // Set Async Balanced Mode Extended
	SABM  UnnumberedType = 0x2f // Set Async Balanced Mode
	DISC  UnnumberedType = 0x43 // Disconnect
	DM    UnnumberedType = 0x0f // Disconnect Mode
	UA    UnnumberedType = 0x63 // Unnumbered Acknowledge
	FRMR  UnnumberedType = 0x87 // Frame Reject
	UI    UnnumberedType = 0x03 // Unnumbered Information
	XID   UnnumberedType = 0xaf // Exchange Identification
	TEST  UnnumberedType = 0xe3 // Test
)

var (
	// UnnumberedTypeName maps each known UnnumberedType to its mnemonic.
	UnnumberedTypeName = map[UnnumberedType]string{
		SABME: "SABME",
		SABM:  "SABM",
		DISC:  "DISC",
		DM:    "DM",
		UA:    "UA",
		FRMR:  "FRMR",
		UI:    "UI",
		XID:   "XID",
		TEST:  "TEST",
	}
)

// String returns the mnemonic of a known unnumbered frame type, or its value
// as two hex digits.
func (t UnnumberedType) String() string {
	if s := UnnumberedTypeName[t]; s != "" {
		return s
	}
	return fmt.Sprintf("%02x", int(t))
}

// SupervisoryType is the low four bits of a supervisory frame's control
// field.
type SupervisoryType byte

const (
	RR   SupervisoryType = 0x1 // Receive Ready
	RNR  SupervisoryType = 0x5 // Receive Not Ready
	REJ  SupervisoryType = 0x9 // Reject
	SREJ SupervisoryType = 0xd // Selective Reject
)

// String returns the mnemonic of a known supervisory frame type, or its value
// as two hex digits.
func (t SupervisoryType) String() string {
	switch t {
	case RR:
		return "RR"
	case RNR:
		return "RNR"
	case REJ:
		return "REJ"
	case SREJ:
		return "SREJ"
	}
	return fmt.Sprintf("%02x", int(t))
}
// crcCcittTable is the 256-entry lookup table used by checkCrcCcitt to
// advance the 16-bit checksum one input byte at a time.
var crcCcittTable = []uint16{
	0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
	0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
	0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
	0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
	0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
	0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
	0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
	0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
	0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
	0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
	0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
	0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
	0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
	0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
	0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
	0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
	0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
	0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
	0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
	0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
	0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
	0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
	0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
	0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
	0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
	0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
	0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
	0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
	0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
	0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
	0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
	0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78,
}

// checkCrcCcitt reports whether buf, which must end with its two checksum
// bytes, has a valid checksum. The running value starts at 0xffff, is advanced
// through crcCcittTable for every byte, and must finish at 0xf0b8.
func checkCrcCcitt(buf []byte) bool {
	sum := uint16(0xffff)
	for i := 0; i < len(buf); i++ {
		// The table index is the low byte of the running sum XOR the input.
		idx := byte(sum) ^ buf[i]
		sum = crcCcittTable[idx] ^ (sum >> 8)
	}
	return sum == 0xf0b8
}
uint16 = 0xffff 40 | 41 | for _, b := range buf { 42 | crc = (crc >> 8) ^ crcCcittTable[(crc^uint16(b))&0xff] 43 | } 44 | return (crc & 0xffff) == 0xf0b8 45 | } 46 | -------------------------------------------------------------------------------- /dsp/ax25/hdlc.go: -------------------------------------------------------------------------------- 1 | package ax25 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | type Address struct { 8 | Callsign string 9 | SSID int // Secondary Station ID 10 | } 11 | 12 | func (a Address) String() string { 13 | return fmt.Sprintf("%s-%d", a.Callsign, a.SSID) 14 | } 15 | 16 | type Frame struct { 17 | Source Address 18 | Destination Address 19 | Repeaters []Address 20 | V1 bool 21 | Command bool // command=true, response=false 22 | Type FrameType 23 | SendSeq, RecvSeq int 24 | PollFinal bool // P/F of 1 is true, 0 is false 25 | UnnumberedType UnnumberedType 26 | SupervisoryType SupervisoryType 27 | PID PID // Protocol Identifier 28 | Info []byte 29 | } 30 | 31 | type AX25 struct { 32 | bitstream byte 33 | inFrame bool 34 | rxBitI int 35 | rxBits byte 36 | rxBuf []byte 37 | maxBufferSize int 38 | } 39 | 40 | func NewDecoder() *AX25 { 41 | return &AX25{ 42 | rxBuf: make([]byte, 0, 512), 43 | maxBufferSize: 512, 44 | } 45 | } 46 | 47 | func parseAddress(buf []byte) Address { 48 | i := 0 49 | for ; i < 6; i++ { 50 | buf[i] >>= 1 51 | if buf[i] == 0x20 { 52 | break 53 | } 54 | } 55 | return Address{Callsign: string(buf[:i]), SSID: int((buf[6] >> 1) & 0xf)} 56 | } 57 | 58 | func (ax *AX25) processFrame() *Frame { 59 | if len(ax.rxBuf) < 10 { 60 | return nil 61 | } 62 | 63 | if !checkCrcCcitt(ax.rxBuf) { 64 | return nil 65 | } 66 | 67 | buf := ax.rxBuf[:len(ax.rxBuf)-2] 68 | 69 | // for i, b := range buf { 70 | // fmt.Printf("%d %08b\n", i, b) 71 | // } 72 | 73 | frame := Frame{ 74 | V1: true, 75 | Command: false, // command (true) or response (false) 76 | } 77 | 78 | if buf[1]&1 > 0 { 79 | // FlexNet Header Compression 80 | frame.V1 = false 81 | 
frame.Command = (buf[1] & 2) != 0 82 | var dest []byte 83 | if i := (buf[2] >> 2) & 0x2f; i != 0 { 84 | dest = append(dest, i+0x20) 85 | } 86 | if i := (buf[2] << 4) | ((buf[3]>>4)&0xf)&0x3f; i != 0 { 87 | dest = append(dest, i+0x20) 88 | } 89 | if i := (buf[3] << 2) | ((buf[4]>>6)&3)&0x3f; i != 0 { 90 | dest = append(dest, i+0x20) 91 | } 92 | if i := buf[4] & 0x3f; i != 0 { 93 | dest = append(dest, i+0x20) 94 | } 95 | if i := (buf[5] >> 2) & 0x3f; i != 0 { 96 | dest = append(dest, i+0x20) 97 | } 98 | if i := ((buf[5] << 4) | ((buf[6] >> 4) & 0xf)) & 0x3f; i != 0 { 99 | dest = append(dest, i+0x20) 100 | } 101 | if dest != nil { 102 | frame.Destination = Address{ 103 | Callsign: string(dest), 104 | SSID: int(buf[6] & 0xf), 105 | } 106 | } 107 | // TODO 108 | // fmt.Printf("%s QSO Nr %u", frame.Destination, (buf[0]<<6)|(buf[1]>>2)) 109 | buf = buf[7:] 110 | } else { 111 | // Normal Header 112 | if len(buf) < 15 { 113 | return nil 114 | } 115 | 116 | // 6.1.2. Command/Response Procedure 117 | // dest SSID high bit : buf[6]&0x80 -> C bit of AX.25 frame 118 | // src SSID high bit : buf[13]&0x80 -> C bit of LA PA frame 119 | if buf[6]&0x80 != buf[13]&0x80 { 120 | frame.V1 = false 121 | frame.Command = int(ax.rxBuf[6]&0x80) != 0 122 | } 123 | 124 | frame.Destination = parseAddress(buf[:7]) 125 | frame.Source = parseAddress(buf[7:14]) 126 | 127 | o := 14 128 | for ; buf[o-1]&1 == 0 && len(buf)-o > 7; o += 7 { 129 | frame.Repeaters = append(frame.Repeaters, parseAddress(buf[o:])) 130 | } 131 | buf = buf[o:] 132 | } 133 | 134 | if len(buf) == 0 { 135 | return &frame 136 | } 137 | 138 | // 4.2 Control-Field 139 | 140 | controlField := buf[0] 141 | buf = buf[1:] 142 | 143 | // 4.2.1 & 6.2 Poll/Final bit 144 | frame.PollFinal = controlField&0x10 != 0 145 | 146 | if controlField&1 == 0 { 147 | // Info frame 148 | frame.Type = IFrame 149 | // 0 : 0 150 | // 1-3: N(S) 151 | // 4 : P 152 | // 5-7: N(R) 153 | frame.SendSeq = int((controlField >> 1) & 7) 154 | frame.RecvSeq = 
int((controlField >> 5) & 7) 155 | } else if controlField&2 != 0 { 156 | // Unnumbered frame 157 | frame.Type = UFrame 158 | // 4.3.3 Unnumbered Frame Control Fields 159 | frame.UnnumberedType = UnnumberedType(controlField & ^byte(0x10)) 160 | } else { 161 | // Supervisory frame 162 | frame.Type = SFrame 163 | frame.SupervisoryType = SupervisoryType(controlField & 0x0f) 164 | frame.RecvSeq = int((controlField >> 5) & 7) 165 | } 166 | if len(buf) == 0 { 167 | return &frame 168 | } 169 | 170 | if frame.Type == IFrame || (frame.Type == UFrame && frame.UnnumberedType == UI) { 171 | frame.PID = PID(buf[0]) 172 | frame.Info = buf[1:] 173 | } 174 | return &frame 175 | } 176 | 177 | // Return a frame when a full one has been received. Otherwise return nil. 178 | func (ax *AX25) Feed(bit int) *Frame { 179 | ax.bitstream <<= 1 180 | ax.bitstream |= byte(bit) 181 | // Watch for flag 182 | if ax.bitstream&0xff == 0x7e { 183 | var frame *Frame 184 | if ax.inFrame && len(ax.rxBuf) > 2 { 185 | frame = ax.processFrame() 186 | } 187 | ax.inFrame = true 188 | ax.rxBuf = ax.rxBuf[:0] 189 | ax.rxBits = 0 190 | ax.rxBitI = 0 191 | return frame 192 | } 193 | // Frame abort 194 | if ax.bitstream&0x7f == 0x7f { 195 | ax.inFrame = false 196 | return nil 197 | } 198 | if !ax.inFrame { 199 | return nil 200 | } 201 | // Stuffed bit 202 | if ax.bitstream&0x3f == 0x3e { 203 | return nil 204 | } 205 | ax.rxBits >>= 1 206 | if bit != 0 { 207 | ax.rxBits |= 0x80 208 | } 209 | ax.rxBitI++ 210 | if ax.rxBitI == 8 { 211 | if len(ax.rxBuf) >= ax.maxBufferSize { 212 | ax.inFrame = false 213 | // TODO: return an error? 
// Coefficient formulas are taken from the Audio EQ Cookbook:
// http://www.musicdsp.org/files/Audio-EQ-Cookbook.txt

// BiQuadFilter is a second-order IIR section. B0-B2 are the feed-forward
// coefficients and A0-A2 the feedback coefficients; all of them are divided
// by A0 when samples are processed. The filter remembers its last two inputs
// and outputs, so consecutive Filter/FilterF32 calls process one continuous
// stream.
type BiQuadFilter struct {
	B0, B1, B2      float64
	A0, A1, A2      float64
	prevIn, prevOut [2]float64
}

// Filter runs every sample of input through the filter and stores the result
// at the same index of output, which must be at least as long as input.
func (f *BiQuadFilter) Filter(input, output []float64) {
	var (
		nb0 = f.B0 / f.A0
		nb1 = f.B1 / f.A0
		nb2 = f.B2 / f.A0
		na1 = f.A1 / f.A0
		na2 = f.A2 / f.A0
	)
	for i := range input {
		x := input[i]
		y := nb0*x + nb1*f.prevIn[0] + nb2*f.prevIn[1] - na1*f.prevOut[0] - na2*f.prevOut[1]
		// Slide the two-sample history along by one.
		f.prevOut = [2]float64{y, f.prevOut[0]}
		f.prevIn = [2]float64{x, f.prevIn[0]}
		output[i] = y
	}
}

// FilterF32 is Filter for float32 samples. The arithmetic and the stored
// history stay in float64; only the written output is rounded to float32.
func (f *BiQuadFilter) FilterF32(input, output []float32) {
	var (
		nb0 = f.B0 / f.A0
		nb1 = f.B1 / f.A0
		nb2 = f.B2 / f.A0
		na1 = f.A1 / f.A0
		na2 = f.A2 / f.A0
	)
	for i := range input {
		x := float64(input[i])
		y := nb0*x + nb1*f.prevIn[0] + nb2*f.prevIn[1] - na1*f.prevOut[0] - na2*f.prevOut[1]
		f.prevOut = [2]float64{y, f.prevOut[0]}
		f.prevIn = [2]float64{x, f.prevIn[0]}
		output[i] = float32(y)
	}
}

// NewLowPassBiQuadFilter returns a low-pass filter.
//
//	H(s) = 1 / (s^2 + s/Q + 1)
func NewLowPassBiQuadFilter(sampleRate, cutoffFreq, q float64) *BiQuadFilter {
	w0 := 2 * math.Pi * cutoffFreq / sampleRate
	sn, cs := math.Sin(w0), math.Cos(w0)
	alpha := sn / (2 * q)

	var f BiQuadFilter
	f.B0 = (1 - cs) / 2
	f.B1 = 1 - cs
	f.B2 = (1 - cs) / 2
	f.A0 = 1 + alpha
	f.A1 = -2 * cs
	f.A2 = 1 - alpha
	return &f
}

// NewHighPassBiQuadFilter returns a high-pass filter.
//
//	H(s) = s^2 / (s^2 + s/Q + 1)
func NewHighPassBiQuadFilter(sampleRate, cutoffFreq, q float64) *BiQuadFilter {
	w0 := 2 * math.Pi * cutoffFreq / sampleRate
	sn, cs := math.Sin(w0), math.Cos(w0)
	alpha := sn / (2 * q)

	var f BiQuadFilter
	f.B0 = (1 + cs) / 2
	f.B1 = -(1 + cs)
	f.B2 = (1 + cs) / 2
	f.A0 = 1 + alpha
	f.A1 = -2 * cs
	f.A2 = 1 - alpha
	return &f
}

// NewBandPassConstantSkirtGainBiQuadFilter returns a band-pass filter with
// constant skirt gain (peak gain = Q).
//
//	H(s) = s / (s^2 + s/Q + 1)
func NewBandPassConstantSkirtGainBiQuadFilter(sampleRate, centreFrequency, q float64) *BiQuadFilter {
	w0 := 2 * math.Pi * centreFrequency / sampleRate
	sn, cs := math.Sin(w0), math.Cos(w0)
	alpha := sn / (2 * q)

	var f BiQuadFilter
	f.B0 = sn / 2 // = Q*alpha
	f.B1 = 0
	f.B2 = -sn / 2 // = -Q*alpha
	f.A0 = 1 + alpha
	f.A1 = -2 * cs
	f.A2 = 1 - alpha
	return &f
}

// NewBandPassConstantPeakGainBiQuadFilter returns a band-pass filter with a
// constant 0 dB peak gain.
//
//	H(s) = (s/Q) / (s^2 + s/Q + 1)
func NewBandPassConstantPeakGainBiQuadFilter(sampleRate, centreFrequency, q float64) *BiQuadFilter {
	w0 := 2 * math.Pi * centreFrequency / sampleRate
	sn, cs := math.Sin(w0), math.Cos(w0)
	alpha := sn / (2 * q)

	var f BiQuadFilter
	f.B0 = alpha
	f.B1 = 0
	f.B2 = -alpha
	f.A0 = 1 + alpha
	f.A1 = -2 * cs
	f.A2 = 1 - alpha
	return &f
}

// NotchBiQuadFilter returns a notch filter.
//
//	H(s) = (s^2 + 1) / (s^2 + s/Q + 1)
func NotchBiQuadFilter(sampleRate, centreFrequency, q float64) *BiQuadFilter {
	w0 := 2 * math.Pi * centreFrequency / sampleRate
	sn, cs := math.Sin(w0), math.Cos(w0)
	alpha := sn / (2 * q)

	var f BiQuadFilter
	f.B0 = 1
	f.B1 = -2 * cs
	f.B2 = 1
	f.A0 = 1 + alpha
	f.A1 = -2 * cs
	f.A2 = 1 - alpha
	return &f
}

// AllPassBiQuadFilter returns an all-pass filter.
//
//	H(s) = (s^2 - s/Q + 1) / (s^2 + s/Q + 1)
func AllPassBiQuadFilter(sampleRate, centreFrequency, q float64) *BiQuadFilter {
	w0 := 2 * math.Pi * centreFrequency / sampleRate
	sn, cs := math.Sin(w0), math.Cos(w0)
	alpha := sn / (2 * q)

	var f BiQuadFilter
	f.B0 = 1 - alpha
	f.B1 = -2 * cs
	f.B2 = 1 + alpha
	f.A0 = 1 + alpha
	f.A1 = -2 * cs
	f.A2 = 1 - alpha
	return &f
}

// PeakingEQBiQuadFilter returns a peaking EQ filter with the given gain in
// decibels.
//
//	H(s) = (s^2 + s*(A/Q) + 1) / (s^2 + s/(A*Q) + 1)
func PeakingEQBiQuadFilter(sampleRate, centreFrequency, q, dbGain float64) *BiQuadFilter {
	w0 := 2 * math.Pi * centreFrequency / sampleRate
	sn, cs := math.Sin(w0), math.Cos(w0)
	alpha := sn / (2 * q)
	// 10^(dB/40) is already the square root of the linear gain 10^(dB/20).
	a := math.Pow(10, dbGain/40)

	var f BiQuadFilter
	f.B0 = 1 + alpha*a
	f.B1 = -2 * cs
	f.B2 = 1 - alpha*a
	f.A0 = 1 + alpha/a
	f.A1 = -2 * cs
	f.A2 = 1 - alpha/a
	return &f
}

// LowShelfBiQuadFilter returns a low-shelf filter.
//
//	H(s) = A * (s^2 + (sqrt(A)/Q)*s + A)/(A*s^2 + (sqrt(A)/Q)*s + 1)
//
// shelfSlope: a "shelf slope" parameter (for shelving EQ only).
// When S = 1, the shelf slope is as steep as it can be and remain monotonically
// increasing or decreasing gain with frequency. The shelf slope, in dB/octave,
// remains proportional to S for all other values for a fixed f0/Fs and dBgain.
// dbGain: Gain in decibels
func LowShelfBiQuadFilter(sampleRate, cutoffFrequency, shelfSlope, dbGain float64) *BiQuadFilter {
	w0 := 2 * math.Pi * cutoffFrequency / sampleRate
	sn, cs := math.Sin(w0), math.Cos(w0)
	// 10^(dB/40) is already the square root of the linear gain 10^(dB/20).
	a := math.Pow(10, dbGain/40.0)
	alpha := sn / 2 * math.Sqrt((a+1/a)*(1/shelfSlope-1)+2)
	temp := 2 * math.Sqrt(a) * alpha

	var f BiQuadFilter
	f.B0 = a * ((a + 1) - (a-1)*cs + temp)
	f.B1 = 2 * a * ((a - 1) - (a+1)*cs)
	f.B2 = a * ((a + 1) - (a-1)*cs - temp)
	f.A0 = (a + 1) + (a-1)*cs + temp
	f.A1 = -2 * ((a - 1) + (a+1)*cs)
	f.A2 = (a + 1) + (a-1)*cs - temp
	return &f
}

// HighShelfBiQuadFilter returns a high-shelf filter; shelfSlope and dbGain
// enter the formulas the same way as in LowShelfBiQuadFilter.
//
//	H(s) = A * (A*s^2 + (sqrt(A)/Q)*s + 1)/(s^2 + (sqrt(A)/Q)*s + A)
func HighShelfBiQuadFilter(sampleRate, cutoffFrequency, shelfSlope, dbGain float64) *BiQuadFilter {
	w0 := 2 * math.Pi * cutoffFrequency / sampleRate
	sn, cs := math.Sin(w0), math.Cos(w0)
	// 10^(dB/40) is already the square root of the linear gain 10^(dB/20).
	a := math.Pow(10, dbGain/40)
	alpha := sn / 2 * math.Sqrt((a+1/a)*(1/shelfSlope-1)+2)
	temp := 2 * math.Sqrt(a) * alpha

	var f BiQuadFilter
	f.B0 = a * ((a + 1) + (a-1)*cs + temp)
	f.B1 = -2 * a * ((a - 1) + (a+1)*cs)
	f.B2 = a * ((a + 1) + (a-1)*cs - temp)
	f.A0 = (a + 1) - (a-1)*cs + temp
	f.A1 = 2 * ((a - 1) - (a+1)*cs)
	f.A2 = (a + 1) - (a-1)*cs - temp
	return &f
}
// Ui8toi16 converts and scales unsigned 8-bit samples to 16-bit signed samples.
func Ui8toi16(input []byte, output []int16)

// ui8toi16 is the portable Go implementation of Ui8toi16. It converts
// min(len(input), len(output)) samples: each one is re-centred by
// subtracting 128 and the resulting byte is placed in both halves of the
// 16-bit result.
func ui8toi16(input []byte, output []int16) {
	n := len(output)
	if len(input) < n {
		n = len(input)
	}
	for i, v := range input[:n] {
		v -= 128
		v16 := int16((uint16(v) << 8) | uint16(v))
		output[i] = v16
	}
}

// Ui8toi16b converts and scales unsigned 8-bit samples to 16-bit signed samples.
func Ui8toi16b(input, output []byte)

// ui8toi16b is the portable Go implementation of Ui8toi16b. Every input
// sample produces two output bytes, both holding the sample minus 128, so
// at most len(output)/2 samples are converted.
func ui8toi16b(input, output []byte) {
	n := len(output) / 2
	if len(input) < n {
		n = len(input)
	}
	for i, v := range input[:n] {
		v -= 128
		output[i*2] = v
		output[i*2+1] = v
	}
}

// Ui8tof32 converts unsigned 8-bit samples to 32-bit float.
// It does not scale the samples.
func Ui8tof32(input []byte, output []float32)

// ui8tof32 is the portable Go implementation of Ui8tof32. It converts
// min(len(input), len(output)) samples to float32(sample - 128) and does
// nothing when either slice is empty.
func ui8tof32(input []byte, output []float32) {
	n := len(input)
	if len(output) < n {
		n = len(output)
	}
	// Re-slicing proves to the compiler that output[i] is in range and,
	// unlike indexing output[n-1], is also valid when n is zero.
	output = output[:n]
	for i, v := range input[:n] {
		output[i] = float32(int(v) - 128)
	}
}

// I8tof32 converts signed 8-bit samples to 32-bit float.
// It does not scale the samples.
func I8tof32(input []byte, output []float32)

// i8tof32 is the portable Go implementation of I8tof32. Each input byte is
// reinterpreted as an int8 before conversion; min(len(input), len(output))
// samples are converted.
func i8tof32(input []byte, output []float32) {
	n := len(input)
	if len(output) < n {
		n = len(output)
	}
	for i, v := range input[:n] {
		output[i] = float32(int8(v))
	}
}

// Ui8toc64 converts unsigned 8-bit interleaved complex samples to 64-bit complex (32-bit real and imaginary parts).
// It does not scale the samples.
func Ui8toc64(input []byte, output []complex64)

// ui8toc64 is the portable Go implementation of Ui8toc64. Input bytes are
// consumed in (real, imaginary) pairs and 128 is subtracted from each;
// min(len(input)/2, len(output)) samples are produced.
func ui8toc64(input []byte, output []complex64) {
	count := len(input) / 2
	if len(output) < count {
		count = len(output)
	}
	for k := range output[:count] {
		re := float32(int(input[2*k]) - 128)
		im := float32(int(input[2*k+1]) - 128)
		output[k] = complex(re, im)
	}
}

// I8toc64 converts signed 8-bit interleaved complex samples to 64-bit complex (32-bit real and imaginary parts).
// It does not scale the samples.
func I8toc64(input []int8, output []complex64) {
	count := len(input) / 2
	if len(output) < count {
		count = len(output)
	}
	for k := range output[:count] {
		re := float32(input[2*k])
		im := float32(input[2*k+1])
		output[k] = complex(re, im)
	}
}

// C64toi8 converts 64-bit complex samples to signed 8-bit interleaved.
// It does not scale the samples.
func C64toi8(input []complex64, output []int8) {
	count := len(output) / 2
	if len(input) < count {
		count = len(input)
	}
	for k := 0; k < count; k++ {
		sample := input[k]
		output[2*k] = int8(real(sample))
		output[2*k+1] = int8(imag(sample))
	}
}

// F32toi16 converts scaled 32-bit floats to 16-bit integers.
func F32toi16(input []float32, output []int16, scale float32)

// f32toi16 is the portable Go implementation of F32toi16. Each sample is
// multiplied by scale and converted to int16; min(len(input), len(output))
// samples are written.
func f32toi16(input []float32, output []int16, scale float32) {
	count := len(output)
	if len(input) < count {
		count = len(input)
	}
	for k := 0; k < count; k++ {
		output[k] = int16(input[k] * scale)
	}
}

// F32toi16ble converts float32 to int16 stored in a byte slice. The values
// are stored in little-endian.
func F32toi16ble(input []float32, output []byte, scale float32)

// f32toi16ble is the portable Go implementation of F32toi16ble. Each sample
// is multiplied by scale, converted to int16 and written low byte first;
// min(len(input), len(output)/2) samples are written.
func f32toi16ble(input []float32, output []byte, scale float32) {
	count := len(output) / 2
	if len(input) < count {
		count = len(input)
	}
	for k := 0; k < count; k++ {
		word := uint16(int16(input[k] * scale))
		output[2*k] = uint8(word & 0xff)
		output[2*k+1] = uint8(word >> 8)
	}
}

// I16ToBLE converts int16 values to little endian bytes.
func I16ToBLE(input []int16, output []byte)

// i16ToBLE is the portable Go implementation of I16ToBLE. It writes the low
// byte of each value followed by its high byte, for
// min(len(input), len(output)/2) values.
func i16ToBLE(input []int16, output []byte) {
	count := len(input)
	if pairs := len(output) / 2; pairs < count {
		count = pairs
	}
	for k := 0; k < count; k++ {
		word := input[k]
		output[2*k] = byte(word & 0xff)
		output[2*k+1] = byte(word >> 8)
	}
}

// I16bleToF64 converts int16 stored in a byte slice as little endian to float64.
func I16bleToF64(input []byte, output []float64, scale float64)

// i16bleToF64 is the portable Go implementation of I16bleToF64. Each byte
// pair is read low byte first as an int16 and multiplied by scale;
// min(len(input)/2, len(output)) samples are produced.
func i16bleToF64(input []byte, output []float64, scale float64) {
	count := len(input) / 2
	if len(output) < count {
		count = len(output)
	}
	for k := 0; k < count; k++ {
		lo := uint16(input[2*k])
		hi := uint16(input[2*k+1])
		output[k] = float64(int16(lo|(hi<<8))) * scale
	}
}

// I16bleToF32 converts int16 stored in a byte slice as little endian to float32.
func I16bleToF32(input []byte, output []float32, scale float32)

// i16bleToF32 is the portable Go implementation of I16bleToF32. Each byte
// pair is read low byte first as an int16 and multiplied by scale;
// min(len(input)/2, len(output)) samples are produced.
func i16bleToF32(input []byte, output []float32, scale float32) {
	count := len(input) / 2
	if len(output) < count {
		count = len(output)
	}
	for k := 0; k < count; k++ {
		lo := uint16(input[2*k])
		hi := uint16(input[2*k+1])
		output[k] = float32(int16(lo|(hi<<8))) * scale
	}
}

// I32bleToF32 converts int32 stored in a byte slice as little endian to float32.
176 | func I32bleToF32(input []byte, output []float32, scale float32) { 177 | // func i32bleToF32(input []byte, output []float32, scale float32) { 178 | n := len(input) / 4 179 | if len(output) < n { 180 | n = len(output) 181 | } 182 | for i := range output[:n] { 183 | output[i] = float32( 184 | int32( 185 | uint32(input[i*4])| 186 | (uint32(input[i*4+1])<<8)| 187 | (uint32(input[i*4+2])<<16)| 188 | (uint32(input[i*4+3])<<24))) * scale 189 | } 190 | } 191 | 192 | // F32Tof32ble converts a float32 slice to a byte slice of little endian float32. 193 | func F32Tof32ble(input []float32, output []byte) { 194 | // func f32Tof32ble(input []float32, output []byte) { 195 | n := len(output) / 4 196 | if len(input) < n { 197 | n = len(input) 198 | } 199 | for i, s := range input[:n] { 200 | binary.LittleEndian.PutUint32(output[i*4:], math.Float32bits(s)) 201 | } 202 | } 203 | -------------------------------------------------------------------------------- /dsp/conversion_386.s: -------------------------------------------------------------------------------- 1 | TEXT ·Ui8toi16(SB), 7, $0 2 | JMP ·ui8toi16(SB) 3 | 4 | TEXT ·Ui8toi16b(SB), 7, $0 5 | JMP ·ui8toi16b(SB) 6 | 7 | TEXT ·Ui8tof32(SB), 7, $0 8 | JMP ·ui8tof32(SB) 9 | 10 | TEXT ·I8tof32(SB), 7, $0 11 | JMP ·i8tof32(SB) 12 | 13 | TEXT ·Ui8toc64(SB), 7, $0 14 | JMP ·ui8toc64(SB) 15 | 16 | TEXT ·F32toi16(SB), 7, $0 17 | JMP ·f32toi16(SB) 18 | 19 | TEXT ·F32toi16ble(SB), 7, $0 20 | JMP ·f32toi16ble(SB) 21 | 22 | TEXT ·I16bleToF64(SB), 7, $0 23 | JMP ·i16bleToF64(SB) 24 | 25 | TEXT ·I16bleToF32(SB), 7, $0 26 | JMP ·i16bleToF32(SB) 27 | -------------------------------------------------------------------------------- /dsp/conversion_arm.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | 3 | TEXT ·Ui8toi16(SB), NOSPLIT, $0 4 | MOVW input+0(FP), R1 5 | MOVW input_len+4(FP), R2 6 | MOVW output+12(FP), R3 7 | MOVW output_len+16(FP), R4 8 | 9 | // Choose the shortest 
length 10 | CMP R2, R4 11 | MOVW.LT R4, R2 12 | 13 | // If no input then skip loop 14 | TEQ $0, R2 15 | BEQ ui8toi16_done 16 | ADD R1, R2 17 | 18 | ui8toi16_loop: 19 | MOVBU 0(R1), R0 20 | ADD $1, R1 21 | SUB $128, R0 22 | MOVBU R0, 0(R3) 23 | MOVBU R0, 1(R3) 24 | ADD $2, R3 25 | CMP R2, R1 26 | BLT ui8toi16_loop 27 | 28 | ui8toi16_done: 29 | RET 30 | 31 | TEXT ·Ui8toi16b(SB), NOSPLIT, $0 32 | MOVW output_len+16(FP), R4 33 | MOVW R4>>1, R4 34 | MOVW R4, output_len+16(FP) 35 | B ·Ui8toi16(SB) 36 | 37 | TEXT ·Ui8tof32(SB), NOSPLIT, $0 38 | MOVW input+0(FP), R1 39 | MOVW input_len+4(FP), R2 40 | MOVW output+12(FP), R3 41 | MOVW output_len+16(FP), R0 42 | 43 | // Choose the shortest length 44 | CMP R2, R0 45 | MOVW.LT R0, R2 46 | 47 | // If no input then skip loop 48 | CMP $0, R2 49 | BEQ ui8tof32_done 50 | 51 | MOVBU ·HaveNEON+0(SB), R0 52 | CMP $0, R0 53 | BNE ui8tof32_neon 54 | 55 | AND $(~3), R2, R0 56 | ADD R1, R2 57 | TEQ $0, R0 58 | BEQ ui8tof32_tail 59 | ADD R1, R0 60 | 61 | MOVW $0x80808080, R8 62 | 63 | ui8tof32_loop: 64 | // This is slower on Raspberry Pi but faster on Udoo Quad (which uses NEON anyway) 65 | // MOVBU 0(R1), R4 66 | // MOVBU 1(R1), R5 67 | // MOVBU 2(R1), R6 68 | // MOVBU 3(R1), R7 69 | // ADD $4, R1 70 | // SUB $128, R4 71 | // SUB $128, R5 72 | // SUB $128, R6 73 | // SUB $128, R7 74 | 75 | // This is faster on Raspberry Pi but slower on Udoo Quad (which uses NEON anyway) 76 | MOVW (R1), R4 77 | ADD $4, R1 78 | WORD $0xe6544ff8 // usub8 r4, r4, r8 79 | WORD $0xe6af5474 // sxtb r5, r4, ror #8 80 | WORD $0xe6af6874 // sxtb r6, r4, ror #16 81 | WORD $0xe6af7c74 // sxtb r7, r4, ror #24 82 | WORD $0xe6af4074 // sxtb r4, r4 83 | 84 | WORD $0xec454a1e // vmov s28, s29, r4, r5 85 | WORD $0xec476a1f // vmov s30, s31, r6, r7 86 | WORD $0xeeb80ace // vcvt.f32.s32 s0, s28 87 | WORD $0xeef80aee // vcvt.f32.s32 s1, s29 88 | WORD $0xeeb81acf // vcvt.f32.s32 s2, s30 89 | WORD $0xeef81aef // vcvt.f32.s32 s3, s31 90 | 91 | WORD $0xeca30a04 // vstmia r3!, 
{s0, s1, s2, s3} 92 | CMP R0, R1 93 | BLT ui8tof32_loop 94 | 95 | B ui8tof32_tail 96 | 97 | ////////////// Neon //////////// 98 | 99 | ui8tof32_neon: 100 | MOVW $128, R0 101 | WORD $0xeee00b10 // vdup.8 q0, r0 102 | 103 | AND $(~(16*4-1)), R2, R4 104 | ADD R1, R2 105 | TEQ $0, R4 106 | BEQ ui8tof32_tail 107 | ADD R1, R4 108 | 109 | ui8tof32_neon_loop: 110 | WORD $0xf461c28d // vld1.32 {d28, d29, d30, d31}, [r1]! 111 | 112 | // WORD $0xf461c2bd // vld1.32 {d28, d29, d30, d31}, [r1:256]! 113 | WORD $0xf3cc4280 // vsubl.u8 q10, d28, d0 114 | WORD $0xf3cd6280 // vsubl.u8 q11, d29, d0 115 | WORD $0xf3ce8280 // vsubl.u8 q12, d30, d0 116 | WORD $0xf3cfa280 // vsubl.u8 q13, d31, d0 117 | WORD $0xf2902a34 // vmovl.s16 q1, d20 118 | WORD $0xf2904a35 // vmovl.s16 q2, d21 119 | WORD $0xf2906a36 // vmovl.s16 q3, d22 120 | WORD $0xf2908a37 // vmovl.s16 q4, d23 121 | WORD $0xf290aa38 // vmovl.s16 q5, d24 122 | WORD $0xf290ca39 // vmovl.s16 q6, d25 123 | WORD $0xf290ea3a // vmovl.s16 q7, d26 124 | WORD $0xf2d00a3b // vmovl.s16 q8, d27 125 | WORD $0xf3bb2642 // vcvt.f32.s32 q1, q1 126 | WORD $0xf3bb4644 // vcvt.f32.s32 q2, q2 127 | WORD $0xf3bb6646 // vcvt.f32.s32 q3, q3 128 | WORD $0xf3bb8648 // vcvt.f32.s32 q4, q4 129 | WORD $0xf403228d // vst1.32 {d2, d3, d4, d5}, [r3]! 130 | WORD $0xf3bba64a // vcvt.f32.s32 q5, q5 131 | WORD $0xf3bbc64c // vcvt.f32.s32 q6, q6 132 | WORD $0xf403628d // vst1.32 {d6, d7, d8, d9}, [r3]! 133 | WORD $0xf3bbe64e // vcvt.f32.s32 q7, q7 134 | WORD $0xf3fb0660 // vcvt.f32.s32 q8, q8 135 | WORD $0xf403a28d // vst1.32 {d10, d11, d12, d13}, [r3]! 136 | WORD $0xf403e28d // vst1.32 {d14, d15, d16, d17}, [r3]! 
137 | 138 | CMP R4, R1 139 | BLT ui8tof32_neon_loop 140 | 141 | ui8tof32_tail: 142 | CMP R1, R2 143 | BEQ ui8tof32_done 144 | 145 | ui8tof32_tail_loop: 146 | MOVBU 0(R1), R4 147 | SUB $128, R4 148 | MOVWF R4, F0 149 | ADD $1, R1 150 | WORD $0xeca30a01 // vstmia r3!, {s0} 151 | CMP R2, R1 152 | BLT ui8tof32_tail_loop 153 | 154 | ui8tof32_done: 155 | RET 156 | 157 | // TODO 158 | TEXT ·I8tof32(SB), NOSPLIT, $0 159 | B ·i8tof32(SB) 160 | 161 | TEXT ·Ui8toc64(SB), NOSPLIT, $0 162 | MOVW input_len+4(FP), R2 163 | AND $(~1), R2 164 | MOVW R2, input_len+4(FP) 165 | MOVW output_len+16(FP), R0 166 | MOVW R0<<1, R0 167 | MOVW R0, output_len+16(FP) 168 | B ·Ui8tof32(SB) 169 | 170 | TEXT ·F32toi16(SB), NOSPLIT, $0 171 | MOVW input+0(FP), R1 172 | MOVW input_len+4(FP), R2 173 | MOVW output+12(FP), R3 174 | MOVW output_len+16(FP), R0 175 | MOVF scale+24(FP), F0 176 | 177 | // Choose the shortest length 178 | CMP R2, R0 179 | MOVW.LT R0, R2 180 | 181 | // If no input then we are done 182 | TEQ $0, R2 183 | BEQ f32toi16_done 184 | 185 | MOVW R2, R7 186 | ADD R2<<2, R1, R2 187 | 188 | // R1 = input 189 | // R2 = end of output 190 | // R3 = output 191 | // R7 = count 192 | 193 | MOVBU ·UseVector+0(SB), R0 194 | TEQ $0, R0 195 | BNE f32toi16_vector 196 | 197 | //////////////// VFP Scalar ///////////// 198 | 199 | AND $(~3), R7 200 | TEQ $0, R7 201 | BEQ f32toi16_tail 202 | ADD R7<<2, R1, R7 // R7 = end of output truncated to block size 203 | 204 | f32toi16_scalar_loop: 205 | WORD $0xecb11a04 // vldmia r1!, {s2, s3, s4, s5} 206 | WORD $0xee211a00 // vmul.f32 s2, s2, s0 207 | WORD $0xee611a80 // vmul.f32 s3, s3, s0 208 | WORD $0xee222a00 // vmul.f32 s4, s4, s0 209 | WORD $0xee622a80 // vmul.f32 s5, s5, s0 210 | WORD $0xeebd1ac1 // vcvt.s32.f32 s2, s2 211 | WORD $0xeefd1ae1 // vcvt.s32.f32 s3, s3 212 | WORD $0xeebd2ac2 // vcvt.s32.f32 s4, s4 213 | WORD $0xeefd2ae2 // vcvt.s32.f32 s5, s5 214 | WORD $0xec540a11 // vmov r0, r4, s2, s3 215 | MOVH R0, 0(R3) 216 | MOVH R4, 2(R3) 217 | WORD 
$0xec5b8a12 // vmov r8, r11, s4, s5 218 | MOVH R8, 4(R3) 219 | MOVH R11, 6(R3) 220 | ADD $8, R3 221 | 222 | CMP R7, R1 223 | BLT f32toi16_scalar_loop 224 | 225 | B f32toi16_tail 226 | 227 | ///////////// VFP Vector ////////////// 228 | 229 | f32toi16_vector: 230 | AND $(~7), R7 231 | TEQ $0, R7 232 | BEQ f32toi16_tail 233 | ADD R7<<2, R1, R7 // R7 = end of output truncated to block size 234 | 235 | PLD (R1) 236 | PLD 64(R1) 237 | PLD (2*64)(R1) 238 | PLD (3*64)(R1) 239 | 240 | // Set vector length to 8 241 | WORD $0xeef10a10 // vmrs r0, fpscr 242 | BIC $((7<<16)|(3<<20)), R0 243 | ORR $((7<<16)|(0<<20)), R0 244 | WORD $0xeee10a10 // fmxr fpscr, r0 245 | 246 | f32toi16_vector_loop: 247 | PLD (4*64)(R1) 248 | WORD $0xecb14a08 // vldmia r1!, {s8-s15} 249 | WORD $0xee244a00 // vmul.f32 s8, s8, s0 250 | WORD $0xeebd4ac4 // vcvt.s32.f32 s8, s8 251 | WORD $0xeefd4ae4 // vcvt.s32.f32 s9, s9 252 | WORD $0xeebd5ac5 // vcvt.s32.f32 s10, s10 253 | WORD $0xeefd5ae5 // vcvt.s32.f32 s11, s11 254 | WORD $0xec540a14 // vmov r0, r4, s8, s9 255 | WORD $0xec5b8a15 // vmov r8, r11, s10, s11 256 | MOVH R0, 0(R3) 257 | MOVH R4, 2(R3) 258 | MOVH R8, 4(R3) 259 | MOVH R11, 6(R3) 260 | WORD $0xeebd6ac6 // vcvt.s32.f32 s12, s12 261 | WORD $0xeefd6ae6 // vcvt.s32.f32 s13, s13 262 | WORD $0xeebd7ac7 // vcvt.s32.f32 s14, s14 263 | WORD $0xeefd7ae7 // vcvt.s32.f32 s15, s15 264 | WORD $0xec540a16 // vmov r0, r4, s12, s13 265 | WORD $0xec5b8a17 // vmov r8, r11, s14, s15 266 | MOVH R0, 8(R3) 267 | MOVH R4, 10(R3) 268 | MOVH R8, 12(R3) 269 | MOVH R11, 14(R3) 270 | ADD $16, R3 271 | 272 | CMP R7, R1 273 | BLT f32toi16_vector_loop 274 | 275 | // Clear vector mode 276 | WORD $0xeef10a10 // vmrs r0, fpscr 277 | BIC $((7<<16)|(3<<20)), R0 278 | WORD $0xeee10a10 // fmxr fpscr, r0 279 | 280 | f32toi16_tail: 281 | CMP R1, R2 282 | BEQ f32toi16_done 283 | 284 | f32toi16_tail_loop: 285 | MOVF 0(R1), F1 286 | ADD $4, R1 287 | MULF F0, F1 288 | MOVFW F1, R0 289 | MOVHU R0, (R3) 290 | ADD $2, R3 291 | CMP R2, R1 
292 | BLT f32toi16_tail_loop 293 | 294 | f32toi16_done: 295 | RET 296 | 297 | // TODO: detect endianess and use non-native order writes on big-endian 298 | TEXT ·F32toi16ble(SB), NOSPLIT, $0 299 | MOVW output_len+16(FP), R0 300 | MOVW R0>>1, R0 301 | MOVW R0, output_len+16(FP) 302 | B ·F32toi16(SB) 303 | 304 | TEXT ·I16bleToF64(SB), NOSPLIT, $0 305 | B ·i16bleToF64(SB) 306 | 307 | TEXT ·I16bleToF32(SB), NOSPLIT, $0 308 | B ·i16bleToF32(SB) 309 | -------------------------------------------------------------------------------- /dsp/conversion_arm64.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | 3 | TEXT ·Ui8toi16(SB), NOSPLIT, $0 4 | B ·ui8toi16(SB) 5 | 6 | TEXT ·Ui8toi16b(SB), NOSPLIT, $0 7 | B ·ui8toi16b(SB) 8 | 9 | TEXT ·Ui8tof32(SB), NOSPLIT, $0 10 | MOVD input(FP), R0 11 | MOVD input_len+8(FP), R1 12 | MOVD output+24(FP), R2 13 | MOVD output_len+32(FP), R3 14 | 15 | CMP R3, R1 16 | BLT ui8tof32_min_len 17 | MOVD R3, R1 18 | ui8tof32_min_len: 19 | 20 | #define BLOCK_SIZE 32 21 | 22 | MOVW $0x80, R3 23 | WORD $0x4e010c60 // dup v0.16b, w3 24 | 25 | CMP $BLOCK_SIZE, R1 26 | BLT ui8tof32_scalar 27 | 28 | ui8tof32_simd_loop: 29 | WORD $0xad400801 // ldp q1, q2, [x0] 30 | ADD $BLOCK_SIZE, R0 31 | WORD $0x6e208421 // sub v1.16b, v1.16b, v0.16b 32 | WORD $0x6e208442 // sub v2.16b, v2.16b, v0.16b 33 | WORD $0xf9804001 // prfm pldl1strm, [x0, 128] 34 | WORD $0x0f08a42a // sxtl v10.8h, v1.8b 35 | WORD $0x4f08a42b // sxtl2 v11.8h, v1.16b 36 | WORD $0x0f08a44c // sxtl v12.8h, v2.8b 37 | WORD $0x4f08a44d // sxtl2 v13.8h, v2.16b 38 | WORD $0x0f10a554 // sxtl v20.4s, v10.4h 39 | WORD $0x4f10a555 // sxtl2 v21.4s, v10.8h 40 | WORD $0x0f10a576 // sxtl v22.4s, v11.4h 41 | WORD $0x4f10a577 // sxtl2 v23.4s, v11.8h 42 | WORD $0x0f10a598 // sxtl v24.4s, v12.4h 43 | WORD $0x4f10a599 // sxtl2 v25.4s, v12.8h 44 | WORD $0x0f10a5ba // sxtl v26.4s, v13.4h 45 | WORD $0x4f10a5bb // sxtl2 v27.4s, v13.8h 46 | //WORD $0x4e21da81 
// scvtf v1.4s, v20.4s 47 | //WORD $0x4e21daa2 // scvtf v2.4s, v21.4s 48 | WORD $0x4e21da89 // scvtf v9.4s, v20.4s 49 | WORD $0x4e21daaa // scvtf v10.4s, v21.4s 50 | WORD $0x4e21dac3 // scvtf v3.4s, v22.4s 51 | WORD $0x4e21dae4 // scvtf v4.4s, v23.4s 52 | //WORD $0xad000841 // stp q1, q2, [x2] 53 | WORD $0xad002849 // stp q9, q10, [x2] 54 | WORD $0x4e21db05 // scvtf v5.4s, v24.4s 55 | WORD $0x4e21db26 // scvtf v6.4s, v25.4s 56 | WORD $0xad011043 // stp q3, q4, [x2,32] 57 | WORD $0x4e21db47 // scvtf v7.4s, v26.4s 58 | WORD $0x4e21db68 // scvtf v8.4s, v27.4s 59 | WORD $0xad021845 // stp q5, q6, [x2,64] 60 | //WORD $0xad032047 // stp q7, q8, [x2,96] 61 | ADD $(BLOCK_SIZE*4), R2 62 | SUB $BLOCK_SIZE, R1 63 | WORD $0xad3f2047 // stp q7, q8, [x2,-32] 64 | CMP $BLOCK_SIZE, R1 65 | BGE ui8tof32_simd_loop 66 | 67 | ui8tof32_scalar: 68 | CMP ZR, R1 69 | BEQ ui8tof32_done 70 | 71 | ui8tof32_scalar_loop: 72 | MOVBU (R0), R5 73 | SUB $128, R5, R5 74 | SCVTFS R5, F0 75 | FMOVS F0, (R2) 76 | ADD $1, R0 77 | ADD $4, R2 78 | SUBS $1, R1 79 | BNE ui8tof32_scalar_loop 80 | ui8tof32_done: 81 | RET 82 | 83 | TEXT ·I8tof32(SB), NOSPLIT, $0 84 | B ·i8tof32(SB) 85 | 86 | TEXT ·Ui8toc64(SB), NOSPLIT, $0 87 | B ·ui8toc64(SB) 88 | 89 | TEXT ·F32toi16(SB), NOSPLIT, $0 90 | B ·f32toi16(SB) 91 | 92 | TEXT ·F32toi16ble(SB), NOSPLIT, $0 93 | B ·f32toi16ble(SB) 94 | 95 | TEXT ·I16bleToF64(SB), NOSPLIT, $0 96 | B ·i16bleToF64(SB) 97 | 98 | TEXT ·I16bleToF32(SB), NOSPLIT, $0 99 | B ·i16bleToF32(SB) 100 | -------------------------------------------------------------------------------- /dsp/conversion_avo_amd64.go: -------------------------------------------------------------------------------- 1 | //go:build ignore 2 | // +build ignore 3 | 4 | package main 5 | 6 | import ( 7 | . "github.com/mmcloughlin/avo/build" 8 | . "github.com/mmcloughlin/avo/operand" 9 | . 
"github.com/mmcloughlin/avo/reg" 10 | ) 11 | 12 | func main() { 13 | TEXT("Ui8tof32", NOSPLIT, "func(input []byte, output []float32)") 14 | Doc("Ui8tof32 converts unsigned 8-bit samples to 32-bit float.") 15 | inputPtr := Load(Param("input").Base(), GP64()) 16 | inputLen := Load(Param("input").Len(), GP64()) 17 | outputPtr := Load(Param("output").Base(), GP64()) 18 | outputLen := Load(Param("output").Len(), GP64()) 19 | 20 | Comment("Pick shortest length") 21 | CMPQ(outputLen, inputLen) 22 | JGE(LabelRef("ui8tof32_min_len")) 23 | MOVQ(outputLen, inputLen) 24 | Label("ui8tof32_min_len") 25 | 26 | index := GP64() 27 | MOVQ(U64(0), index) 28 | 29 | t64 := GP64() 30 | 31 | Comment("If input is too short to optimize (less than 32 bytes) then single step") 32 | MOVQ(U64(32), t64) 33 | CMPQ(t64, inputLen) 34 | JGE(LabelRef("ui8tof32_stepper")) 35 | 36 | Comment("Align output to 16-byte boundary") 37 | MOVQ(outputPtr, t64) 38 | ANDQ(Imm(0xf), t64) 39 | SHRQ(Imm(2), t64) // divide by 4 to convert bytes to 32-bit blocks 40 | JZ(LabelRef("ui8tof32_aligned")) 41 | 42 | t2 := GP64() 43 | MOVQ(U64(4), t2) 44 | SUBQ(t64, t2) 45 | ui8tof32Step(inputPtr, outputPtr, index, t2, "ui8tof32_align") 46 | 47 | Label("ui8tof32_aligned") 48 | n := GP64() 49 | MOVQ(inputLen, n) 50 | ANDQ(U32(^uint32(15)), n) 51 | CMPQ(index, n) 52 | JGE(LabelRef("ui8tof32_stepper")) 53 | 54 | // CMPB(NewDataAddr(Symbol{Name: "·x86+const_offsetX86HasSSE41"}, 0), Imm(1)) 55 | CMPB(NewDataAddr(Symbol{Name: "·useSSE4"}, 0), Imm(1)) 56 | JNE(LabelRef("ui8tof32_nosse4")) 57 | 58 | ui8tof32SSE4(inputPtr, outputPtr, index, n) 59 | 60 | JMP(LabelRef("ui8tof32_stepper")) 61 | 62 | Label("ui8tof32_nosse4") 63 | 64 | ui8tof32SSE2(inputPtr, outputPtr, index, n) 65 | 66 | Comment("TODO: work increasingly smaller blocks") 67 | 68 | Label("ui8tof32_stepper") 69 | CMPQ(index, inputLen) 70 | JGE(LabelRef("ui8tof32_done")) 71 | 72 | ui8tof32Step(inputPtr, outputPtr, index, inputLen, "ui8tof32_step") 73 | 74 | 
Label("ui8tof32_done") 75 | RET() 76 | 77 | Generate() 78 | } 79 | 80 | func ui8tof32Step(inputPtr, outputPtr, index, maxIndex Register, label string) { 81 | Label(label) 82 | x0 := XMM() 83 | t64 := GP64() 84 | MOVBQZX(Mem{Base: inputPtr}, t64) 85 | INCQ(inputPtr) 86 | SUBQ(Imm(128), t64) 87 | CVTSQ2SS(t64, x0) 88 | MOVSS(x0, Mem{Base: outputPtr}) 89 | ADDQ(Imm(4), outputPtr) 90 | INCQ(index) 91 | CMPQ(index, maxIndex) 92 | JLT(LabelRef(label)) 93 | } 94 | 95 | func ui8tof32SSE4(inputPtr, outputPtr, index, maxIndex Register) { 96 | t32 := GP32() 97 | x0 := XMM() 98 | x1 := XMM() 99 | toSub := XMM() 100 | 101 | MOVL(U32(0x80808080), t32) 102 | MOVD(t32, toSub) 103 | PSHUFL(Imm(0), toSub, toSub) 104 | 105 | Label("ui8tof32_sse4_loop") 106 | Comment("Load 16 unsigned 8-bit values") 107 | MOVOU(Mem{Base: inputPtr}, x0) 108 | Comment("Make the values signed") 109 | PSUBB(toSub, x0) 110 | 111 | Comment("Lowest 4 values (bytes 0-3)") 112 | PMOVSXBD(x0, x1) 113 | Comment("Convert 32-bit signed integers to 32-bit float") 114 | CVTPL2PS(x1, x1) 115 | MOVAPS(x1, Mem{Base: outputPtr}) 116 | 117 | Comment("Next 4 values (bytes 4-7)") 118 | PSHUFL(Imm(1), x0, x1) 119 | PMOVSXBD(x1, x1) 120 | Comment("Convert 32-bit signed integers to 32-bit float") 121 | CVTPL2PS(x1, x1) 122 | MOVAPS(x1, Mem{Base: outputPtr, Disp: 16}) 123 | 124 | Comment("Next 4 values (bytes 8-11)") 125 | PSHUFL(Imm(2), x0, x1) 126 | PMOVSXBD(x1, x1) 127 | Comment("Convert 32-bit signed integers to 32-bit float") 128 | CVTPL2PS(x1, x1) 129 | MOVAPS(x1, Mem{Base: outputPtr, Disp: 32}) 130 | 131 | Comment("Next 4 values (bytes 12-15)") 132 | PSHUFL(Imm(3), x0, x1) 133 | PMOVSXBD(x1, x1) 134 | Comment("Convert 32-bit signed integers to 32-bit float") 135 | CVTPL2PS(x1, x1) 136 | MOVAPS(x1, Mem{Base: outputPtr, Disp: 48}) 137 | 138 | ADDQ(Imm(16), index) 139 | ADDQ(Imm(16), inputPtr) 140 | ADDQ(Imm(64), outputPtr) 141 | CMPQ(index, maxIndex) 142 | JLT(LabelRef("ui8tof32_sse4_loop")) 143 | } 144 | 145 | func 
ui8tof32SSE2(inputPtr, outputPtr, index, maxIndex Register) { 146 | t32 := GP32() 147 | x0 := XMM() 148 | x1 := XMM() 149 | x2 := XMM() 150 | toSub := XMM() 151 | 152 | MOVL(U32(0x80808080), t32) 153 | MOVD(t32, toSub) 154 | PSHUFL(Imm(0), toSub, toSub) 155 | 156 | Label("ui8tof32_sse2_loop") 157 | Comment("Load 16 unsigned 8-bit values") 158 | MOVOU(Mem{Base: inputPtr}, x0) 159 | Comment("Make the values signed") 160 | PSUBB(toSub, x0) 161 | MOVO(x0, x1) 162 | 163 | Comment("Lowest 4 values (bytes 0-3)") 164 | PUNPCKLBW(x1, x1) 165 | MOVO(x1, x2) 166 | PUNPCKLWL(x1, x1) 167 | PSRAL(Imm(24), x1) 168 | CVTPL2PS(x1, x1) 169 | MOVAPS(x1, Mem{Base: outputPtr}) 170 | 171 | Comment("Next 4 values (bytes 4-7)") 172 | PUNPCKHWL(x2, x2) 173 | PSRAL(Imm(24), x2) 174 | CVTPL2PS(x2, x2) 175 | MOVAPS(x2, Mem{Base: outputPtr, Disp: 16}) 176 | 177 | Comment("Next 4 values (bytes 8-11)") 178 | PUNPCKHBW(x0, x0) 179 | MOVO(x0, x2) 180 | PUNPCKLWL(x0, x0) 181 | PSRAL(Imm(24), x0) 182 | CVTPL2PS(x0, x0) 183 | MOVAPS(x0, Mem{Base: outputPtr, Disp: 32}) 184 | 185 | Comment("Next 4 values (bytes 12-15)") 186 | PUNPCKHWL(x2, x2) 187 | PSRAL(Imm(24), x2) 188 | CVTPL2PS(x2, x2) 189 | MOVAPS(x2, Mem{Base: outputPtr, Disp: 48}) 190 | 191 | ADDQ(Imm(16), index) 192 | ADDQ(Imm(16), inputPtr) 193 | ADDQ(Imm(64), outputPtr) 194 | CMPQ(index, maxIndex) 195 | JLT(LabelRef("ui8tof32_sse2_loop")) 196 | } 197 | -------------------------------------------------------------------------------- /dsp/conversion_avo_amd64.s: -------------------------------------------------------------------------------- 1 | // Code generated by command: go run conversion_avo_amd64.go -out conversion_avo_amd64.s -stubs stub_windows.go. DO NOT EDIT. 
2 | 3 | #include "textflag.h" 4 | 5 | // func Ui8tof32(input []byte, output []float32) 6 | // Requires: SSE, SSE2, SSE4.1 7 | TEXT ·Ui8tof32(SB), NOSPLIT, $0-48 8 | MOVQ input_base+0(FP), AX 9 | MOVQ input_len+8(FP), CX 10 | MOVQ output_base+24(FP), DX 11 | MOVQ output_len+32(FP), BX 12 | 13 | // Pick shortest length 14 | CMPQ BX, CX 15 | JGE ui8tof32_min_len 16 | MOVQ BX, CX 17 | 18 | ui8tof32_min_len: 19 | MOVQ $0x0000000000000000, BX 20 | 21 | // If input is too short to optimize (less than 32 bytes) then single step 22 | MOVQ $0x0000000000000020, SI 23 | CMPQ SI, CX 24 | JGE ui8tof32_stepper 25 | 26 | // Align output to 16-byte boundary 27 | MOVQ DX, SI 28 | ANDQ $0x0f, SI 29 | SHRQ $0x02, SI 30 | JZ ui8tof32_aligned 31 | MOVQ $0x0000000000000004, DI 32 | SUBQ SI, DI 33 | 34 | ui8tof32_align: 35 | MOVBQZX (AX), SI 36 | INCQ AX 37 | SUBQ $0x80, SI 38 | CVTSQ2SS SI, X0 39 | MOVSS X0, (DX) 40 | ADDQ $0x04, DX 41 | INCQ BX 42 | CMPQ BX, DI 43 | JLT ui8tof32_align 44 | 45 | ui8tof32_aligned: 46 | MOVQ CX, SI 47 | ANDQ $0xfffffff0, SI 48 | CMPQ BX, SI 49 | JGE ui8tof32_stepper 50 | CMPB ·useSSE4+0(SB), $0x01 51 | JNE ui8tof32_nosse4 52 | MOVL $0x80808080, DI 53 | MOVD DI, X3 54 | PSHUFL $0x00, X3, X3 55 | 56 | ui8tof32_sse4_loop: 57 | // Load 16 unsigned 8-bit values 58 | MOVOU (AX), X0 59 | 60 | // Make the values signed 61 | PSUBB X3, X0 62 | 63 | // Lowest 4 values (bytes 0-3) 64 | PMOVSXBD X0, X2 65 | 66 | // Convert 32-bit signed integers to 32-bit float 67 | CVTPL2PS X2, X2 68 | MOVAPS X2, (DX) 69 | 70 | // Next 4 values (bytes 4-7) 71 | PSHUFL $0x01, X0, X2 72 | PMOVSXBD X2, X2 73 | 74 | // Convert 32-bit signed integers to 32-bit float 75 | CVTPL2PS X2, X2 76 | MOVAPS X2, 16(DX) 77 | 78 | // Next 4 values (bytes 8-11) 79 | PSHUFL $0x02, X0, X2 80 | PMOVSXBD X2, X2 81 | 82 | // Convert 32-bit signed integers to 32-bit float 83 | CVTPL2PS X2, X2 84 | MOVAPS X2, 32(DX) 85 | 86 | // Next 4 values (bytes 12-15) 87 | PSHUFL $0x03, X0, X2 88 | PMOVSXBD X2, X2 89 | 
90 | // Convert 32-bit signed integers to 32-bit float 91 | CVTPL2PS X2, X2 92 | MOVAPS X2, 48(DX) 93 | ADDQ $0x10, BX 94 | ADDQ $0x10, AX 95 | ADDQ $0x40, DX 96 | CMPQ BX, SI 97 | JLT ui8tof32_sse4_loop 98 | JMP ui8tof32_stepper 99 | 100 | ui8tof32_nosse4: 101 | MOVL $0x80808080, DI 102 | MOVD DI, X4 103 | PSHUFL $0x00, X4, X4 104 | 105 | ui8tof32_sse2_loop: 106 | // Load 16 unsigned 8-bit values 107 | MOVOU (AX), X0 108 | 109 | // Make the values signed 110 | PSUBB X4, X0 111 | MOVO X0, X2 112 | 113 | // Lowest 4 values (bytes 0-3) 114 | PUNPCKLBW X2, X2 115 | MOVO X2, X3 116 | PUNPCKLWL X2, X2 117 | PSRAL $0x18, X2 118 | CVTPL2PS X2, X2 119 | MOVAPS X2, (DX) 120 | 121 | // Next 4 values (bytes 4-7) 122 | PUNPCKHWL X3, X3 123 | PSRAL $0x18, X3 124 | CVTPL2PS X3, X3 125 | MOVAPS X3, 16(DX) 126 | 127 | // Next 4 values (bytes 8-11) 128 | PUNPCKHBW X0, X0 129 | MOVO X0, X3 130 | PUNPCKLWL X0, X0 131 | PSRAL $0x18, X0 132 | CVTPL2PS X0, X0 133 | MOVAPS X0, 32(DX) 134 | 135 | // Next 4 values (bytes 12-15) 136 | PUNPCKHWL X3, X3 137 | PSRAL $0x18, X3 138 | CVTPL2PS X3, X3 139 | MOVAPS X3, 48(DX) 140 | ADDQ $0x10, BX 141 | ADDQ $0x10, AX 142 | ADDQ $0x40, DX 143 | CMPQ BX, SI 144 | JLT ui8tof32_sse2_loop 145 | 146 | // TODO: work increasingly smaller blocks 147 | ui8tof32_stepper: 148 | CMPQ BX, CX 149 | JGE ui8tof32_done 150 | 151 | ui8tof32_step: 152 | MOVBQZX (AX), SI 153 | INCQ AX 154 | SUBQ $0x80, SI 155 | CVTSQ2SS SI, X1 156 | MOVSS X1, (DX) 157 | ADDQ $0x04, DX 158 | INCQ BX 159 | CMPQ BX, CX 160 | JLT ui8tof32_step 161 | 162 | ui8tof32_done: 163 | RET 164 | -------------------------------------------------------------------------------- /dsp/cpu_amd64.go: -------------------------------------------------------------------------------- 1 | package dsp 2 | 3 | import ( 4 | "github.com/samuel/go-dsp/dsp/internal/cpu" 5 | ) 6 | 7 | var ( 8 | useSSE4 bool 9 | useAVX2 bool 10 | useSSE2 bool 11 | ) 12 | 13 | func init() { 14 | useSSE4 = cpu.X86.HasSSE41 15 | useAVX2 = 
cpu.X86.HasAVX 16 | useSSE2 = cpu.X86.HasSSE2 17 | } 18 | -------------------------------------------------------------------------------- /dsp/cpu_arm.go: -------------------------------------------------------------------------------- 1 | package dsp 2 | 3 | import ( 4 | "io" 5 | "io/ioutil" 6 | "log" 7 | "os" 8 | "regexp" 9 | "strconv" 10 | ) 11 | 12 | var ( 13 | // HaveNEON is true if ARM NEON SIMD instructions are available 14 | HaveNEON bool 15 | // UseVector is true if VFP vector ops should be used 16 | UseVector bool 17 | ) 18 | 19 | var ( 20 | // neonRE matches /proc/cpuinfo if the neon instruction set is available 21 | neonRE = regexp.MustCompile(`(?m)^Features.*(neon|asimd).*$`) 22 | // rpi1RE matches /proc/cpuinfo for Raspberry Pi 1 23 | rpi1RE = regexp.MustCompile(`(?m)^Hardware.*BCM2708.*$`) 24 | ) 25 | 26 | func init() { 27 | // ARM doesn't expose CPU info to userland so it's necessary to 28 | // get the information from the kernel. 29 | // Ref: Cortex-A Series Programmer's Guide Section 20.1.7 Detecting NEON 30 | 31 | f, err := os.Open("/proc/cpuinfo") 32 | if err != nil { 33 | return 34 | } 35 | defer f.Close() 36 | 37 | b, err := ioutil.ReadAll(io.LimitReader(f, 2048)) 38 | if err != nil { 39 | log.Printf("Failed to read cpuinfo: %s", err.Error()) 40 | return 41 | } 42 | 43 | HaveNEON = neonRE.Match(b) 44 | // Vector ops are considerably slower on more recent ARM (ARM8, ARM9). 45 | // These generally have NEON anyway. Only enable vfp vector use for 46 | // Raspberry Pi 1 to be safe. 
47 | UseVector = !HaveNEON && rpi1RE.Match(b) 48 | if b, err := strconv.ParseBool(os.Getenv("ARMVECTOR")); err == nil { 49 | UseVector = b 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /dsp/cpu_arm64.go: -------------------------------------------------------------------------------- 1 | package dsp 2 | 3 | var HaveNEON = true 4 | -------------------------------------------------------------------------------- /dsp/cpu_arm64_test.go: -------------------------------------------------------------------------------- 1 | package dsp 2 | 3 | import "testing" 4 | 5 | func simdTest(t *testing.T, fn func(t *testing.T)) { 6 | if HaveNEON { 7 | t.Run("neon", fn) 8 | HaveNEON = false 9 | t.Run("noneon", fn) 10 | HaveNEON = true 11 | } else { 12 | t.Run("neon", func(t *testing.T) { t.Skip("NEON not available") }) 13 | t.Run("noneon", fn) 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /dsp/cpu_arm_test.go: -------------------------------------------------------------------------------- 1 | //go:build arm 2 | // +build arm 3 | 4 | package dsp 5 | 6 | import "testing" 7 | 8 | func simdTest(t *testing.T, fn func(t *testing.T)) { 9 | if HaveNEON { 10 | t.Run("neon", fn) 11 | HaveNEON = false 12 | t.Run("noneon", fn) 13 | HaveNEON = true 14 | } else { 15 | t.Run("neon", func(t *testing.T) { t.Skip("NEON not available") }) 16 | t.Run("noneon", fn) 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /dsp/cpu_x86_test.go: -------------------------------------------------------------------------------- 1 | //go:build 386 || amd64 2 | // +build 386 amd64 3 | 4 | package dsp 5 | 6 | import ( 7 | "testing" 8 | ) 9 | 10 | func simdTest(t *testing.T, fn func(t *testing.T)) { 11 | if useSSE4 { 12 | t.Run("sse4", fn) 13 | useSSE4 = false 14 | t.Run("nosse4", fn) 15 | useSSE4 = true 16 | } else { 17 | t.Run("sse4", func(t *testing.T) { 
t.Skip("sse4 not available") }) 18 | t.Run("nosse4", fn) 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /dsp/demod.go: -------------------------------------------------------------------------------- 1 | package dsp 2 | 3 | import "math/cmplx" 4 | 5 | // PolarDiscriminator returns the phase angle between two complex vectors 6 | // equivalent to arg(a * conj(b)). The returned angle is in the range [-Pi, Pi]. 7 | func PolarDiscriminator(a, b complex128) float64 { 8 | return cmplx.Phase(a * cmplx.Conj(b)) 9 | } 10 | 11 | // PolarDiscriminator32 returns the phase angle between two complex vectors 12 | // equivalent to arg(a * conj(b)). The returned angle is in the range [-Pi, Pi]. 13 | func PolarDiscriminator32(a, b complex64) float32 { 14 | return FastAtan2(imag(a)*real(b)-real(a)*imag(b), real(a)*real(b)+imag(a)*imag(b)) 15 | } 16 | 17 | // FMDemodFilter is an FM demodulator filter using a polar disciminator. 18 | // 19 | // x(n)─────▶○───────────────────▶(X)──────────────────▶arctan──▶ 20 | // │ ▲ y(n)=x(n)x*(n-1) 21 | // │ ┌───┐ ┌───┐ │ 22 | // └──▶│z⁻¹├────▶│z^*├───┘ 23 | // └───┘ └───┘ 24 | type FMDemodFilter struct { 25 | pre complex64 26 | } 27 | 28 | func (fi *FMDemodFilter) Demodulate(input []complex64, output []float32) int { 29 | return fmDemodulateAsm(fi, input, output) 30 | } 31 | 32 | func fmDemodulateAsm(fi *FMDemodFilter, input []complex64, output []float32) int 33 | 34 | func fmDemodulate(fi *FMDemodFilter, input []complex64, output []float32) int { 35 | pre := fi.pre 36 | for i, inp := range input { 37 | // output[i] = PolarDiscriminator32(inp, pre) 38 | output[i] = FastAtan2(imag(inp)*real(pre)-real(inp)*imag(pre), real(inp)*real(pre)+imag(inp)*imag(pre)) 39 | pre = inp 40 | } 41 | fi.pre = pre 42 | return len(input) 43 | } 44 | -------------------------------------------------------------------------------- /dsp/demod_386.s: 
-------------------------------------------------------------------------------- 1 | TEXT ·fmDemodulateAsm(SB), 7, $0 2 | JMP ·fmDemodulate(SB) 3 | -------------------------------------------------------------------------------- /dsp/demod_amd64.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | 3 | TEXT ·fmDemodulateAsm(SB), NOSPLIT, $0 4 | JMP ·fmDemodulate(SB) 5 | -------------------------------------------------------------------------------- /dsp/demod_arm.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | 3 | TEXT ·fmDemodulateAsm(SB), NOSPLIT, $0 4 | MOVW input+4(FP), R1 5 | MOVW input_len+8(FP), R2 6 | MOVW output+16(FP), R3 7 | MOVW output_len+20(FP), R4 8 | 9 | // Choose the shortest length 10 | CMP R2, R4 11 | MOVW.LT R4, R2 12 | 13 | // If no input then skip loop 14 | TEQ $0, R2 15 | BEQ fmDemod_done 16 | 17 | MOVW fi+0(FP), R0 18 | MOVF 0(R0), F5 // real(pre) 19 | MOVF 4(R0), F1 // imag(pre) 20 | 21 | fmDemod_loop: 22 | MOVF 0(R1), F2 // real(inp) 23 | MOVF 4(R1), F3 // imag(inp) 24 | ADD $8, R1 25 | 26 | MULF F3, F5, F6 // imag(inp)*real(pre) 27 | MULF F2, F1, F0 // real(inp)*imag(pre) 28 | MULF F2, F5, F4 // real(inp)*real(pre) 29 | MULF F3, F1, F7 // imag(inp)*imag(pre) 30 | SUBF F0, F6 31 | ADDF F7, F4 32 | 33 | MOVF F2, F5 // real(pre) = real(inp) 34 | MOVF F3, F1 // imag(pre) = imag(inp) 35 | 36 | // FastAtan2(y=F6, x=F4) 37 | 38 | ABSF F6, F2 39 | MOVF $1e-20, F0 40 | ADDF F0, F2 41 | WORD $0xeeb54ac0 // vcmpe.f32 s8, #0x0 42 | WORD $0xeef1fa10 // vmrs APSR_nzcv, fpscr 43 | BEQ fmDemod_atan_zero_x 44 | BGT fmDemod_atan_pos_x 45 | ADDF F2, F4, F7 // x + abs(y) 46 | SUBF F4, F2, F4 // abs(y) - x 47 | MOVF $2.356194496154785, F3 // pi * 3/4 48 | B fmDemod_atan_1 49 | 50 | fmDemod_atan_pos_x: 51 | SUBF F2, F4, F7 // x - abs(y) 52 | ADDF F2, F4, F4 // abs(y) + x 53 | MOVF $0.7853981852531433, F3 // pi * 1/4 54 | 55 | 
fmDemod_atan_1: 56 | DIVF F4, F7, F2 57 | MOVF $0.1963, F7 58 | MULF F2, F7 59 | MULF F2, F7 60 | MOVF $0.9817, F0 61 | SUBF F0, F7 62 | MULF F2, F7 63 | ADDF F3, F7 64 | WORD $0xeeb56ac0 // vcmpe.f32 s12, #0x0 65 | WORD $0xeef1fa10 // vmrs APSR_nzcv, fpscr 66 | WORD $0xbeb17a47 // vneglt.f32 s14, s14 67 | MOVF F7, 0(R3) 68 | B fmDemod_atan_done 69 | 70 | fmDemod_atan_zero_x: 71 | WORD $0xeeb56ac0 // vcmpe.f32 s12, #0x0 72 | WORD $0xeef1fa10 // vmrs APSR_nzcv, fpscr 73 | MOVF.LT $-1.570796326794896557998981734272092580795288085938, F6 74 | MOVF.GT $1.570796326794896557998981734272092580795288085938, F6 75 | MOVF F6, 0(R3) 76 | 77 | fmDemod_atan_done: 78 | 79 | // 80 | 81 | ADD $4, R3 82 | 83 | SUB $1, R2 84 | TEQ $0, R2 85 | BNE fmDemod_loop 86 | 87 | MOVF F5, 0(R0) // real(pre) 88 | MOVF F1, 4(R0) // imag(pre) 89 | 90 | fmDemod_done: 91 | MOVW input_len+8(FP), R0 92 | MOVW R0, output_len+28(FP) 93 | RET 94 | -------------------------------------------------------------------------------- /dsp/demod_arm64.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | 3 | TEXT ·fmDemodulateAsm(SB), NOSPLIT, $0 4 | B ·fmDemodulate(SB) 5 | -------------------------------------------------------------------------------- /dsp/demod_test.go: -------------------------------------------------------------------------------- 1 | package dsp 2 | 3 | import ( 4 | "math/rand" 5 | "testing" 6 | ) 7 | 8 | var demodBenchSamples []complex64 9 | 10 | func init() { 11 | r := rand.New(rand.NewSource(0)) 12 | demodBenchSamples = make([]complex64, benchSize) 13 | for i := 0; i < benchSize; i++ { 14 | demodBenchSamples[i] = complex(r.Float32(), r.Float32()) 15 | } 16 | } 17 | 18 | func TestFMDemodulation(t *testing.T) { 19 | filter := &FMDemodFilter{} 20 | input := []complex64{complex(0.0, 2.0), complex(1.0, 2.0), complex(-3.0, 7.0), complex(4.0, -9.0)} 21 | output := make([]float32, len(input)) 22 | filter.pre = 0.0 23 | if n := 
fmDemodulateAsm(filter, input, output); n != len(input) { 24 | t.Fatalf("Expected n %d instead of %d", len(input), n) 25 | } 26 | expected := make([]float32, len(input)) 27 | filter.pre = 0.0 28 | if n := fmDemodulate(filter, input, expected); n != len(input) { 29 | t.Fatalf("Expected n %d instead of %d", len(input), n) 30 | } 31 | if len(output) != len(expected) { 32 | t.Fatalf("Output doesn't match expected:\n%+v\n%+v", output, expected) 33 | } 34 | for i := 0; i < len(output); i++ { 35 | if output[i] != expected[i] { 36 | t.Fatalf("Output doesn't match expected:\n%+v\n%+v", output, expected) 37 | } 38 | } 39 | } 40 | 41 | // func TestPolarDiscriminator32(t *testing.T) { 42 | // for i := 0; i < 1000; i++ { 43 | // x := complex(rand.Float32()-0.5, rand.Float32()-0.5) 44 | // y := complex(rand.Float32()-0.5, rand.Float32()-0.5) 45 | // expected := polarDiscriminator32(x, y) 46 | // output := PolarDiscriminator32(x, y) 47 | // if expected != output { 48 | // t.Fatalf("Output differs: %f != %f", output, expected) 49 | // } 50 | // } 51 | // } 52 | 53 | func BenchmarkPolarDiscriminator32(b *testing.B) { 54 | x := complex(float32(1), float32(2)) 55 | y := complex(float32(-3), float32(9)) 56 | for i := 0; i < b.N; i++ { 57 | _ = PolarDiscriminator32(x, y) 58 | } 59 | } 60 | 61 | func BenchmarkFMDemodulation(b *testing.B) { 62 | filter := &FMDemodFilter{} 63 | output := make([]float32, benchSize) 64 | b.SetBytes(benchSize) 65 | b.ResetTimer() 66 | for i := 0; i < b.N; i++ { 67 | _ = fmDemodulateAsm(filter, demodBenchSamples, output) 68 | } 69 | } 70 | 71 | func BenchmarkFMDemodulation_Go(b *testing.B) { 72 | filter := &FMDemodFilter{} 73 | output := make([]float32, benchSize) 74 | b.SetBytes(benchSize) 75 | b.ResetTimer() 76 | for i := 0; i < b.N; i++ { 77 | _ = fmDemodulate(filter, demodBenchSamples, output) 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /dsp/downsample.go: 
-------------------------------------------------------------------------------- 1 | package dsp 2 | 3 | type LowPassDownsampleComplexFilter struct { 4 | Downsample int 5 | 6 | now complex64 7 | prevIndex int 8 | } 9 | 10 | func (fi *LowPassDownsampleComplexFilter) Filter(samples []complex64) []complex64 { 11 | return lowPassDownsampleComplexFilterAsm(fi, samples) 12 | } 13 | 14 | func lowPassDownsampleComplexFilterAsm(fi *LowPassDownsampleComplexFilter, samples []complex64) []complex64 15 | 16 | func lowPassDownsampleComplexFilter(fi *LowPassDownsampleComplexFilter, samples []complex64) []complex64 { 17 | i2 := 0 18 | // outputScale := 1.0 / float32(fi.Downsample) 19 | for _, v := range samples { 20 | fi.now += v 21 | fi.prevIndex++ 22 | if fi.prevIndex < fi.Downsample { 23 | continue 24 | } 25 | samples[i2] = fi.now // complex(real(fi.now)*outputScale, imag(fi.now)*outputScale) 26 | fi.prevIndex = 0 27 | fi.now = 0 28 | i2++ 29 | } 30 | return samples[:i2] 31 | } 32 | 33 | type LowPassDownsampleRationalFilter struct { 34 | Fast, Slow int 35 | 36 | sum float32 37 | prevIndex int 38 | } 39 | 40 | func (fi *LowPassDownsampleRationalFilter) Filter(samples []float32) []float32 { 41 | return lowPassDownsampleRationalFilterAsm(fi, samples) 42 | } 43 | 44 | func lowPassDownsampleRationalFilterAsm(fi *LowPassDownsampleRationalFilter, samples []float32) []float32 45 | 46 | func lowPassDownsampleRationalFilter(fi *LowPassDownsampleRationalFilter, samples []float32) []float32 { 47 | i2 := 0 48 | fastSlowRatio := float32(fi.Slow) / float32(fi.Fast) 49 | for _, v := range samples { 50 | fi.sum += v 51 | fi.prevIndex += fi.Slow 52 | if fi.prevIndex < fi.Fast { 53 | continue 54 | } 55 | samples[i2] = fi.sum * fastSlowRatio 56 | i2++ 57 | fi.prevIndex -= fi.Fast 58 | fi.sum = 0.0 59 | } 60 | return samples[:i2] 61 | } 62 | -------------------------------------------------------------------------------- /dsp/downsample_386.s: 
-------------------------------------------------------------------------------- 1 | TEXT ·lowPassDownsampleComplexFilterAsm(SB), 7, $0 2 | JMP ·lowPassDownsampleComplexFilter(SB) 3 | 4 | TEXT ·lowPassDownsampleRationalFilterAsm(SB), 7, $0 5 | JMP ·lowPassDownsampleRationalFilter(SB) 6 | -------------------------------------------------------------------------------- /dsp/downsample_amd64.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | 3 | TEXT ·lowPassDownsampleComplexFilterAsm(SB), NOSPLIT, $0 4 | JMP ·lowPassDownsampleComplexFilter(SB) 5 | 6 | TEXT ·lowPassDownsampleRationalFilterAsm(SB), NOSPLIT, $0 7 | JMP ·lowPassDownsampleRationalFilter(SB) 8 | -------------------------------------------------------------------------------- /dsp/downsample_arm.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | 3 | TEXT ·lowPassDownsampleComplexFilterAsm(SB), NOSPLIT, $0 4 | MOVW fi+0(FP), R3 5 | MOVW 0(R3), R8 // fi.Downsample 6 | MOVW 12(R3), R7 // fi.prevIndex 7 | MOVW samples_len+8(FP), R2 8 | MOVW samples_data+4(FP), R5 // input 9 | MOVW R5, R6 // output 10 | MOVF 4(R3), F0 // real(fi.now) 11 | MOVF 8(R3), F1 // imag(fi.now) 12 | B complexLoopStart 13 | 14 | complexLoop: 15 | SUB $1, R2 16 | 17 | complexLoopStart: 18 | TEQ $0, R2 19 | BEQ complexLoopEnd 20 | 21 | // samples[i] 22 | MOVF 0(R5), F2 // real 23 | MOVF 4(R5), F3 // imag 24 | ADD $8, R5 25 | 26 | // fi.now += samples[i] 27 | ADDF F2, F0 28 | ADDF F3, F1 29 | 30 | // fi.prevIndex++ 31 | ADD $1, R7 32 | 33 | // if prevIndex < downsample: continue 34 | CMP R8, R7 35 | BLT complexLoop 36 | 37 | // samples[i2] = fi.now 38 | MOVF F0, 0(R6) 39 | MOVF F1, 4(R6) 40 | ADD $8, R6 41 | 42 | // fi.prevIndex = 0 43 | MOVW $0, R7 44 | 45 | // fi.now = 0.0 46 | MOVF $0.0, F0 47 | MOVF $0.0, F1 48 | 49 | B complexLoop 50 | 51 | complexLoopEnd: 52 | MOVW R7, 12(R3) // fi.prevIndex 53 | MOVF F0, 4(R3) 
// real(fi.now) 54 | MOVF F1, 8(R3) // imag(fi.now) 55 | 56 | MOVW samples_data+4(FP), R0 57 | SUB R0, R6 58 | MOVW R6>>3, R6 59 | MOVW R6, ret_len+20(FP) 60 | MOVW samples_cap+12(FP), R4 61 | MOVW R4, ret_cap+24(FP) 62 | MOVW samples_data+4(FP), R0 63 | MOVW R0, ret_data+16(FP) 64 | RET 65 | 66 | TEXT ·lowPassDownsampleRationalFilterAsm(SB), NOSPLIT, $0 67 | MOVW fi+0(FP), R4 // fi 68 | 69 | MOVW 4(R4), R7 // fi.Slow 70 | MOVW R7, F4 71 | MOVWF F4, F4 72 | 73 | MOVW 0(R4), R8 // fi.Fast 74 | MOVW R8, F3 75 | MOVWF F3, F3 76 | 77 | DIVF F3, F4 // fi.Slow / fi.Fast 78 | 79 | MOVF 8(R4), F3 // fi.sum 80 | MOVW 12(R4), R2 // fi.prevIndex 81 | 82 | MOVW samples_ptr+4(FP), R5 // input 83 | MOVW R5, R6 // output 84 | MOVW samples_len+8(FP), R3 85 | ADD R3<<2, R5, R3 // end of input 86 | 87 | rationalLoop: 88 | CMP R5, R3 89 | BLE rationalLoopEnd 90 | 91 | MOVF (R5), F0 // samples[i] 92 | ADD $4, R5 93 | 94 | ADDF F0, F3 // fi.sum += samples[i] 95 | ADD R7, R2 // fi.prevIndex += fi.Slow 96 | 97 | CMP R8, R2 98 | BLT rationalLoop 99 | 100 | MULF F4, F3 // fi.sum * (Slow/Fast) 101 | 102 | MOVF F3, (R6) 103 | ADD $4, R6 104 | 105 | SUB R8, R2 // fi.prevIndex -= fi.Fast 106 | MOVF $0.0, F3 // fi.sum = 0.0 107 | 108 | B rationalLoop 109 | 110 | rationalLoopEnd: 111 | MOVW R2, 12(R4) // fi.prevIndex 112 | MOVF F3, 8(R4) // fi.sum 113 | 114 | MOVW samples_ptr+4(FP), R0 115 | SUB R0, R6 116 | MOVW R6>>2, R6 117 | MOVW R6, res_len+20(FP) 118 | MOVW samples_cap+12(FP), R4 119 | MOVW R4, res_cap+24(FP) 120 | MOVW samples_ptr+4(FP), R0 121 | MOVW R0, res_ptr+16(FP) 122 | RET 123 | -------------------------------------------------------------------------------- /dsp/downsample_arm64.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | 3 | TEXT ·lowPassDownsampleComplexFilterAsm(SB), NOSPLIT, $0 4 | B ·lowPassDownsampleComplexFilter(SB) 5 | 6 | TEXT ·lowPassDownsampleRationalFilterAsm(SB), NOSPLIT, $0 7 | B 
·lowPassDownsampleRationalFilter(SB) 8 | -------------------------------------------------------------------------------- /dsp/downsample_test.go: -------------------------------------------------------------------------------- 1 | package dsp 2 | 3 | import "testing" 4 | 5 | func TestLowPassDownsampleComplexFilter(t *testing.T) { 6 | filter := &LowPassDownsampleComplexFilter{Downsample: 2} 7 | input := []complex64{complex(0.0, 2.0), complex(1.0, 2.0), complex(-3.0, 7.0), complex(4.0, -9.0)} 8 | 9 | output := make([]complex64, 256) 10 | copy(output, input) 11 | filter.now = 0.0 12 | filter.prevIndex = 0 13 | output = lowPassDownsampleComplexFilterAsm(filter, output) 14 | 15 | expected := make([]complex64, 256) 16 | copy(expected, input) 17 | filter.now = 0.0 18 | filter.prevIndex = 0 19 | expected = lowPassDownsampleComplexFilter(filter, expected) 20 | 21 | if len(output) != len(expected) { 22 | t.Fatalf("Output doesn't match expected:\n%+v\n%+v", output, expected) 23 | } 24 | for i := 0; i < len(output); i++ { 25 | if output[i] != expected[i] { 26 | t.Fatalf("Output doesn't match expected:\n%+v\n%+v", output, expected) 27 | } 28 | } 29 | } 30 | 31 | func TestLowPassDownsampleRationalFilter(t *testing.T) { 32 | filter := &LowPassDownsampleRationalFilter{Fast: 3, Slow: 2} 33 | input := make([]float32, 256) 34 | for i := 0; i < len(input); i++ { 35 | input[i] = float32(i - 128) 36 | } 37 | 38 | output := make([]float32, 256) 39 | copy(output, input) 40 | filter.prevIndex = 0 41 | filter.sum = 0.0 42 | output = lowPassDownsampleRationalFilterAsm(filter, output) 43 | 44 | expected := make([]float32, 256) 45 | copy(expected, input) 46 | filter.prevIndex = 0 47 | filter.sum = 0.0 48 | expected = lowPassDownsampleRationalFilter(filter, expected) 49 | 50 | if len(output) != len(expected) { 51 | t.Fatalf("Output doesn't match expected: %+v != %+v", output, expected) 52 | } 53 | for i := 0; i < len(output); i++ { 54 | if output[i] != expected[i] { 55 | t.Fatalf("Output 
doesn't match expected: %+v != %+v", output, expected) 56 | } 57 | } 58 | } 59 | 60 | func BenchmarkLowPassDownsampleComplexFilter(b *testing.B) { 61 | filter := &LowPassDownsampleComplexFilter{Downsample: 2} 62 | input := make([]complex64, 256) 63 | for i := 0; i < 256; i++ { 64 | input[i] = complex(float32(i)-128.0, -(float32(i) - 128.0)) 65 | } 66 | for i := 0; i < b.N; i++ { 67 | _ = lowPassDownsampleComplexFilterAsm(filter, input) 68 | } 69 | } 70 | 71 | func BenchmarkLowPassDownsampleComplexFilter_Go(b *testing.B) { 72 | filter := &LowPassDownsampleComplexFilter{Downsample: 2} 73 | input := make([]complex64, 256) 74 | for i := 0; i < 256; i++ { 75 | input[i] = complex(float32(i)-128.0, -(float32(i) - 128.0)) 76 | } 77 | for i := 0; i < b.N; i++ { 78 | _ = lowPassDownsampleComplexFilter(filter, input) 79 | } 80 | } 81 | 82 | func BenchmarkLowPassDownsampleRationalFilter(b *testing.B) { 83 | filter := &LowPassDownsampleRationalFilter{Fast: 3, Slow: 2} 84 | input := make([]float32, 256) 85 | for i := 0; i < 256; i++ { 86 | input[i] = float32(i) - 128.0 87 | } 88 | for i := 0; i < b.N; i++ { 89 | _ = lowPassDownsampleRationalFilterAsm(filter, input) 90 | } 91 | } 92 | 93 | func BenchmarkLowPassDownsampleRationalFilter_Go(b *testing.B) { 94 | filter := &LowPassDownsampleRationalFilter{Fast: 3, Slow: 2} 95 | input := make([]float32, 256) 96 | for i := 0; i < 256; i++ { 97 | input[i] = float32(i) - 128.0 98 | } 99 | for i := 0; i < b.N; i++ { 100 | _ = lowPassDownsampleRationalFilter(filter, input) 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /dsp/dtmf/dtmf.go: -------------------------------------------------------------------------------- 1 | package dtmf 2 | 3 | import ( 4 | "github.com/samuel/go-dsp/dsp" 5 | ) 6 | 7 | var ( 8 | Keypad = []rune{ 9 | '1', '2', '3', 'A', 10 | '4', '5', '6', 'B', 11 | '7', '8', '9', 'C', 12 | '*', '0', '#', 'D', 13 | } 14 | StdLowFreq = []uint64{697, 770, 852, 941} 15 | 
StdHighFreq = []uint64{1209, 1336, 1477, 1633} 16 | ) 17 | 18 | type DTMF struct { 19 | lowFreq *dsp.Goertzel32 20 | highFreq *dsp.Goertzel32 21 | nHigh int 22 | blockSize int 23 | w []float32 24 | } 25 | 26 | func New(lowFreq, highFreq []uint64, sampleRate, blockSize int, windowFunc func([]float32)) *DTMF { 27 | w := make([]float32, blockSize) 28 | if windowFunc != nil { 29 | windowFunc(w) 30 | } else { 31 | dsp.HammingWindowF32(w) 32 | } 33 | return &DTMF{ 34 | lowFreq: dsp.NewGoertzel32(lowFreq, sampleRate, blockSize), 35 | highFreq: dsp.NewGoertzel32(highFreq, sampleRate, blockSize), 36 | nHigh: len(highFreq), 37 | blockSize: blockSize, 38 | w: w, 39 | } 40 | } 41 | 42 | func NewStandard(sampleRate, blockSize int) *DTMF { 43 | return New(StdLowFreq, StdHighFreq, sampleRate, blockSize, dsp.HammingWindowF32) 44 | } 45 | 46 | // Return key number (lowFreqIndex * numHighFreq + highFreqIndex) and minimum magnitude 47 | func (d *DTMF) Feed(samples []float32) (int, float32) { 48 | if len(samples) > d.blockSize { 49 | samples = samples[:d.blockSize] 50 | } 51 | for i, s := range samples { 52 | samples[i] = s * d.w[i] 53 | } 54 | d.lowFreq.Reset() 55 | d.highFreq.Reset() 56 | d.lowFreq.Feed(samples) 57 | d.highFreq.Feed(samples) 58 | row, thresh1 := max(d.lowFreq.Magnitude()) 59 | col, thresh2 := max(d.highFreq.Magnitude()) 60 | if thresh2 < thresh1 { 61 | thresh1 = thresh2 62 | } 63 | return row*d.nHigh + col, thresh1 64 | } 65 | 66 | func max(val []float32) (int, float32) { 67 | lrg := float32(0.0) 68 | idx := 0 69 | for i, f := range val { 70 | if f > lrg { 71 | lrg = f 72 | idx = i 73 | } 74 | } 75 | return idx, lrg 76 | } 77 | -------------------------------------------------------------------------------- /dsp/filter.go: -------------------------------------------------------------------------------- 1 | package dsp 2 | 3 | type IIRFilter struct { 4 | bCoef, aCoef []float64 5 | pIn, pOut []float64 6 | } 7 | 8 | type ComplexIIRFilter32 struct { 9 | bCoef, aCoef 
[]complex64 10 | pIn, pOut []complex64 11 | } 12 | 13 | type ComplexIIRFilter struct { 14 | bCoef, aCoef []complex128 15 | pIn, pOut []complex128 16 | } 17 | 18 | func NewIIRFilter(bCoef, aCoef []float64) *IIRFilter { 19 | if len(bCoef) != len(aCoef) || len(bCoef) == 0 { 20 | panic("IIR filter must have len(b)==len(a) and len(b) > 0") 21 | } 22 | for i, c := range bCoef { 23 | bCoef[i] = c / aCoef[0] 24 | } 25 | for i, c := range aCoef[1:] { 26 | aCoef[i+1] = c / aCoef[0] 27 | } 28 | return &IIRFilter{ 29 | bCoef: bCoef, 30 | aCoef: aCoef, 31 | pIn: make([]float64, len(bCoef)-1), 32 | pOut: make([]float64, len(bCoef)-1), 33 | } 34 | } 35 | 36 | func NewComplexIIRFilter32(bCoef, aCoef []float32) *ComplexIIRFilter32 { 37 | if len(bCoef) != len(aCoef) || len(bCoef) == 0 { 38 | panic("IIR filter must have len(b)==len(a) and len(b) > 0") 39 | } 40 | for i, c := range bCoef { 41 | bCoef[i] = c / aCoef[0] 42 | } 43 | for i, c := range aCoef[1:] { 44 | aCoef[i+1] = c / aCoef[0] 45 | } 46 | return &ComplexIIRFilter32{ 47 | bCoef: rtoc32(bCoef), 48 | aCoef: rtoc32(aCoef), 49 | pIn: make([]complex64, len(bCoef)-1), 50 | pOut: make([]complex64, len(bCoef)-1), 51 | } 52 | } 53 | 54 | func NewComplexIIRFilter(bCoef, aCoef []float64) *ComplexIIRFilter { 55 | if len(bCoef) != len(aCoef) || len(bCoef) == 0 { 56 | panic("IIR filter must have len(b)==len(a) and len(b) > 0") 57 | } 58 | for i, c := range bCoef { 59 | bCoef[i] = c / aCoef[0] 60 | } 61 | for i, c := range aCoef[1:] { 62 | aCoef[i+1] = c / aCoef[0] 63 | } 64 | return &ComplexIIRFilter{ 65 | bCoef: rtoc(bCoef), 66 | aCoef: rtoc(aCoef), 67 | pIn: make([]complex128, len(bCoef)-1), 68 | pOut: make([]complex128, len(bCoef)-1), 69 | } 70 | } 71 | 72 | func (f *IIRFilter) Filter(input, output []float64) { 73 | for i, s := range input { 74 | sum := f.bCoef[0] * s 75 | for j, p := range f.pIn { 76 | sum += f.bCoef[j+1]*p - f.aCoef[j+1]*f.pOut[j] 77 | } 78 | for i := len(f.pIn) - 1; i > 0; i-- { 79 | f.pIn[i] = f.pIn[i-1] 80 | 
// DCFilter is a one-pole DC blocking filter over float64 samples. It
// implements
//
//	w(n) = x(n) + a*w(n-1)
//	y(n) = w(n) - w(n-1)
//
// where a is the feedback coefficient and w is the filter's only state.
type DCFilter struct {
	a, w float64
}

// NewDCFilter returns a DCFilter with feedback coefficient a and zero state.
func NewDCFilter(a float64) *DCFilter {
	f := new(DCFilter)
	f.a = a
	return f
}

// Filter processes every sample of input and writes the results to the same
// positions of output. The filter state is committed once, after the whole
// block has been processed.
func (f *DCFilter) Filter(input, output []float64) {
	gain, prev := f.a, f.w
	for i := 0; i < len(input); i++ {
		cur := input[i] + gain*prev
		output[i] = cur - prev
		prev = cur
	}
	f.w = prev
}

// FilterOne processes a single sample, updates the state and returns the
// filtered value.
func (f *DCFilter) FilterOne(x float64) float64 {
	prev := f.w
	f.w = x + f.a*prev
	return f.w - prev
}
// Goertzel evaluates a fixed set of DFT bins over a stream of float64 samples
// using the Goertzel recurrence. Create one with NewGoertzel.
type Goertzel struct {
	freq []*goertzel
	mag  []float64
	cplx []complex128
}

// Goertzel32 is the float32-sample variant of Goertzel. The recurrence state
// is still kept in float64; only inputs and results are single precision.
type Goertzel32 struct {
	freq []*goertzel
	mag  []float32
	cplx []complex64
}

// ComplexGoertzel evaluates a fixed set of DFT bins over a stream of complex
// samples by running one recurrence on the real parts and one on the
// imaginary parts.
type ComplexGoertzel struct {
	freq []*goertzel
	mag  []float64
	cplx []complex128
}

// goertzel is the state of one second-order resonator tuned to a single bin.
type goertzel struct {
	coeff    float64 // 2*cos(w), the feedback coefficient of the recurrence
	cos, sin float64 // cos(w) and sin(w) for the bin's angular frequency w
	q1, q2   float64 // last two states driven by the real input
	q1i, q2i float64 // last two states driven by the imaginary input (ComplexGoertzel only)
}

// newGoertzelBins builds one zeroed resonator per target frequency. Every
// frequency is snapped to the closest bin k = round(f*blockSize/sampleRate)
// and the resonator is tuned to w = 2*pi*k/blockSize.
//
// sampleRate and blockSize are used as divisors and must be positive.
func newGoertzelBins(targetFreqs []uint64, sampleRate, blockSize int) []*goertzel {
	bins := make([]*goertzel, len(targetFreqs))
	for i, f := range targetFreqs {
		// The product is formed in floating point: the integer product
		// blockSize*f silently wraps around for very large f.
		k := uint64(0.5 + float64(blockSize)*float64(f)/float64(sampleRate))
		w := 2.0 * math.Pi * float64(k) / float64(blockSize)
		sin := math.Sin(w)
		cos := math.Cos(w)
		bins[i] = &goertzel{
			coeff: 2.0 * cos,
			cos:   cos,
			sin:   sin,
		}
	}
	return bins
}

// step advances the real-input recurrence by one sample.
func (f *goertzel) step(samp float64) {
	q0 := f.coeff*f.q1 - f.q2 + samp
	f.q2 = f.q1
	f.q1 = q0
}

// stepImag advances the imaginary-input recurrence by one sample.
func (f *goertzel) stepImag(samp float64) {
	q0 := f.coeff*f.q1i - f.q2i + samp
	f.q2i = f.q1i
	f.q1i = q0
}

// power returns |q1*e^(jw) - q2|^2 for the real-input recurrence.
func (f *goertzel) power() float64 {
	return f.q1*f.q1 + f.q2*f.q2 - f.q1*f.q2*f.coeff
}

// dft returns the real and imaginary parts of the bin value for complex
// input: (q1 + j*q1i)*e^(jw) - (q2 + j*q2i).
func (f *goertzel) dft() (re, im float64) {
	re = f.q1*f.cos - f.q2 - f.q1i*f.sin
	im = f.q1*f.sin + f.q1i*f.cos - f.q2i
	return re, im
}

// NewGoertzel32 returns a float32 detector for the given target frequencies.
// Each frequency (same unit as sampleRate) is rounded to the nearest of the
// blockSize DFT bins. sampleRate and blockSize must be positive.
func NewGoertzel32(targetFreqs []uint64, sampleRate, blockSize int) *Goertzel32 {
	return &Goertzel32{
		freq: newGoertzelBins(targetFreqs, sampleRate, blockSize),
		mag:  make([]float32, len(targetFreqs)),
		cplx: make([]complex64, len(targetFreqs)),
	}
}

// Reset clears the accumulated state of every bin.
func (g *Goertzel32) Reset() {
	for _, freq := range g.freq {
		freq.q1 = 0.0
		freq.q2 = 0.0
	}
}

// Feed runs every sample through every bin's recurrence.
func (g *Goertzel32) Feed(samples []float32) {
	for _, samp := range samples {
		for _, freq := range g.freq {
			freq.step(float64(samp))
		}
	}
}

// Magnitude returns the squared magnitude of each bin, in the order the target
// frequencies were given. The returned slice is an internal buffer that is
// overwritten by the next call.
func (g *Goertzel32) Magnitude() []float32 {
	for i, freq := range g.freq {
		g.mag[i] = float32(freq.power())
	}
	return g.mag
}

// Complex returns the complex value of each bin. The returned slice is an
// internal buffer that is overwritten by the next call.
func (g *Goertzel32) Complex() []complex64 {
	for i, freq := range g.freq {
		g.cplx[i] = complex(float32(freq.q1*freq.cos-freq.q2), float32(freq.q1*freq.sin))
	}
	return g.cplx
}

// NewGoertzel returns a float64 detector for the given target frequencies.
// Each frequency (same unit as sampleRate) is rounded to the nearest of the
// blockSize DFT bins. sampleRate and blockSize must be positive.
func NewGoertzel(targetFreqs []uint64, sampleRate, blockSize int) *Goertzel {
	return &Goertzel{
		freq: newGoertzelBins(targetFreqs, sampleRate, blockSize),
		mag:  make([]float64, len(targetFreqs)),
		cplx: make([]complex128, len(targetFreqs)),
	}
}

// Reset clears the accumulated state of every bin.
func (g *Goertzel) Reset() {
	for _, freq := range g.freq {
		freq.q1 = 0.0
		freq.q2 = 0.0
	}
}

// Feed runs every sample through every bin's recurrence.
func (g *Goertzel) Feed(samples []float64) {
	for _, samp := range samples {
		for _, freq := range g.freq {
			freq.step(samp)
		}
	}
}

// Magnitude returns the squared magnitude of each bin, in the order the target
// frequencies were given. The returned slice is an internal buffer that is
// overwritten by the next call.
func (g *Goertzel) Magnitude() []float64 {
	for i, freq := range g.freq {
		g.mag[i] = freq.power()
	}
	return g.mag
}

// Complex returns the complex value of each bin. The returned slice is an
// internal buffer that is overwritten by the next call.
func (g *Goertzel) Complex() []complex128 {
	for i, freq := range g.freq {
		g.cplx[i] = complex(freq.q1*freq.cos-freq.q2, freq.q1*freq.sin)
	}
	return g.cplx
}

// NewComplexGoertzel returns a detector for complex input samples. Each target
// frequency (same unit as sampleRate) is rounded to the nearest of the
// blockSize DFT bins. sampleRate and blockSize must be positive.
func NewComplexGoertzel(targetFreqs []uint64, sampleRate, blockSize int) *ComplexGoertzel {
	return &ComplexGoertzel{
		freq: newGoertzelBins(targetFreqs, sampleRate, blockSize),
		mag:  make([]float64, len(targetFreqs)),
		cplx: make([]complex128, len(targetFreqs)),
	}
}

// Reset clears the accumulated real and imaginary state of every bin.
func (g *ComplexGoertzel) Reset() {
	for _, freq := range g.freq {
		freq.q1 = 0.0
		freq.q2 = 0.0
		freq.q1i = 0.0
		freq.q2i = 0.0
	}
}

// Feed runs the real and imaginary part of every sample through every bin.
func (g *ComplexGoertzel) Feed(samples []complex128) {
	for _, samp := range samples {
		for _, freq := range g.freq {
			freq.step(real(samp))
			freq.stepImag(imag(samp))
		}
	}
}

// Magnitude returns the squared magnitude of each bin. The returned slice is
// an internal buffer that is overwritten by the next call.
func (g *ComplexGoertzel) Magnitude() []float64 {
	for i, f := range g.freq {
		re, im := f.dft()
		g.mag[i] = re*re + im*im
	}
	return g.mag
}

// Complex returns the complex value of each bin. The returned slice is an
// internal buffer that is overwritten by the next call.
func (g *ComplexGoertzel) Complex() []complex128 {
	for i, f := range g.freq {
		re, im := f.dft()
		g.cplx[i] = complex(re, im)
	}
	return g.cplx
}
// DebugOptions is set to true by the runtime if the OS supports reading
// GODEBUG early in runtime startup.
// This should not be changed after it is initialized.
var DebugOptions bool

// CacheLinePad is used to pad structs to avoid false sharing.
// Its size is the per-GOARCH constant CacheLinePadSize.
type CacheLinePad struct{ _ [CacheLinePadSize]byte }

// CacheLineSize is the CPU's assumed cache line size.
// There is currently no runtime detection of the real cache line size
// so we use the constant per GOARCH CacheLinePadSize as an approximation.
var CacheLineSize uintptr = CacheLinePadSize

// The booleans in X86 contain the correspondingly named cpuid feature bit.
// HasAVX and HasAVX2 are only set if the OS does support XMM and YMM registers
// in addition to the cpuid feature bit being set.
// The struct is padded to avoid false sharing.
var X86 struct {
	_            CacheLinePad
	HasAES       bool
	HasADX       bool
	HasAVX       bool
	HasAVX2      bool
	HasBMI1      bool
	HasBMI2      bool
	HasERMS      bool
	HasFMA       bool
	HasOSXSAVE   bool
	HasPCLMULQDQ bool
	HasPOPCNT    bool
	HasSSE2      bool
	HasSSE3      bool
	HasSSSE3     bool
	HasSSE41     bool
	HasSSE42     bool
	_            CacheLinePad
}

// The booleans in ARM contain the correspondingly named cpu feature bit.
// The struct is padded to avoid false sharing.
var ARM struct {
	_        CacheLinePad
	HasVFPv4 bool
	HasIDIVA bool
	_        CacheLinePad
}

// The booleans in ARM64 contain the correspondingly named cpu feature bit.
// The struct is padded to avoid false sharing.
var ARM64 struct {
	_            CacheLinePad
	HasAES       bool
	HasPMULL     bool
	HasSHA1      bool
	HasSHA2      bool
	HasCRC32     bool
	HasATOMICS   bool
	HasCPUID     bool
	IsNeoverseN1 bool
	IsZeus       bool
	_            CacheLinePad
}

// The booleans in MIPS64X contain the correspondingly named cpu feature bit.
// Like the other feature tables, the struct has a CacheLinePad on either side.
var MIPS64X struct {
	_      CacheLinePad
	HasMSA bool // MIPS SIMD architecture
	_      CacheLinePad
}

// For ppc64(le), it is safe to check only for ISA level starting on ISA v3.00,
// since there are no optional categories. There are some exceptions that also
// require kernel support to work (darn, scv), so there are feature bits for
// those as well. The minimum processor requirement is POWER8 (ISA 2.07).
// The struct is padded to avoid false sharing.
var PPC64 struct {
	_        CacheLinePad
	HasDARN  bool // Hardware random number generator (requires kernel enablement)
	HasSCV   bool // Syscall vectored (requires kernel enablement)
	IsPOWER8 bool // ISA v2.07 (POWER8)
	IsPOWER9 bool // ISA v3.00 (POWER9)
	_        CacheLinePad
}

// The booleans in S390X report which s390x facilities and cryptographic
// functions are available; each field's comment names the facility or
// function it stands for. Like the other feature tables, the struct has a
// CacheLinePad on either side.
var S390X struct {
	_         CacheLinePad
	HasZARCH  bool // z architecture mode is active [mandatory]
	HasSTFLE  bool // store facility list extended [mandatory]
	HasLDISP  bool // long (20-bit) displacements [mandatory]
	HasEIMM   bool // 32-bit immediates [mandatory]
	HasDFP    bool // decimal floating point
	HasETF3EH bool // ETF-3 enhanced
	HasMSA    bool // message security assist (CPACF)
	HasAES    bool // KM-AES{128,192,256} functions
	HasAESCBC bool // KMC-AES{128,192,256} functions
	HasAESCTR bool // KMCTR-AES{128,192,256} functions
	HasAESGCM bool // KMA-GCM-AES{128,192,256} functions
	HasGHASH  bool // KIMD-GHASH function
	HasSHA1   bool // K{I,L}MD-SHA-1 functions
	HasSHA256 bool // K{I,L}MD-SHA-256 functions
	HasSHA512 bool // K{I,L}MD-SHA-512 functions
	HasSHA3   bool // K{I,L}MD-SHA3-{224,256,384,512} and K{I,L}MD-SHAKE-{128,256} functions
	HasVX     bool // vector facility. Note: the runtime sets this when it processes auxv records.
	HasVXE    bool // vector-enhancements facility 1
	HasKDSA   bool // elliptic curve functions
	HasECDSA  bool // NIST curves
	HasEDDSA  bool // Edwards curves
	_         CacheLinePad
}

// Initialize examines the processor and sets the relevant variables above.
// This is called by the runtime package early in program initialization,
// before normal init functions are run. env is set by runtime if the OS supports
// cpu feature options in GODEBUG.
//
// The architecture-specific doinit runs first so that the detected feature
// values and the options table exist before processOptions applies the
// overrides found in env.
func Initialize(env string) {
	doinit()
	processOptions(env)
}
// options is the table of cpu debug options that GODEBUG may toggle.
// The architecture-specific doinit functions fill it in. Features that are
// mandatory for the GOARCH (e.g. SSE2 on amd64) should not be listed.
var options []option

// option describes one toggleable CPU feature.
// Option names should be lower case. e.g. avx instead of AVX.
type option struct {
	Name      string
	Feature   *bool
	Specified bool // whether feature value was specified in GODEBUG
	Enable    bool // whether feature should be enabled
	Required  bool // whether feature is mandatory and can not be disabled
}

// processOptions applies the cpu.* settings found in env to the features
// registered in options.
//
// env is a comma separated list of the form
// cpu.feature1=value1,cpu.feature2=value2... where every value is 'on' or
// 'off'. Entries that do not start with "cpu." are ignored. cpu.all applies
// the value to every registered option (required options stay enabled).
// Malformed entries, unknown features and impossible requests are reported
// with print and otherwise skipped.
func processOptions(env string) {
	// Pass 1: record what was requested for each option.
	for len(env) > 0 {
		var entry string
		if comma := indexByte(env, ','); comma >= 0 {
			entry, env = env[:comma], env[comma+1:]
		} else {
			entry, env = env, ""
		}
		if len(entry) < 4 || entry[:4] != "cpu." {
			continue
		}

		eq := indexByte(entry, '=')
		if eq < 0 {
			print("GODEBUG: no value specified for \"", entry, "\"\n")
			continue
		}
		name, setting := entry[4:eq], entry[eq+1:] // e.g. "SSE2", "on"

		if setting != "on" && setting != "off" {
			print("GODEBUG: value \"", setting, "\" not supported for cpu option \"", name, "\"\n")
			continue
		}
		enable := setting == "on"

		if name == "all" {
			for j := range options {
				opt := &options[j]
				opt.Specified = true
				opt.Enable = enable || opt.Required
			}
			continue
		}

		known := false
		for j := range options {
			if options[j].Name != name {
				continue
			}
			options[j].Specified = true
			options[j].Enable = enable
			known = true
			break
		}
		if !known {
			print("GODEBUG: unknown cpu feature \"", name, "\"\n")
		}
	}

	// Pass 2: apply the requests that are actually possible.
	for j := range options {
		opt := options[j]
		switch {
		case !opt.Specified:
			// Not mentioned in env: leave the detected value alone.
		case opt.Enable && !*opt.Feature:
			print("GODEBUG: can not enable \"", opt.Name, "\", missing CPU support\n")
		case !opt.Enable && opt.Required:
			print("GODEBUG: can not disable \"", opt.Name, "\", required CPU feature\n")
		default:
			*opt.Feature = opt.Enable
		}
	}
}

// indexByte returns the index of the first instance of c in s,
// or -1 if c is not present in s.
func indexByte(s string, c byte) int {
	for pos, b := range []byte(s) {
		if b == c {
			return pos
		}
	}
	return -1
}
-------------------------------------------------------------------------------- /dsp/internal/cpu/cpu_386.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package cpu 6 | 7 | const GOARCH = "386" 8 | -------------------------------------------------------------------------------- /dsp/internal/cpu/cpu_amd64.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package cpu 6 | 7 | const GOARCH = "amd64" 8 | -------------------------------------------------------------------------------- /dsp/internal/cpu/cpu_arm.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package cpu 6 | 7 | const CacheLinePadSize = 32 8 | 9 | // arm doesn't have a 'cpuid' equivalent, so we rely on HWCAP/HWCAP2. 10 | // These are initialized by archauxv() and should not be changed after they are 11 | // initialized. 12 | var HWCap uint 13 | var HWCap2 uint 14 | 15 | // HWCAP/HWCAP2 bits. These are exposed by Linux and FreeBSD. 
16 | const ( 17 | hwcap_VFPv4 = 1 << 16 18 | hwcap_IDIVA = 1 << 17 19 | ) 20 | 21 | func doinit() { 22 | options = []option{ 23 | {Name: "vfpv4", Feature: &ARM.HasVFPv4}, 24 | {Name: "idiva", Feature: &ARM.HasIDIVA}, 25 | } 26 | 27 | // HWCAP feature bits 28 | ARM.HasVFPv4 = isSet(HWCap, hwcap_VFPv4) 29 | ARM.HasIDIVA = isSet(HWCap, hwcap_IDIVA) 30 | } 31 | 32 | func isSet(hwc uint, value uint) bool { 33 | return hwc&value != 0 34 | } 35 | -------------------------------------------------------------------------------- /dsp/internal/cpu/cpu_arm64.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package cpu 6 | 7 | const CacheLinePadSize = 64 8 | 9 | func doinit() { 10 | options = []option{ 11 | {Name: "aes", Feature: &ARM64.HasAES}, 12 | {Name: "pmull", Feature: &ARM64.HasPMULL}, 13 | {Name: "sha1", Feature: &ARM64.HasSHA1}, 14 | {Name: "sha2", Feature: &ARM64.HasSHA2}, 15 | {Name: "crc32", Feature: &ARM64.HasCRC32}, 16 | {Name: "atomics", Feature: &ARM64.HasATOMICS}, 17 | {Name: "cpuid", Feature: &ARM64.HasCPUID}, 18 | {Name: "isNeoverseN1", Feature: &ARM64.IsNeoverseN1}, 19 | {Name: "isZeus", Feature: &ARM64.IsZeus}, 20 | } 21 | 22 | // arm64 uses different ways to detect CPU features at runtime depending on the operating system. 23 | osInit() 24 | } 25 | 26 | func getisar0() uint64 27 | 28 | func getMIDR() uint64 29 | -------------------------------------------------------------------------------- /dsp/internal/cpu/cpu_arm64.s: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
#include "textflag.h"

// func getisar0() uint64
//
// getisar0 returns the contents of the ID_AA64ISAR0_EL1 system register.
TEXT ·getisar0(SB),NOSPLIT,$0
	// get Instruction Set Attributes 0 into R0
	MRS	ID_AA64ISAR0_EL1, R0
	// store R0 as the function result
	MOVD	R0, ret+0(FP)
	RET

// func getMIDR() uint64
//
// getMIDR returns the contents of the MIDR_EL1 system register.
TEXT ·getMIDR(SB), NOSPLIT, $0-8
	// read MIDR_EL1 into R0
	MRS	MIDR_EL1, R0
	// store R0 as the function result
	MOVD	R0, ret+0(FP)
	RET
18 | ARM64.HasAES = true 19 | ARM64.HasPMULL = true 20 | ARM64.HasSHA1 = true 21 | ARM64.HasSHA2 = true 22 | } 23 | 24 | //go:noescape 25 | func getsysctlbyname(name []byte) (int32, int32) 26 | 27 | func sysctlEnabled(name []byte) bool { 28 | ret, value := getsysctlbyname(name) 29 | if ret < 0 { 30 | return false 31 | } 32 | return value > 0 33 | } 34 | -------------------------------------------------------------------------------- /dsp/internal/cpu/cpu_arm64_freebsd.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | //go:build arm64 6 | // +build arm64 7 | 8 | package cpu 9 | 10 | func osInit() { 11 | // Retrieve info from system register ID_AA64ISAR0_EL1. 12 | isar0 := getisar0() 13 | 14 | // ID_AA64ISAR0_EL1 15 | switch extractBits(isar0, 4, 7) { 16 | case 1: 17 | ARM64.HasAES = true 18 | case 2: 19 | ARM64.HasAES = true 20 | ARM64.HasPMULL = true 21 | } 22 | 23 | switch extractBits(isar0, 8, 11) { 24 | case 1: 25 | ARM64.HasSHA1 = true 26 | } 27 | 28 | switch extractBits(isar0, 12, 15) { 29 | case 1, 2: 30 | ARM64.HasSHA2 = true 31 | } 32 | 33 | switch extractBits(isar0, 16, 19) { 34 | case 1: 35 | ARM64.HasCRC32 = true 36 | } 37 | 38 | switch extractBits(isar0, 20, 23) { 39 | case 2: 40 | ARM64.HasATOMICS = true 41 | } 42 | } 43 | 44 | func extractBits(data uint64, start, end uint) uint { 45 | return (uint)(data>>start) & ((1 << (end - start + 1)) - 1) 46 | } 47 | -------------------------------------------------------------------------------- /dsp/internal/cpu/cpu_arm64_hwcap.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | //go:build arm64 && linux 6 | // +build arm64,linux 7 | 8 | package cpu 9 | 10 | // HWCap may be initialized by archauxv and 11 | // should not be changed after it was initialized. 12 | var HWCap uint 13 | 14 | // HWCAP bits. These are exposed by Linux. 15 | const ( 16 | hwcap_AES = 1 << 3 17 | hwcap_PMULL = 1 << 4 18 | hwcap_SHA1 = 1 << 5 19 | hwcap_SHA2 = 1 << 6 20 | hwcap_CRC32 = 1 << 7 21 | hwcap_ATOMICS = 1 << 8 22 | hwcap_CPUID = 1 << 11 23 | ) 24 | 25 | func hwcapInit(os string) { 26 | // HWCap was populated by the runtime from the auxiliary vector. 27 | // Use HWCap information since reading aarch64 system registers 28 | // is not supported in user space on older linux kernels. 29 | ARM64.HasAES = isSet(HWCap, hwcap_AES) 30 | ARM64.HasPMULL = isSet(HWCap, hwcap_PMULL) 31 | ARM64.HasSHA1 = isSet(HWCap, hwcap_SHA1) 32 | ARM64.HasSHA2 = isSet(HWCap, hwcap_SHA2) 33 | ARM64.HasCRC32 = isSet(HWCap, hwcap_CRC32) 34 | ARM64.HasCPUID = isSet(HWCap, hwcap_CPUID) 35 | 36 | // The Samsung S9+ kernel reports support for atomics, but not all cores 37 | // actually support them, resulting in SIGILL. See issue #28431. 38 | // TODO(elias.naur): Only disable the optimization on bad chipsets on android. 39 | ARM64.HasATOMICS = isSet(HWCap, hwcap_ATOMICS) && os != "android" 40 | 41 | // Check to see if executing on a NeoverseN1 and in order to do that, 42 | // check the AUXV for the CPUID bit. The getMIDR function executes an 43 | // instruction which would normally be an illegal instruction, but it's 44 | // trapped by the kernel, the value sanitized and then returned. Without 45 | // the CPUID bit the kernel will not trap the instruction and the process 46 | // will be terminated with SIGILL. 
47 | if ARM64.HasCPUID { 48 | midr := getMIDR() 49 | part_num := uint16((midr >> 4) & 0xfff) 50 | implementor := byte((midr >> 24) & 0xff) 51 | 52 | if implementor == 'A' && part_num == 0xd0c { 53 | ARM64.IsNeoverseN1 = true 54 | } 55 | if implementor == 'A' && part_num == 0xd40 { 56 | ARM64.IsZeus = true 57 | } 58 | } 59 | } 60 | 61 | func isSet(hwc uint, value uint) bool { 62 | return hwc&value != 0 63 | } 64 | -------------------------------------------------------------------------------- /dsp/internal/cpu/cpu_arm64_linux.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | //go:build arm64 && linux && !android 6 | // +build arm64,linux,!android 7 | 8 | package cpu 9 | 10 | func osInit() { 11 | hwcapInit("linux") 12 | } 13 | -------------------------------------------------------------------------------- /dsp/internal/cpu/cpu_arm64_other.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | //go:build arm64 && !linux && !freebsd && !android && (!darwin || ios) 6 | // +build arm64 7 | // +build !linux 8 | // +build !freebsd 9 | // +build !android 10 | // +build !darwin ios 11 | 12 | package cpu 13 | 14 | func osInit() { 15 | // Other operating systems do not support reading HWCap from auxiliary vector, 16 | // reading privileged aarch64 system registers or sysctl in user space to detect 17 | // CPU features at runtime. 
18 | } 19 | -------------------------------------------------------------------------------- /dsp/internal/cpu/cpu_mips.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package cpu 6 | 7 | const CacheLinePadSize = 32 8 | 9 | func doinit() { 10 | } 11 | -------------------------------------------------------------------------------- /dsp/internal/cpu/cpu_mips64x.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | //go:build mips64 || mips64le 6 | // +build mips64 mips64le 7 | 8 | package cpu 9 | 10 | const CacheLinePadSize = 32 11 | 12 | // This is initialized by archauxv and should not be changed after it is 13 | // initialized. 14 | var HWCap uint 15 | 16 | // HWCAP bits. These are exposed by the Linux kernel 5.4. 17 | const ( 18 | // CPU features 19 | hwcap_MIPS_MSA = 1 << 1 20 | ) 21 | 22 | func doinit() { 23 | options = []option{ 24 | {Name: "msa", Feature: &MIPS64X.HasMSA}, 25 | } 26 | 27 | // HWCAP feature bits 28 | MIPS64X.HasMSA = isSet(HWCap, hwcap_MIPS_MSA) 29 | } 30 | 31 | func isSet(hwc uint, value uint) bool { 32 | return hwc&value != 0 33 | } 34 | -------------------------------------------------------------------------------- /dsp/internal/cpu/cpu_mipsle.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
// Name returns the CPU name given by the vendor
// if it can be read directly from memory or by CPU instructions.
// If the CPU name can not be determined an empty string is returned.
//
// This implementation never determines a name and always returns "".
//
// Implementations that use the Operating System (e.g. sysctl or /sys/)
// to gather CPU information for display should be placed in internal/sysinfo.
func Name() string {
	// "A CPU has no name".
	var name string
	return name
}
4 | 5 | //go:build ppc64 || ppc64le 6 | // +build ppc64 ppc64le 7 | 8 | package cpu 9 | 10 | const CacheLinePadSize = 128 11 | 12 | func doinit() { 13 | options = []option{ 14 | {Name: "darn", Feature: &PPC64.HasDARN}, 15 | {Name: "scv", Feature: &PPC64.HasSCV}, 16 | {Name: "power9", Feature: &PPC64.IsPOWER9}, 17 | } 18 | 19 | osinit() 20 | } 21 | 22 | func isSet(hwc uint, value uint) bool { 23 | return hwc&value != 0 24 | } 25 | -------------------------------------------------------------------------------- /dsp/internal/cpu/cpu_ppc64x_aix.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | //go:build ppc64 || ppc64le 6 | // +build ppc64 ppc64le 7 | 8 | package cpu 9 | 10 | const ( 11 | // getsystemcfg constants 12 | _SC_IMPL = 2 13 | _IMPL_POWER9 = 0x20000 14 | ) 15 | 16 | func osinit() { 17 | impl := getsystemcfg(_SC_IMPL) 18 | PPC64.IsPOWER9 = isSet(impl, _IMPL_POWER9) 19 | } 20 | 21 | // getsystemcfg is defined in runtime/os2_aix.go 22 | func getsystemcfg(label uint) uint 23 | -------------------------------------------------------------------------------- /dsp/internal/cpu/cpu_ppc64x_linux.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | //go:build ppc64 || ppc64le 6 | // +build ppc64 ppc64le 7 | 8 | package cpu 9 | 10 | // ppc64 doesn't have a 'cpuid' equivalent, so we rely on HWCAP/HWCAP2. 11 | // These are initialized by archauxv and should not be changed after they are 12 | // initialized. 13 | var HWCap uint 14 | var HWCap2 uint 15 | 16 | // HWCAP bits. These are exposed by Linux. 
17 | const ( 18 | // ISA Level 19 | hwcap2_ARCH_3_00 = 0x00800000 20 | 21 | // CPU features 22 | hwcap2_DARN = 0x00200000 23 | hwcap2_SCV = 0x00100000 24 | ) 25 | 26 | func osinit() { 27 | PPC64.IsPOWER9 = isSet(HWCap2, hwcap2_ARCH_3_00) 28 | PPC64.HasDARN = isSet(HWCap2, hwcap2_DARN) 29 | PPC64.HasSCV = isSet(HWCap2, hwcap2_SCV) 30 | } 31 | -------------------------------------------------------------------------------- /dsp/internal/cpu/cpu_riscv64.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package cpu 6 | 7 | const CacheLinePadSize = 32 8 | 9 | func doinit() { 10 | } 11 | -------------------------------------------------------------------------------- /dsp/internal/cpu/cpu_s390x.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package cpu 6 | 7 | const CacheLinePadSize = 256 8 | 9 | var HWCap uint 10 | 11 | // bitIsSet reports whether the bit at index is set. The bit index 12 | // is in big endian order, so bit index 0 is the leftmost bit. 13 | func bitIsSet(bits []uint64, index uint) bool { 14 | return bits[index/64]&((1<<63)>>(index%64)) != 0 15 | } 16 | 17 | // function is the function code for the named function. 
// Function codes that can be tested against a queryResult with Has.
const (
	// KM{,A,C,CTR} function codes
	aes128 function = 18 // AES-128
	aes192 function = 19 // AES-192
	aes256 function = 20 // AES-256

	// K{I,L}MD function codes
	sha1     function = 1  // SHA-1
	sha256   function = 2  // SHA-256
	sha512   function = 3  // SHA-512
	sha3_224 function = 32 // SHA3-224
	sha3_256 function = 33 // SHA3-256
	sha3_384 function = 34 // SHA3-384
	sha3_512 function = 35 // SHA3-512
	shake128 function = 36 // SHAKE-128
	shake256 function = 37 // SHAKE-256

	// KIMD function codes
	// (doinit only probes GHASH through the KIMD query: KLMD-GHASH does not exist.)
	ghash function = 65 // GHASH
)
76 | type facility uint8 77 | 78 | const ( 79 | // mandatory facilities 80 | zarch facility = 1 // z architecture mode is active 81 | stflef facility = 7 // store-facility-list-extended 82 | ldisp facility = 18 // long-displacement 83 | eimm facility = 21 // extended-immediate 84 | 85 | // miscellaneous facilities 86 | dfp facility = 42 // decimal-floating-point 87 | etf3eh facility = 30 // extended-translation 3 enhancement 88 | 89 | // cryptography facilities 90 | msa facility = 17 // message-security-assist 91 | msa3 facility = 76 // message-security-assist extension 3 92 | msa4 facility = 77 // message-security-assist extension 4 93 | msa5 facility = 57 // message-security-assist extension 5 94 | msa8 facility = 146 // message-security-assist extension 8 95 | msa9 facility = 155 // message-security-assist extension 9 96 | 97 | // vector facilities 98 | vxe facility = 135 // vector-enhancements 1 99 | 100 | // Note: vx requires kernel support 101 | // and so must be fetched from HWCAP. 102 | 103 | hwcap_VX = 1 << 11 // vector facility 104 | ) 105 | 106 | // facilityList contains the result of an STFLE call. 107 | // Bits are numbered in big endian order so the 108 | // leftmost bit (the MSB) is at index 0. 109 | type facilityList struct { 110 | bits [4]uint64 111 | } 112 | 113 | // Has reports whether the given facilities are present. 114 | func (s *facilityList) Has(fs ...facility) bool { 115 | if len(fs) == 0 { 116 | panic("no facility bits provided") 117 | } 118 | for _, f := range fs { 119 | if !bitIsSet(s.bits[:], uint(f)) { 120 | return false 121 | } 122 | } 123 | return true 124 | } 125 | 126 | // The following feature detection functions are defined in cpu_s390x.s. 127 | // They are likely to be expensive to call so the results should be cached. 
// doinit registers the s390x option names and fills in the S390X feature
// flags from the STFLE facility list, the per-instruction query functions
// and HWCap.
func doinit() {
	options = []option{
		{Name: "zarch", Feature: &S390X.HasZARCH},
		{Name: "stfle", Feature: &S390X.HasSTFLE},
		{Name: "ldisp", Feature: &S390X.HasLDISP},
		{Name: "msa", Feature: &S390X.HasMSA},
		{Name: "eimm", Feature: &S390X.HasEIMM},
		{Name: "dfp", Feature: &S390X.HasDFP},
		{Name: "etf3eh", Feature: &S390X.HasETF3EH},
		{Name: "vx", Feature: &S390X.HasVX},
		{Name: "vxe", Feature: &S390X.HasVXE},
		{Name: "kdsa", Feature: &S390X.HasKDSA},
	}

	// All three AES key sizes must be reported for an AES flag to be set.
	aes := []function{aes128, aes192, aes256}
	facilities := stfle()

	S390X.HasZARCH = facilities.Has(zarch)
	S390X.HasSTFLE = facilities.Has(stflef)
	S390X.HasLDISP = facilities.Has(ldisp)
	S390X.HasEIMM = facilities.Has(eimm)
	S390X.HasDFP = facilities.Has(dfp)
	S390X.HasETF3EH = facilities.Has(etf3eh)
	S390X.HasMSA = facilities.Has(msa)

	// The query functions below are only called when the
	// message-security-assist facility is reported.
	if S390X.HasMSA {
		// cipher message
		km, kmc := kmQuery(), kmcQuery()
		S390X.HasAES = km.Has(aes...)
		S390X.HasAESCBC = kmc.Has(aes...)
		// KMCTR and KMA are only queried when their MSA extension is present.
		if facilities.Has(msa4) {
			kmctr := kmctrQuery()
			S390X.HasAESCTR = kmctr.Has(aes...)
		}
		if facilities.Has(msa8) {
			kma := kmaQuery()
			S390X.HasAESGCM = kma.Has(aes...)
		}

		// compute message digest
		kimd := kimdQuery() // intermediate (no padding)
		klmd := klmdQuery() // last (padding)
		S390X.HasSHA1 = kimd.Has(sha1) && klmd.Has(sha1)
		S390X.HasSHA256 = kimd.Has(sha256) && klmd.Has(sha256)
		S390X.HasSHA512 = kimd.Has(sha512) && klmd.Has(sha512)
		S390X.HasGHASH = kimd.Has(ghash) // KLMD-GHASH does not exist
		sha3 := []function{
			sha3_224, sha3_256, sha3_384, sha3_512,
			shake128, shake256,
		}
		S390X.HasSHA3 = kimd.Has(sha3...) && klmd.Has(sha3...)
		S390X.HasKDSA = facilities.Has(msa9) // elliptic curves
		if S390X.HasKDSA {
			kdsa := kdsaQuery()
			S390X.HasECDSA = kdsa.Has(ecdsaVerifyP256, ecdsaSignP256, ecdsaVerifyP384, ecdsaSignP384, ecdsaVerifyP521, ecdsaSignP521)
			S390X.HasEDDSA = kdsa.Has(eddsaVerifyEd25519, eddsaSignEd25519, eddsaVerifyEd448, eddsaSignEd448)
		}
	}

	// vx needs kernel support, so it is read from HWCap rather than from
	// the facility list.
	S390X.HasVX = isSet(HWCap, hwcap_VX)

	// vxe is only looked at once vx itself is available.
	if S390X.HasVX {
		S390X.HasVXE = facilities.Has(vxe)
	}
}
// func stfle() facilityList
//
// stfle zeroes the 32-byte return value and then stores the extended
// facility list into it.
TEXT ·stfle(SB), NOSPLIT|NOFRAME, $0-32
	MOVD $ret+0(FP), R1  // address of the 32-byte return value
	MOVD $3, R0          // last doubleword index to store
	XC   $32, (R1), (R1) // clear 4 doublewords (32 bytes)
	WORD $0xb2b01000     // store facility list extended (STFLE)
	RET
TEXT ·kdsaQuery(SB), NOSPLIT|NOFRAME, $0-16
	MOVD $0, R0         // set function code to 0 (KDSA-Query)
	MOVD $ret+0(FP), R1 // address of 16-byte return value
	WORD $0xB93A0008    // compute digital signature authentication (KDSA)
	RET
'%v'", feature) 56 | } 57 | } 58 | for k, v := range mandatory { 59 | if !v { 60 | t.Errorf("mandatory feature '%v' not detected", k) 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /dsp/internal/cpu/cpu_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package cpu_test 6 | 7 | import ( 8 | . "internal/cpu" 9 | "internal/testenv" 10 | "os" 11 | "os/exec" 12 | "runtime" 13 | "strings" 14 | "testing" 15 | ) 16 | 17 | func TestMinimalFeatures(t *testing.T) { 18 | // TODO: maybe do MustSupportFeatureDectection(t) ? 19 | if runtime.GOARCH == "arm64" { 20 | switch runtime.GOOS { 21 | case "linux", "android", "darwin": 22 | default: 23 | t.Skipf("%s/%s is not supported", runtime.GOOS, runtime.GOARCH) 24 | } 25 | } 26 | 27 | for _, o := range Options { 28 | if o.Required && !*o.Feature { 29 | t.Errorf("%v expected true, got false", o.Name) 30 | } 31 | } 32 | } 33 | 34 | func MustHaveDebugOptionsSupport(t *testing.T) { 35 | if !DebugOptions { 36 | t.Skipf("skipping test: cpu feature options not supported by OS") 37 | } 38 | } 39 | 40 | func MustSupportFeatureDectection(t *testing.T) { 41 | // TODO: add platforms that do not have CPU feature detection support. 
42 | } 43 | 44 | func runDebugOptionsTest(t *testing.T, test string, options string) { 45 | MustHaveDebugOptionsSupport(t) 46 | 47 | testenv.MustHaveExec(t) 48 | 49 | env := "GODEBUG=" + options 50 | 51 | cmd := exec.Command(os.Args[0], "-test.run="+test) 52 | cmd.Env = append(cmd.Env, env) 53 | 54 | output, err := cmd.CombinedOutput() 55 | lines := strings.Fields(string(output)) 56 | lastline := lines[len(lines)-1] 57 | 58 | got := strings.TrimSpace(lastline) 59 | want := "PASS" 60 | if err != nil || got != want { 61 | t.Fatalf("%s with %s: want %s, got %v", test, env, want, got) 62 | } 63 | } 64 | 65 | func TestDisableAllCapabilities(t *testing.T) { 66 | MustSupportFeatureDectection(t) 67 | runDebugOptionsTest(t, "TestAllCapabilitiesDisabled", "cpu.all=off") 68 | } 69 | 70 | func TestAllCapabilitiesDisabled(t *testing.T) { 71 | MustHaveDebugOptionsSupport(t) 72 | 73 | if os.Getenv("GODEBUG") != "cpu.all=off" { 74 | t.Skipf("skipping test: GODEBUG=cpu.all=off not set") 75 | } 76 | 77 | for _, o := range Options { 78 | want := o.Required 79 | if got := *o.Feature; got != want { 80 | t.Errorf("%v: expected %v, got %v", o.Name, want, got) 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /dsp/internal/cpu/cpu_wasm.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package cpu 6 | 7 | const CacheLinePadSize = 64 8 | 9 | func doinit() { 10 | } 11 | -------------------------------------------------------------------------------- /dsp/internal/cpu/cpu_x86.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
// doinit registers the x86 option names and fills in the X86 feature flags
// from CPUID (leaves 0, 1, 7 and 0x80000000) and XGETBV.
func doinit() {
	options = []option{
		{Name: "adx", Feature: &X86.HasADX},
		{Name: "aes", Feature: &X86.HasAES},
		{Name: "avx", Feature: &X86.HasAVX},
		{Name: "avx2", Feature: &X86.HasAVX2},
		{Name: "bmi1", Feature: &X86.HasBMI1},
		{Name: "bmi2", Feature: &X86.HasBMI2},
		{Name: "erms", Feature: &X86.HasERMS},
		{Name: "fma", Feature: &X86.HasFMA},
		{Name: "pclmulqdq", Feature: &X86.HasPCLMULQDQ},
		{Name: "popcnt", Feature: &X86.HasPOPCNT},
		{Name: "sse3", Feature: &X86.HasSSE3},
		{Name: "sse41", Feature: &X86.HasSSE41},
		{Name: "sse42", Feature: &X86.HasSSE42},
		{Name: "ssse3", Feature: &X86.HasSSSE3},

		// These capabilities should always be enabled on amd64:
		{Name: "sse2", Feature: &X86.HasSSE2, Required: GOARCH == "amd64"},
	}

	// Leaf 0 returns the highest supported basic leaf in eax.
	maxID, _, _, _ := cpuid(0, 0)

	// Without leaf 1 no feature bits can be read; all flags stay false.
	if maxID < 1 {
		return
	}

	// Remembered for Name, which needs leaves 0x80000002..0x80000004.
	maxExtendedFunctionInformation, _, _, _ = cpuid(0x80000000, 0)

	_, _, ecx1, edx1 := cpuid(1, 0)
	X86.HasSSE2 = isSet(edx1, cpuid_SSE2)

	X86.HasSSE3 = isSet(ecx1, cpuid_SSE3)
	X86.HasPCLMULQDQ = isSet(ecx1, cpuid_PCLMULQDQ)
	X86.HasSSSE3 = isSet(ecx1, cpuid_SSSE3)
	X86.HasSSE41 = isSet(ecx1, cpuid_SSE41)
	X86.HasSSE42 = isSet(ecx1, cpuid_SSE42)
	X86.HasPOPCNT = isSet(ecx1, cpuid_POPCNT)
	X86.HasAES = isSet(ecx1, cpuid_AES)

	// OSXSAVE can be false when using older Operating Systems
	// or when explicitly disabled on newer Operating Systems by
	// e.g. setting the xsavedisable boot option on Windows 10.
	X86.HasOSXSAVE = isSet(ecx1, cpuid_OSXSAVE)

	// The FMA instruction set extension only has VEX prefixed instructions.
	// VEX prefixed instructions require OSXSAVE to be enabled.
	// See Intel 64 and IA-32 Architecture Software Developer's Manual Volume 2
	// Section 2.4 "AVX and SSE Instruction Exception Specification"
	X86.HasFMA = isSet(ecx1, cpuid_FMA) && X86.HasOSXSAVE

	osSupportsAVX := false
	// For XGETBV, OSXSAVE bit is required and sufficient.
	if X86.HasOSXSAVE {
		eax, _ := xgetbv()
		// Check if XMM and YMM registers have OS support.
		osSupportsAVX = isSet(eax, 1<<1) && isSet(eax, 1<<2)
	}

	X86.HasAVX = isSet(ecx1, cpuid_AVX) && osSupportsAVX

	// The remaining flags live in leaf 7; leave them false when the CPU
	// does not implement it.
	if maxID < 7 {
		return
	}

	_, ebx7, _, _ := cpuid(7, 0)
	X86.HasBMI1 = isSet(ebx7, cpuid_BMI1)
	// AVX2 is subject to the same OS support check as AVX.
	X86.HasAVX2 = isSet(ebx7, cpuid_AVX2) && osSupportsAVX
	X86.HasBMI2 = isSet(ebx7, cpuid_BMI2)
	X86.HasERMS = isSet(ebx7, cpuid_ERMS)
	X86.HasADX = isSet(ebx7, cpuid_ADX)
}
// appendBytes appends each argument to b as four bytes, least significant
// byte first, and returns the extended slice.
func appendBytes(b []byte, args ...uint32) []byte {
	for i := range args {
		word := args[i]
		for shift := uint(0); shift < 32; shift += 8 {
			b = append(b, byte(word>>shift))
		}
	}
	return b
}
// func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
//
// cpuid executes CPUID with AX and CX loaded from the two arguments and
// returns the AX, BX, CX and DX values the instruction leaves behind.
TEXT ·cpuid(SB), NOSPLIT, $0-24
	MOVL eaxArg+0(FP), AX // arguments
	MOVL ecxArg+4(FP), CX
	CPUID
	MOVL AX, eax+8(FP)    // results
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET
// Linear returns the value at fractional position x of samples using linear
// interpolation between the two neighbouring samples.
//
// The sample following the last one is taken to be 0. Positions outside the
// slice (negative, NaN, infinite, or at/after len(samples)) yield 0, which
// matches what LinearF32 does for negative positions.
func Linear(samples []float64, x float64) float64 {
	low := math.Floor(x)
	// Validate in floating point before converting to int: converting a
	// NaN or out-of-range float to int is implementation-defined and could
	// otherwise produce a negative index. The negated comparison also
	// rejects NaN.
	if !(low >= 0) || low >= float64(len(samples)) {
		return 0
	}

	i := int(low)
	lowValue := samples[i]
	var highValue float64
	if i+1 < len(samples) {
		highValue = samples[i+1]
	}
	return lowValue + (x-low)*(highValue-lowValue)
}
// LinearF32 returns the value at fractional position x of samples using
// linear interpolation between the two neighbouring samples.
//
// The sample following the last one is taken to be 0. Positions outside the
// slice (negative, NaN, infinite, or at/after len(samples)) yield 0.
func LinearF32(samples []float32, x float32) float32 {
	lowF := math.Floor(float64(x))
	// Validate in floating point before converting to int: converting a
	// NaN or a value beyond the int range is implementation-defined and
	// could otherwise produce a negative index. The negated comparison
	// also rejects NaN.
	if !(lowF >= 0) || lowF >= float64(len(samples)) {
		return 0
	}

	i := int(lowF)
	low := float32(lowF) // exact: the floor of a float32 is a float32
	lowValue := samples[i]
	var highValue float32
	if i+1 < len(samples) {
		highValue = samples[i+1]
	}
	return lowValue + (x-low)*(highValue-lowValue)
}
// Optimal2x4p4oF32 interpolates using optimal 2x (4-point, 4th-order) (z-form).
//
// x is a fractional position into samples. The four samples around x are
// used; positions that fall outside the slice are treated as 0.
func Optimal2x4p4oF32(samples []float32, x float32) float32 {
	const middle = 1

	// Take the window position and the fractional offset from the same
	// floor value. A plain int(x) truncates toward zero and therefore picks
	// the wrong window for negative fractional positions.
	floor := float32(math.Floor(float64(x)))
	xi := int(floor)

	var s [4]float32
	for i := -1; i <= 2; i++ {
		if j := xi + i; j >= 0 && j < len(samples) {
			s[middle+i] = samples[j]
		}
	}

	even1 := s[middle+1] + s[middle]
	odd1 := s[middle+1] - s[middle]
	even2 := s[middle+2] + s[middle-1]
	odd2 := s[middle+2] - s[middle-1]
	c0 := even1*0.45645918406487612 + even2*0.04354173901996461
	c1 := odd1*0.47236675362442071 + odd2*0.17686613581136501
	c2 := even1*-0.253674794204558521 + even2*0.25371918651882464
	c3 := odd1*-0.37917091811631082 + odd2*0.11952965967158000
	c4 := even1*0.04252164479749607 + even2*-0.04289144034653719

	// z is the offset from the midpoint between the two centre samples.
	z := x - floor - 1.0/2.0
	return (((c4*z+c3)*z+c2)*z+c1)*z + c0
}
// Optimal2x6p5oF32 interpolates using optimal 2x (6-point, 5th-order) (z-form).
//
// x is a fractional position into samples. The six samples around x are
// used; positions that fall outside the slice are treated as 0.
func Optimal2x6p5oF32(samples []float32, x float32) float32 {
	const middle = 2

	// Take the window position and the fractional offset from the same
	// floor value. A plain int(x) truncates toward zero and therefore picks
	// the wrong window for negative fractional positions.
	floor := float32(math.Floor(float64(x)))
	xi := int(floor)

	var s [6]float32
	for i := -2; i <= 3; i++ {
		if j := xi + i; j >= 0 && j < len(samples) {
			s[middle+i] = samples[j]
		}
	}

	even1 := s[middle+1] + s[middle]
	odd1 := s[middle+1] - s[middle]
	even2 := s[middle+2] + s[middle-1]
	odd2 := s[middle+2] - s[middle-1]
	even3 := s[middle+3] + s[middle-2]
	odd3 := s[middle+3] - s[middle-2]
	c0 := even1*0.40513396007145713 + even2*0.09251794438424393 + even3*0.00234806603570670
	c1 := odd1*0.28342806338906690 + odd2*0.21703277024054901 + odd3*0.01309294748731515
	c2 := even1*-0.191337682540351941 + even2*0.16187844487943592 + even3*0.02946017143111912
	c3 := odd1*-0.16471626190554542 + odd2*-0.00154547203542499 + odd3*0.03399271444851909
	c4 := even1*0.03845798729588149 + even2*-0.05712936104242644 + even3*0.01866750929921070
	c5 := odd1*0.04317950185225609 + odd2*-0.01802814255926417 + odd3*0.00152170021558204

	// z is the offset from the midpoint between the two centre samples.
	z := x - floor - 1.0/2.0
	return ((((c5*z+c4)*z+c3)*z+c2)*z+c1)*z + c0
}
// vAbsC64 stores the magnitude of each complex input value in output.
//
//	output[i] = sqrt(real(input[i])^2 + imag(input[i])^2)
//
// Only the first min(len(input), len(output)) elements are processed; when
// either slice is empty nothing is written.
func vAbsC64(input []complex64, output []float32) {
	n := len(input)
	if len(output) < n {
		n = len(output)
	}
	// Reslicing both operands to the common length removes the per-element
	// bounds checks and, unlike indexing output[n-1], is valid for n == 0.
	in, out := input[:n], output[:n]
	for i, v := range in {
		out[i] = float32(math.Sqrt(float64(real(v)*real(v) + imag(v)*imag(v))))
	}
}
// vMaxF32 scans input and returns its largest element. Negative infinity
// is returned for an empty slice.
func vMaxF32(input []float32) float32 {
	best := float32(math.Inf(-1))
	for i := range input {
		if input[i] > best {
			best = input[i]
		}
	}
	return best
}
171 | if y < 0.0 { 172 | return atan - math.Pi 173 | } 174 | return atan + math.Pi 175 | } 176 | return atan 177 | } 178 | atan := pi2 - z/(zz+0.28) 179 | if y < 0.0 { 180 | return atan - math.Pi 181 | } 182 | return atan 183 | } 184 | -------------------------------------------------------------------------------- /dsp/math32_386.s: -------------------------------------------------------------------------------- 1 | TEXT ·FastAtan2(SB), 7, $0 2 | JMP ·fastAtan2(SB) 3 | 4 | TEXT ·FastAtan2_2(SB), 7, $0 5 | JMP ·fastAtan2_2(SB) 6 | 7 | TEXT ·VScaleF32(SB), 7, $0 8 | JMP ·vscaleF32(SB) 9 | 10 | TEXT ·VAbsC64(SB), 7, $0 11 | JMP ·vAbsC64(SB) 12 | 13 | TEXT ·VMaxF32(SB), 7, $0 14 | JMP ·vMaxF32(SB) 15 | 16 | TEXT ·VMulC64xF32(SB), 7, $0 17 | JMP ·vMulC64xF32(SB) 18 | -------------------------------------------------------------------------------- /dsp/math32_amd64.s: -------------------------------------------------------------------------------- 1 | #include "go_asm.h" 2 | #include "textflag.h" 3 | 4 | TEXT ·FastAtan2(SB), NOSPLIT, $0 5 | JMP ·fastAtan2(SB) 6 | 7 | TEXT ·FastAtan2_2(SB), NOSPLIT, $0 8 | JMP ·fastAtan2_2(SB) 9 | 10 | TEXT ·VAbsC64(SB), NOSPLIT, $0 11 | JMP ·vAbsC64(SB) 12 | 13 | TEXT ·VMaxF32(SB), NOSPLIT, $0-28 14 | MOVQ input+0(FP), SI 15 | MOVQ input_len+8(FP), CX 16 | 17 | MOVL $0xff800000, AX // -InF 18 | MOVL AX, X0 19 | 20 | MOVQ $0, DX 21 | 22 | //CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1 23 | CMPB ·useAVX2(SB), $1 24 | JE vmaxf32_avx2 25 | //CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1 26 | CMPB ·useSSE2(SB), $1 27 | JE vmaxf32_sse2 28 | JMP vmaxf32_scalar 29 | 30 | vmaxf32_avx2: 31 | MOVQ CX, BX 32 | ANDQ $-32, BX 33 | CMPQ DX, BX 34 | JGE vmaxf32_scalar 35 | 36 | VBROADCASTSS X0, Y0 37 | VMOVUPS Y0, Y1 38 | VMOVUPS Y0, Y2 39 | VMOVUPS Y0, Y3 40 | 41 | vmaxf32_avx2_loop: 42 | VMOVUPS (SI), Y4 43 | VMOVUPS 32(SI), Y5 44 | VMOVUPS 64(SI), Y6 45 | VMOVUPS 96(SI), Y7 46 | VMAXPS Y4, Y0, Y0 47 | VMAXPS Y5, Y1, Y1 48 | VMAXPS Y6, 
Y2, Y2 49 | VMAXPS Y7, Y3, Y3 50 | ADDQ $128, SI 51 | ADDQ $32, DX 52 | CMPQ DX, BX 53 | JLT vmaxf32_avx2_loop 54 | 55 | VMAXPS Y1, Y0, Y0 56 | VMAXPS Y2, Y0, Y0 57 | VMAXPS Y3, Y0, Y0 58 | VEXTRACTF128 $1, Y0, X1 59 | MAXPS X1, X0 60 | MOVHLPS X0, X1 61 | MAXPS X1, X0 62 | PSHUFD $0x55, X0, X1 63 | MAXPS X1, X0 64 | JMP vmaxf32_scalar 65 | 66 | vmaxf32_sse2: 67 | MOVQ CX, BX 68 | ANDQ $-16, BX 69 | CMPQ DX, BX 70 | JGE vmaxf32_scalar 71 | 72 | PSHUFD $0, X0, X0 73 | MOVUPS X0, X1 74 | MOVUPS X0, X2 75 | MOVUPS X0, X3 76 | 77 | vmaxf32_sse_loop: 78 | MOVUPS (SI), X4 79 | MOVUPS 16(SI), X5 80 | MOVUPS 32(SI), X6 81 | MOVUPS 48(SI), X7 82 | MAXPS X4, X0 83 | MAXPS X5, X1 84 | MAXPS X6, X2 85 | MAXPS X7, X3 86 | ADDQ $64, SI 87 | ADDQ $16, DX 88 | CMPQ DX, BX 89 | JLT vmaxf32_sse_loop 90 | 91 | MAXPS X1, X0 92 | MAXPS X2, X0 93 | MAXPS X3, X0 94 | MOVHLPS X0, X1 95 | MAXPS X1, X0 96 | PSHUFD $0x55, X0, X1 97 | MAXPS X1, X0 98 | 99 | vmaxf32_scalar: 100 | CMPQ DX, CX 101 | JGE vmaxf32_done 102 | 103 | vmaxf32_scalar_loop: 104 | MOVSS (SI), X1 105 | UCOMISS X0, X1 106 | JLS vmaxf32_not_max 107 | MOVO X1, X0 108 | 109 | vmaxf32_not_max: 110 | ADDQ $4, SI 111 | INCQ DX 112 | CMPQ DX, CX 113 | JLT vmaxf32_scalar_loop 114 | 115 | vmaxf32_done: 116 | MOVSS X0, ret+24(FP) 117 | RET 118 | 119 | TEXT ·VMinF32(SB), NOSPLIT, $0-28 120 | MOVQ input+0(FP), SI 121 | MOVQ input_len+8(FP), CX 122 | 123 | MOVL $0x7f800000, AX // InF 124 | MOVL AX, X0 125 | 126 | MOVQ $0, DX 127 | 128 | //CMPB ·x86+const_offsetX86HasAVX2(SB), $1 129 | CMPB ·useAVX2(SB), $1 130 | JE vminf32_avx2 131 | //CMPB ·x86+const_offsetX86HasSSE2(SB), $1 132 | CMPB ·useSSE2(SB), $1 133 | JE vminf32_sse2 134 | JMP vminf32_scalar 135 | 136 | vminf32_avx2: 137 | MOVQ CX, BX 138 | ANDQ $-32, BX 139 | CMPQ DX, BX 140 | JGE vminf32_scalar 141 | 142 | VBROADCASTSS X0, Y0 143 | VMOVUPS Y0, Y1 144 | VMOVUPS Y0, Y2 145 | VMOVUPS Y0, Y3 146 | 147 | vminf32_avx2_loop: 148 | VMOVUPS (SI), Y4 149 | VMOVUPS 32(SI), Y5 150 | 
VMOVUPS 64(SI), Y6 151 | VMOVUPS 96(SI), Y7 152 | VMINPS Y4, Y0, Y0 153 | VMINPS Y5, Y1, Y1 154 | VMINPS Y6, Y2, Y2 155 | VMINPS Y7, Y3, Y3 156 | ADDQ $128, SI 157 | ADDQ $32, DX 158 | CMPQ DX, BX 159 | JLT vminf32_avx2_loop 160 | 161 | VMINPS Y1, Y0, Y0 162 | VMINPS Y2, Y0, Y0 163 | VMINPS Y3, Y0, Y0 164 | VEXTRACTF128 $1, Y0, X1 165 | MINPS X1, X0 166 | MOVHLPS X0, X1 167 | MINPS X1, X0 168 | PSHUFD $0x55, X0, X1 169 | MINPS X1, X0 170 | JMP vminf32_scalar 171 | 172 | vminf32_sse2: 173 | MOVQ CX, BX 174 | ANDQ $-16, BX 175 | CMPQ DX, BX 176 | JGE vminf32_scalar 177 | 178 | PSHUFD $0, X0, X0 179 | MOVUPS X0, X1 180 | MOVUPS X0, X2 181 | MOVUPS X0, X3 182 | 183 | vminf32_sse_loop: 184 | MOVUPS (SI), X4 185 | MOVUPS 16(SI), X5 186 | MOVUPS 32(SI), X6 187 | MOVUPS 48(SI), X7 188 | MINPS X4, X0 189 | MINPS X5, X1 190 | MINPS X6, X2 191 | MINPS X7, X3 192 | ADDQ $64, SI 193 | ADDQ $16, DX 194 | CMPQ DX, BX 195 | JLT vminf32_sse_loop 196 | 197 | MINPS X1, X0 198 | MINPS X2, X0 199 | MINPS X3, X0 200 | MOVHLPS X0, X1 201 | MINPS X1, X0 202 | PSHUFD $0x55, X0, X1 203 | MINPS X1, X0 204 | 205 | vminf32_scalar: 206 | CMPQ DX, CX 207 | JGE vminf32_done 208 | 209 | vminf32_scalar_loop: 210 | MOVSS (SI), X1 211 | UCOMISS X1, X0 212 | JLS vminf32_not_min 213 | MOVO X1, X0 214 | 215 | vminf32_not_min: 216 | ADDQ $4, SI 217 | INCQ DX 218 | CMPQ DX, CX 219 | JLT vminf32_scalar_loop 220 | 221 | vminf32_done: 222 | MOVSS X0, ret+24(FP) 223 | RET 224 | 225 | TEXT ·VMulC64xF32(SB), NOSPLIT, $0 226 | JMP ·vMulC64xF32(SB) 227 | 228 | TEXT ·VScaleF32(SB), NOSPLIT, $0 229 | MOVQ input+0(FP), SI 230 | MOVQ input_len+8(FP), AX 231 | MOVQ output+24(FP), DI 232 | MOVQ output_len+32(FP), CX 233 | MOVSS scale+48(FP), X8 234 | PSHUFD $0, X8, X8 235 | 236 | CMPQ AX, CX 237 | JGE vscalef32_min_len 238 | MOVQ AX, CX 239 | vscalef32_min_len: 240 | MOVQ CX, DX 241 | 242 | MOVQ $0, AX 243 | 244 | //CMPB ·x86+const_offsetX86HasAVX2(SB), $1 245 | CMPB ·useAVX2(SB), $1 246 | JE vscalef32_avx2 247 | 
//CMPB ·x86+const_offsetX86HasSSE2(SB), $1 248 | CMPB ·useSSE2(SB), $1 249 | JE vscalef32_sse2 250 | JMP vscalef32_scalar 251 | 252 | vscalef32_avx2: 253 | MOVQ CX, DX 254 | ANDQ $(~63), CX 255 | CMPQ AX, CX 256 | JGE vscalef32_scalar 257 | 258 | VBROADCASTSS X8, Y8 259 | 260 | vscalef32_avx2_loop: 261 | VMOVUPS (SI), Y0 262 | VMOVUPS 32(SI), Y1 263 | VMOVUPS 64(SI), Y2 264 | VMOVUPS 96(SI), Y3 265 | VMULPS Y8, Y0, Y0 266 | VMULPS Y8, Y1, Y1 267 | VMULPS Y8, Y2, Y2 268 | VMULPS Y8, Y3, Y3 269 | VMOVUPS Y0, (DI) 270 | VMOVUPS Y1, 32(DI) 271 | VMOVUPS Y2, 64(DI) 272 | VMOVUPS Y3, 96(DI) 273 | ADDQ $32, AX 274 | ADDQ $128, SI 275 | ADDQ $128, DI 276 | CMPQ AX, CX 277 | JLT vscalef32_avx2_loop 278 | 279 | JMP vscalef32_scalar 280 | 281 | vscalef32_sse2: 282 | MOVQ CX, DX 283 | ANDQ $(~31), CX 284 | CMPQ AX, CX 285 | JGE vscalef32_scalar 286 | 287 | vscalef32_sse2_loop: 288 | MOVUPS (SI), X0 289 | MOVUPS 16(SI), X1 290 | MOVUPS 32(SI), X2 291 | MOVUPS 48(SI), X3 292 | MULPS X8, X0 293 | MULPS X8, X1 294 | MULPS X8, X2 295 | MULPS X8, X3 296 | MOVUPS X0, (DI) 297 | MOVUPS X1, 16(DI) 298 | MOVUPS X2, 32(DI) 299 | MOVUPS X3, 48(DI) 300 | ADDQ $16, AX 301 | ADDQ $64, SI 302 | ADDQ $64, DI 303 | CMPQ AX, CX 304 | JLT vscalef32_sse2_loop 305 | 306 | vscalef32_scalar: 307 | CMPQ AX, DX 308 | JGE vscalef32_done 309 | 310 | vscalef32_scalar_loop: 311 | MOVSS (SI), X0 312 | MULSS X8, X0 313 | MOVSS X0, (DI) 314 | INCQ AX 315 | ADDQ $4, SI 316 | ADDQ $4, DI 317 | CMPQ AX, CX 318 | JLT vscalef32_scalar_loop 319 | 320 | vscalef32_done: 321 | RET 322 | -------------------------------------------------------------------------------- /dsp/math32_arm.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | 3 | #define pi $3.14159265358979323846264338327950288419716939937510582097494459 4 | #define halfPi $1.570796326794896557998981734272092580795288085938 5 | #define negativeHalfPi 
$-1.570796326794896557998981734272092580795288085938 6 | 7 | #define vmrs_APSR_nzcv_fpscr WORD $0xeef1fa10 8 | 9 | // Uses F0, F1, F2, F3, F4, F6 10 | TEXT ·FastAtan2(SB), NOSPLIT, $-4 11 | MOVF y+0(FP), F6 12 | MOVF x+4(FP), F4 13 | 14 | ABSF F6, F2 15 | 16 | MOVF $1e-20, F0 17 | ADDF F0, F2 18 | 19 | WORD $0xeeb54ac0 // vcmpe.f32 s8, #0x0 20 | vmrs_APSR_nzcv_fpscr 21 | BGT fatan2_pos_x 22 | BEQ fatan2_zero_x 23 | 24 | ADDF F2, F4, F1 // x + abs(y) 25 | SUBF F4, F2, F4 // abs(y) - x 26 | MOVF $2.356194496154785, F3 // pi * 3/4 27 | B fatan2_2 28 | 29 | fatan2_pos_x: 30 | SUBF F2, F4, F1 // x - abs(y) 31 | ADDF F2, F4, F4 // abs(y) + x 32 | MOVF $0.7853981852531433, F3 // pi * 1/4 33 | 34 | fatan2_2: 35 | DIVF F4, F1, F2 36 | 37 | MOVF $0.1963, F1 38 | MULF F2, F1 39 | MULF F2, F1 40 | MOVF $0.9817, F0 41 | SUBF F0, F1 42 | MULF F2, F1 43 | ADDF F3, F1 44 | 45 | WORD $0xeeb56ac0 // vcmpe.f32 s12, #0x0 46 | vmrs_APSR_nzcv_fpscr 47 | WORD $0xbeb11a41 // vneglt.f32 s2, s2 48 | MOVF F1, ret+8(FP) 49 | RET 50 | 51 | fatan2_zero_x: 52 | WORD $0xeeb56ac0 // vcmpe.f32 s12, #0x0 53 | vmrs_APSR_nzcv_fpscr 54 | BGT fatan2_pos_y 55 | BLT fatan2_neg_y 56 | MOVF F6, ret+8(FP) 57 | RET 58 | 59 | fatan2_neg_y: 60 | MOVF negativeHalfPi, F6 61 | MOVF F6, ret+8(FP) 62 | RET 63 | 64 | fatan2_pos_y: 65 | MOVF halfPi, F6 66 | MOVF F6, ret+8(FP) 67 | RET 68 | 69 | // Uses F0, F1, F2, F3, F4, F6 70 | TEXT ·FastAtan2_2(SB), NOSPLIT, $-4 71 | MOVF x+4(FP), F6 72 | MOVF y+0(FP), F3 73 | WORD $0xeeb56ac0 // vcmpe.f32 s12, #0x0 74 | vmrs_APSR_nzcv_fpscr 75 | BEQ fatan22_zero_x 76 | 77 | // y / x 78 | DIVF F6, F3, F1 79 | MULF F1, F1, F2 80 | MOVF $1.0, F0 81 | 82 | // CMPF F0, F2 83 | WORD $0xeeb42ac0 // vcmpe.f32 s4, s0 84 | vmrs_APSR_nzcv_fpscr 85 | BGT fatan22_5 86 | 87 | // z / (1.0 + 0.28*z*z) 88 | MOVF $0.28, F4 89 | MULF F4, F2 90 | ADDF F0, F2 91 | DIVF F2, F1, F2 92 | WORD $0xeeb56ac0 // vcmpe.f32 s12, #0x0 93 | vmrs_APSR_nzcv_fpscr 94 | BGE fatan22_6 95 | MOVF pi, F1 96 | WORD 
$0xeeb53ac0 // vcmpe.f32 s6, #0x0 97 | vmrs_APSR_nzcv_fpscr 98 | SUBF.LT F1, F2 99 | ADDF.GE F1, F2 100 | 101 | fatan22_6: 102 | MOVF F2, ret+8(FP) 103 | RET 104 | 105 | fatan22_5: 106 | // pi2 - z/(z*z+0.28) 107 | MOVF $0.28, F4 108 | ADDF F4, F2 109 | DIVF F2, F1, F2 110 | MOVF halfPi, F1 111 | SUBF F2, F1, F2 112 | MOVF pi, F1 113 | WORD $0xeeb53ac0 // vcmpe.f32 s6, #0x0 114 | vmrs_APSR_nzcv_fpscr 115 | SUBF.LT F1, F2 116 | MOVF F2, ret+8(FP) 117 | RET 118 | 119 | fatan22_zero_x: 120 | WORD $0xeeb53ac0 // vcmpe.f32 s6, #0x0 121 | vmrs_APSR_nzcv_fpscr 122 | 123 | // MOVF.LT negativeHalfPi, F6 124 | // MOVF.GT halfPi, F6 125 | // MOVF F6, ret+8(FP) 126 | // RET 127 | BGT fatan22_pi2 128 | BLT fatan22_neg_pi2 129 | MOVF F6, ret+8(FP) 130 | RET 131 | 132 | fatan22_neg_pi2: 133 | MOVF negativeHalfPi, F6 134 | MOVF F6, ret+8(FP) 135 | RET 136 | 137 | fatan22_pi2: 138 | MOVF halfPi, F6 139 | MOVF F6, ret+8(FP) 140 | RET 141 | 142 | TEXT ·VScaleF32(SB), NOSPLIT, $0 143 | MOVW input+0(FP), R0 144 | MOVW input_len+4(FP), R2 145 | MOVW output+12(FP), R1 146 | MOVW output_len+16(FP), R3 147 | MOVF scale+24(FP), F0 148 | 149 | // Choose the shortest length 150 | CMP R2, R3 151 | MOVW.LT R3, R2 152 | 153 | TEQ $0, R2 154 | BEQ vscalef32_done 155 | 156 | MOVBU ·HaveNEON+0(SB), R3 157 | CMP $0, R3 158 | BEQ vscalef32_scalar_loop 159 | 160 | CMP $16, R2 161 | BLT vscalef32_scalar_loop 162 | 163 | PLD (R0) 164 | vscalef32_neon_loop: 165 | PLD (4*16)(R0) 166 | WORD $0xecb02b10 // vldmia r0!, {q1, q2, q3, q4} 167 | WORD $0xf3a22940 // vmul.f32 q1, q1, d0[0] 168 | WORD $0xf3a44940 // vmul.f32 q2, q2, d0[0] 169 | WORD $0xf3a66940 // vmul.f32 q3, q3, d0[0] 170 | WORD $0xf3a88940 // vmul.f32 q4, q4, d0[0] 171 | WORD $0xeca12b10 // vstmia r1!, {q1, q2, q3, q4} 172 | SUB $16, R2 173 | CMP $16, R2 174 | BGE vscalef32_neon_loop 175 | 176 | vscalef32_scalar: 177 | TEQ $0, R2 178 | BEQ vscalef32_done 179 | 180 | vscalef32_scalar_loop: 181 | MOVF (R0), F1 182 | ADD $4, R0 183 | MULF F0, F1, 
F1 184 | MOVF F1, (R1) 185 | ADD $4, R1 186 | SUB $1, R2 187 | TEQ $0, R2 188 | BNE vscalef32_scalar_loop 189 | 190 | vscalef32_done: 191 | RET 192 | 193 | 194 | TEXT ·VMulC64xF32(SB), NOSPLIT, $0 195 | B ·vMulC64xF32(SB) 196 | 197 | TEXT ·VAbsC64(SB), NOSPLIT, $0 198 | MOVW input+0(FP), R0 199 | MOVW output+12(FP), R1 200 | MOVW input_len+4(FP), R2 201 | MOVW output_len+16(FP), R3 202 | 203 | // Choose the shortest length 204 | CMP R2, R3 205 | MOVW.LT R3, R2 206 | 207 | // If no input then skip loop 208 | CMP $0, R2 209 | BEQ vabsc64_done 210 | 211 | MOVBU ·UseVector+0(SB), R3 212 | TEQ $0, R3 213 | BEQ vabsc64_scalar_loop 214 | 215 | CMP $4, R2 216 | BLT vabsc64_scalar_loop 217 | 218 | PLD (R0) 219 | PLD 64(R0) 220 | PLD (2*64)(R0) 221 | PLD (3*64)(R0) 222 | 223 | // Set vector length to 4 and stride to 2 224 | WORD $0xeef13a10 // vmrs r3, fpscr 225 | BIC $((7<<16)|(3<<20)), R3 226 | ORR $((3<<16)|(1<<20)), R3 227 | WORD $0xeee13a10 // fmxr fpscr, r3 228 | 229 | vabsc64_vector_loop: 230 | PLD (4*64)(R0) 231 | 232 | WORD $0xecb04a08 // vldmia r0!, {s8-s15} 233 | WORD $0xee244a04 // vmul.f32 s8, s8, s8 234 | WORD $0xee044aa4 // vmla.f32 s8, s9, s9 235 | WORD $0xeeb14ac4 // vsqrt.f32 s8, s8 236 | WORD $0xed814a00 // vstr s8, [r1] 237 | WORD $0xed815a01 // vstr s10, [r1, #0x4] 238 | WORD $0xed816a02 // vstr s12, [r1, #0x8] 239 | WORD $0xed817a03 // vstr s14, [r1, #0xc] 240 | ADD $16, R1 241 | 242 | SUB $4, R2 243 | CMP $4, R2 244 | BGE vabsc64_vector_loop 245 | 246 | // Clear vector mode 247 | WORD $0xeef13a10 // vmrs r3, fpscr 248 | BIC $((7<<16)|(3<<20)), R3 249 | WORD $0xeee13a10 // fmxr fpscr, r3 250 | 251 | TEQ $0, R2 252 | BEQ vabsc64_done 253 | 254 | vabsc64_scalar_loop: 255 | MOVF 0(R0), F0 // real 256 | MOVF 4(R0), F1 // imag 257 | ADD $8, R0 258 | MULF F0, F0 259 | MULF F1, F1 260 | ADDF F1, F0 261 | SQRTF F0, F0 262 | MOVF F0, 0(R1) 263 | ADD $4, R1 264 | SUB $1, R2 265 | TEQ $0, R2 266 | BNE vabsc64_scalar_loop 267 | 268 | vabsc64_done: 269 | RET 270 | 
// func VMaxF32(input []float32) float32
//
// VMaxF32 returns the largest value in input. The running maximum is seeded
// with -Inf, so an empty slice yields -Inf.
//
// Register use (FP register to s-register mapping follows the comments on the
// hand-encoded WORD instructions below):
//   R0 = pointer to the next unread element
//   R2 = number of elements still to process
//   R3 = scratch (HaveNEON flag)
//   F4 (s8) = running maximum / return value
//   F1 (s2) = scratch for the scalar tail
//
// Three code paths are used:
//   1. NEON: taken when HaveNEON is non-zero and len >= 16; 8 floats per iteration.
//   2. Batch: VFP compare/move, 4 floats per iteration, when NEON is not used.
//   3. Scalar: one float per iteration for whatever is left over.
TEXT ·VMaxF32(SB), 7, $0
	MOVW	input+0(FP), R0
	MOVW	input_len+4(FP), R2

	// Seed the running maximum with the bit pattern of float32 -Inf.
	MOVW	$0xff800000, R1
	MOVW	R1, F4

	// Empty input: return the -Inf seed unchanged.
	CMP	$0, R2
	BEQ	vmaxf32_done

	// Use the NEON path only when the HaveNEON flag is set
	// (presumably filled in by CPU feature detection elsewhere in the package).
	MOVBU	·HaveNEON+0(SB), R3
	CMP	$0, R3
	BEQ	vmaxf32_batch

	// Short inputs (fewer than 16 elements) take the non-NEON path.
	CMP	$16, R2
	BLT	vmaxf32_batch

	// Prime the two vector accumulators (q4, q5) with the first 8 elements so
	// the loop below needs no separate initial value.
	WORD	$0xecb08b08 // vldmia r0!, {q4,q5}
	SUB	$8, R2

	//PLD (R0)
vmaxf32_neon_loop:
	//PLD (12*16)(R0)
	// Load the next 8 floats and fold them lane-wise into the accumulators.
	WORD	$0xecb02b08 // vldmia r0!, {q1, q2}
	WORD	$0xf2088f42 // vmax.f32 q4, q4, q1
	WORD	$0xf20aaf44 // vmax.f32 q5, q5, q2
	SUB	$8, R2
	// Keep looping while at least one full group of 8 remains.
	CMP	$8, R2
	BGE	vmaxf32_neon_loop

	// Horizontal reduction: merge both accumulators, then pairwise-max down to
	// a single value that ends up in d4 (whose low half is s8, the running max).
	WORD	$0xf2080f4a // vmax.f32 q0, q4, q5
	WORD	$0xf3004f01 // vpmax.f32 d4, d0, d1
	WORD	$0xf3044f04 // vpmax.f32 d4, d4, d4

	// Any leftover elements (fewer than 8) are handled one at a time.
	B	vmaxf32_scalar

vmaxf32_batch:
	// R2 is known to be non-zero here, so with fewer than 4 elements it is
	// safe to enter the scalar loop body directly.
	CMP	$4, R2
	BLT	vmaxf32_scalar_loop

	// Preload hints for the first cache lines of the input.
	PLD	(R0)
	PLD	64(R0)
	PLD	(2*64)(R0)

vmaxf32_batch_loop:
	PLD	(3*64)(R0)
	// Load 4 floats into s0-s3, then compare each against the running max
	// in s8 and conditionally replace it. vmrs_APSR_nzcv_fpscr (macro defined
	// at the top of this file) copies the VFP comparison flags into the CPU
	// flags so that the conditional move can test them.
	WORD	$0xecb00a04 // vldmia r0!, {s0-s3}
	WORD	$0xeeb40ac4 // vcmpe.f32 s0, s8
	vmrs_APSR_nzcv_fpscr
	WORD	$0xceb04a40 // vmovgt.f32 s8, s0
	WORD	$0xeef40ac4 // vcmpe.f32 s1, s8
	vmrs_APSR_nzcv_fpscr
	WORD	$0xceb04a60 // vmovgt.f32 s8, s1
	WORD	$0xeeb41ac4 // vcmpe.f32 s2, s8
	vmrs_APSR_nzcv_fpscr
	WORD	$0xceb04a41 // vmovgt.f32 s8, s2
	WORD	$0xeef41ac4 // vcmpe.f32 s3, s8
	vmrs_APSR_nzcv_fpscr
	WORD	$0xceb04a61 // vmovgt.f32 s8, s3
	SUB	$4, R2
	// Keep looping while at least one full group of 4 remains.
	CMP	$4, R2
	BGE	vmaxf32_batch_loop

vmaxf32_scalar:
	// Nothing left after the vector/batch loop: go straight to the return.
	TEQ	$0, R2
	BEQ	vmaxf32_done

vmaxf32_scalar_loop:
	// Process the remaining elements one at a time.
	MOVF	0(R0), F1
	ADD	$4, R0

	// CMPF F4, F1
	WORD	$0xeeb41ac4 // vcmpe.f32 s2, s8
	vmrs_APSR_nzcv_fpscr
	// Replace the running maximum only when the new element is greater.
	MOVF.GT	F1, F4
	SUB	$1, R2
	TEQ	$0, R2
	BNE	vmaxf32_scalar_loop

vmaxf32_done:
	// Store the running maximum as the function result.
	MOVF	F4, ret+12(FP)
	RET
-------------------------------------------------------------------------------- /dsp/math32_arm64.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | 3 | #define Inf32 0x7f800000 4 | #define NegInf32 0xff800000 5 | 6 | TEXT ·FastAtan2(SB), NOSPLIT, $0 7 | B ·fastAtan2(SB) 8 | 9 | TEXT ·FastAtan2_2(SB), NOSPLIT, $0 10 | B ·fastAtan2_2(SB) 11 | 12 | TEXT ·VScaleF32(SB), NOSPLIT, $0 13 | MOVD input(FP), R0 14 | MOVD input_len+8(FP), R1 15 | MOVD output+24(FP), R2 16 | MOVD output_len+32(FP), R3 17 | FMOVS scale+48(FP), F0 18 | 19 | CMP R3, R1 20 | BLT vscalef32_min_len 21 | MOVD R3, R1 22 | vscalef32_min_len: 23 | 24 | #define BLOCK_SIZE 16 25 | 26 | CMP $BLOCK_SIZE, R1 27 | BLT vscalef32_scaler 28 | vscalef32_simd_loop: 29 | //VLD1 (R0), [V1.S4] 30 | //WORD $0x3dc00001 // ldr q1, [x0] 31 | //ADD $16, R0 32 | 33 | //WORD $0xf9802003 // prfm pldl2strm, [x0, 64] 34 | //WORD $0xf980a000 // prfm pldl1keep, [x0, 320] 35 | //WORD $0xf980a001 // prfm pldl1strm, [x0, 320] 36 | WORD $0xf9804001 // prfm pldl1strm, [x0, 128] 37 | //WORD $0xf9804000 // prfm pldl1keep, [x0, 128] 38 | 39 | //WORD $0x4c402801 // ld1 {v1.4s,v2.4s,v3.4s,v4.4s}, [x0] 40 | WORD $0xad400801 // ldp q1, q2, [x0] 41 | WORD $0xad411003 // ldp q3, q4, [x0,32] 42 | //WORD $0xad421805 // ldp q5, q6, [x0,64] 43 | ADD $(BLOCK_SIZE*4), R0 44 | 45 | WORD $0x4f809021 // fmul v1.4s, v1.4s, v0.s[0] 46 | WORD $0x4f809042 // fmul v2.4s, v2.4s, v0.s[0] 47 | WORD $0x4f809063 // fmul v3.4s, v3.4s, v0.s[0] 48 | WORD $0x4f809084 // fmul v4.4s, v4.4s, v0.s[0] 49 | //WORD $0x4f8090a5 // fmul v5.4s, v5.4s, v0.s[0] 50 | //WORD $0x4f8090c6 // fmul v6.4s, v6.4s, v0.s[0] 51 | 52 | //VST1 [V1.S4], (R2) 53 | //WORD $0x3d800041 // str q1, [x2] 54 | //ADD $16, R2 55 | 56 | //WORD $0x4c00a841 // st1 {v1.4s,v2.4s}, [x2] 57 | WORD $0xad000841 // stp q1, q2, [x2] 58 | WORD $0xad011043 // stp q3, q4, [x2,32] 59 | //WORD $0xad021845 // stp q5, q6, [x2,64] 60 | ADD 
$(BLOCK_SIZE*4), R2 61 | 62 | SUB $BLOCK_SIZE, R1 63 | CMP $BLOCK_SIZE, R1 64 | BGE vscalef32_simd_loop 65 | 66 | vscalef32_scaler: 67 | CMP ZR, R1 68 | BEQ vscalef32_done 69 | vscalef32_scaler_loop: 70 | FMOVS.P 4(R0), F1 71 | FMULS F0, F1, F1 72 | FMOVS.P F1, 4(R2) 73 | SUBS $1, R1 74 | BNE vscalef32_scaler_loop 75 | vscalef32_done: 76 | RET 77 | 78 | TEXT ·VMulC64xF32(SB), NOSPLIT, $0 79 | B ·vMulC64xF32(SB) 80 | 81 | TEXT ·VAbsC64(SB), NOSPLIT, $0 82 | B ·vAbsC64(SB) 83 | 84 | TEXT ·VMaxF32(SB), NOSPLIT, $0 85 | MOVD input(FP), R0 86 | MOVD input_len+8(FP), R1 87 | MOVW $NegInf32, R2 88 | FMOVS R2, F31 89 | 90 | #undef BLOCK_SIZE 91 | #define BLOCK_SIZE 16 92 | 93 | CMP $(8+BLOCK_SIZE), R1 94 | BLT vmaxf32_scaler 95 | 96 | //VLD1.P 16(R0), [V0.S4] // ld1 {v0.4s}, [x0], #16 97 | WORD $0xad401c00 // ldp q0, q7, [x0] 98 | ADD $(BLOCK_SIZE/2*4), R0 99 | SUB $(BLOCK_SIZE/2), R1 100 | vmaxf32_simd_loop: 101 | //VLD1.P (R0), [V1.S4,V2.S4,V3.S4,V4.S4] 102 | // ldp is faster than vld1 103 | WORD $0xad400801 // ldp q1, q2, [x0] 104 | WORD $0xad411003 // ldp q3, q4, [x0,32] 105 | ADD $(BLOCK_SIZE*4), R0 106 | WORD $0x4e21f400 // fmax v0.4s, v0.4s, v1.4s 107 | WORD $0x4e23f4e7 // fmax v7.4s, v7.4s, v3.4s 108 | WORD $0x4e22f400 // fmax v0.4s, v0.4s, v2.4s 109 | WORD $0x4e24f4e7 // fmax v7.4s, v7.4s, v4.4s 110 | SUB $BLOCK_SIZE, R1 111 | CMP $BLOCK_SIZE, R1 112 | BGE vmaxf32_simd_loop 113 | WORD $0x6e30f81e // fmaxv s30, v0.4s 114 | WORD $0x6e30f8ff // fmaxv s31, v7.4s 115 | FMAXS F31, F30, F31 116 | 117 | vmaxf32_scaler: 118 | CMP ZR, R1 119 | BEQ vmaxf32_done 120 | vmaxf32_loop: 121 | //FMOVS.P (R0), F1 122 | FMOVS (R0), F1 123 | ADD $4, R0 124 | FMAXS F31, F1, F31 125 | SUBS $1, R1 126 | BNE vmaxf32_loop 127 | vmaxf32_done: 128 | FMOVS F31, ret+24(FP) 129 | RET 130 | 131 | TEXT ·VMinF32(SB), NOSPLIT, $0 132 | MOVD input(FP), R0 133 | MOVD input_len+8(FP), R1 134 | MOVW $Inf32, R2 135 | FMOVS R2, F31 136 | 137 | #undef BLOCK_SIZE 138 | #define BLOCK_SIZE 16 139 | 140 | CMP 
$(8+BLOCK_SIZE), R1 141 | BLT vmaxf32_scaler 142 | 143 | //VLD1.P 16(R0), [V0.S4] // ld1 {v0.4s}, [x0], #16 144 | WORD $0xad401c00 // ldp q0, q7, [x0] 145 | ADD $(BLOCK_SIZE/2*4), R0 146 | SUB $(BLOCK_SIZE/2), R1 147 | vmaxf32_simd_loop: 148 | //VLD1.P (R0), [V1.S4,V2.S4,V3.S4,V4.S4] 149 | // ldp is faster than vld1 150 | WORD $0xad400801 // ldp q1, q2, [x0] 151 | WORD $0xad411003 // ldp q3, q4, [x0,32] 152 | ADD $(BLOCK_SIZE*4), R0 153 | WORD $0x4ea1f400 // fmin v0.4s, v0.4s, v1.4s 154 | WORD $0x4ea3f4e7 // fmin v7.4s, v7.4s, v3.4s 155 | WORD $0x4ea2f400 // fmin v0.4s, v0.4s, v2.4s 156 | WORD $0x4ea4f4e7 // fmin v7.4s, v7.4s, v4.4s 157 | SUB $BLOCK_SIZE, R1 158 | CMP $BLOCK_SIZE, R1 159 | BGE vmaxf32_simd_loop 160 | WORD $0x6eb0f81e // fminv s30, v0.4s 161 | WORD $0x6eb0f8ff // fminv s31, v7.4s 162 | FMINS F31, F30, F31 163 | 164 | vmaxf32_scaler: 165 | CMP ZR, R1 166 | BEQ vmaxf32_done 167 | vmaxf32_loop: 168 | //FMOVS.P (R0), F1 169 | FMOVS (R0), F1 170 | ADD $4, R0 171 | FMINS F31, F1, F31 172 | SUBS $1, R1 173 | BNE vmaxf32_loop 174 | vmaxf32_done: 175 | FMOVS F31, ret+24(FP) 176 | RET 177 | -------------------------------------------------------------------------------- /dsp/math32_test.go: -------------------------------------------------------------------------------- 1 | package dsp 2 | 3 | import ( 4 | "math" 5 | "math/rand" 6 | "testing" 7 | ) 8 | 9 | const ( 10 | approxErrorLimit = 0.011 11 | ) 12 | 13 | var ( 14 | atanBenchTable = [][2]float32{} 15 | atanBenchTableFixed = [][2]int{} 16 | ) 17 | 18 | func init() { 19 | for y := -1.0; y <= 1.0; y += 0.5 { 20 | for x := -1.0; x <= 1.0; x += 0.5 { 21 | atanBenchTable = append(atanBenchTable, [2]float32{float32(x), float32(y)}) 22 | atanBenchTableFixed = append(atanBenchTableFixed, [2]int{int(x * (1 << 14)), int(y * (1 << 14))}) 23 | } 24 | } 25 | } 26 | 27 | func TestAtan2(t *testing.T) { 28 | for y := -1.0; y <= 1.0; y += 0.01 { 29 | for x := -1.0; x <= 1.0; x += 0.01 { 30 | expected := 
float32(math.Atan2(y, x)) 31 | if err := math.Abs(float64(expected - FastAtan2(float32(y), float32(x)))); err > approxErrorLimit { 32 | t.Errorf("FastAtan2 gave an error of %f for x=%f y=%f", err, x, y) 33 | } 34 | if err := math.Abs(float64(expected - FastAtan2_2(float32(y), float32(x)))); err > approxErrorLimit { 35 | t.Errorf("FastAtan2_2 gave an error of %f for x=%f y=%f", err, x, y) 36 | } 37 | } 38 | } 39 | x, y := 0.0, 0.0 40 | expected := float32(math.Atan2(y, x)) 41 | if err := math.Abs(float64(expected - FastAtan2(float32(y), float32(x)))); err > approxErrorLimit { 42 | t.Errorf("FastAtan2 gave an error of %f for x=%f y=%f", err, x, y) 43 | } 44 | if err := math.Abs(float64(expected - FastAtan2_2(float32(y), float32(x)))); err > approxErrorLimit { 45 | t.Errorf("FastAtan2_2 gave an error of %f for x=%f y=%f", err, x, y) 46 | } 47 | } 48 | 49 | func TestFastAtan2Error(t *testing.T) { 50 | maxE := 0.0 51 | sumE := 0.0 52 | count := 0 53 | for y := -1.0; y <= 1.0; y += 0.01 { 54 | for x := -1.0; x <= 1.0; x += 0.01 { 55 | ai := float64(FastAtan2(float32(y), float32(x))) 56 | af := math.Atan2(y, x) 57 | e := math.Abs(ai - af) 58 | sumE += e 59 | if e > maxE { 60 | maxE = e 61 | } 62 | count++ 63 | } 64 | } 65 | if maxE > 0.0102 { 66 | t.Errorf("Expected max error of 0.0102 got %f", maxE) 67 | } 68 | t.Logf("Max error %f\n", maxE) 69 | t.Logf("Mean absolute error %f", sumE/float64(count)) 70 | } 71 | 72 | func TestFastAtan2_2Error(t *testing.T) { 73 | maxE := 0.0 74 | sumE := 0.0 75 | count := 0 76 | for y := -1.0; y <= 1.0; y += 0.01 { 77 | for x := -1.0; x <= 1.0; x += 0.01 { 78 | ai := float64(FastAtan2_2(float32(y), float32(x))) 79 | af := math.Atan2(y, x) 80 | e := math.Abs(ai - af) 81 | sumE += e 82 | if e > maxE { 83 | maxE = e 84 | } 85 | count++ 86 | } 87 | } 88 | if maxE > 0.005 { 89 | t.Errorf("Expected max error of 0.005 got %f", maxE) 90 | } 91 | t.Logf("Max error %f\n", maxE) 92 | t.Logf("Mean absolute error %f", sumE/float64(count)) 93 | } 94 | 
95 | func TestVScaleF32(t *testing.T) { 96 | simdTest(t, func(t *testing.T) { 97 | input := make([]float32, 257) 98 | for i := 0; i < len(input); i++ { 99 | input[i] = float32(i) 100 | } 101 | expected := make([]float32, len(input)) 102 | output := make([]float32, len(input)) 103 | vscaleF32(input, expected, 1.0/256.0) 104 | VScaleF32(input, output, 1.0/256.0) 105 | for i, v := range expected { 106 | if output[i] != v { 107 | t.Fatalf("Output doesn't match expected:\n%+v\n%+v", output, expected) 108 | } 109 | } 110 | 111 | // Unaligned 112 | input = input[1:] 113 | expected = make([]float32, len(input)+1)[1:] 114 | output = make([]float32, len(input)+1)[1:] 115 | vscaleF32(input, expected, 1.0/256.0) 116 | VScaleF32(input, output, 1.0/256.0) 117 | for i, v := range expected { 118 | if output[i] != v { 119 | t.Fatalf("Output doesn't match expected:\n%+v\n%+v", output, expected) 120 | } 121 | } 122 | }) 123 | } 124 | 125 | func TestVAbsC64(t *testing.T) { 126 | input := []complex64{ 127 | complex(0.0, 0.0), 128 | complex(1.0, 1.0), 129 | complex(1.3, -2.7), 130 | complex(0.0, -1.0), 131 | complex(1.0, 0.0), 132 | complex(-2.3, 1.9), 133 | } 134 | expected := make([]float32, len(input)) 135 | for i, v := range input { 136 | expected[i] = float32(math.Sqrt(float64(real(v)*real(v) + imag(v)*imag(v)))) 137 | } 138 | output := make([]float32, len(input)) 139 | VAbsC64(input, output) 140 | for i, v := range output { 141 | if !approxEqual32(v, expected[i], 1e-20) { 142 | t.Errorf("Expected %+v got %+v for %+v", expected[i], v, input[i]) 143 | } 144 | } 145 | } 146 | 147 | func TestVMaxF32(t *testing.T) { 148 | simdTest(t, func(t *testing.T) { 149 | input := make([]float32, 123) 150 | for i := 0; i < len(input); i++ { 151 | input[i] = rand.Float32() - 0.5 152 | } 153 | expected := vMaxF32(input) 154 | max := VMaxF32(input) 155 | if max != expected { 156 | t.Fatalf("Expected %f got %f", expected, max) 157 | } 158 | 159 | // Test SIMD by having max in each specific lane 160 | 
for i := 0; i < 1024; i++ { 161 | input := make([]float32, 1024) 162 | input[i] = 1.0 163 | if max := VMaxF32(input); max != 1.0 { 164 | t.Fatalf("Expected 1.0 got %f at position %d", max, i) 165 | } 166 | } 167 | 168 | // Ascending 169 | input = []float32{-4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0} 170 | if max := VMaxF32(input); max != 4.0 { 171 | t.Fatalf("Expected 4.0 got %f", max) 172 | } 173 | 174 | // Descending 175 | input = []float32{4.0, 3.0, 2.0, 1.0, 0.0, -1.0, -2.0, -3.0, -4.0} 176 | if max := VMaxF32(input); max != 4.0 { 177 | t.Fatalf("Expected 4.0 got %f", max) 178 | } 179 | 180 | // Unordered 181 | input = []float32{1.5, -4.0, 8.0, 0.0, -1.0, 2.0, -3.0} 182 | if max := VMaxF32(input); max != 8.0 { 183 | t.Fatalf("Expected 8.0 got %f", max) 184 | } 185 | }) 186 | } 187 | 188 | func TestVMinF32(t *testing.T) { 189 | simdTest(t, func(t *testing.T) { 190 | input := make([]float32, 123) 191 | for i := 0; i < len(input); i++ { 192 | input[i] = rand.Float32() - 0.5 193 | } 194 | expected := vMinF32(input) 195 | min := VMinF32(input) 196 | if min != expected { 197 | t.Fatalf("Expected %f got %f", expected, min) 198 | } 199 | 200 | // Test SIMD by having min in each specific lane 201 | for i := 0; i < 1024; i++ { 202 | input := make([]float32, 1024) 203 | input[i] = -1.0 204 | if min := VMinF32(input); min != -1.0 { 205 | t.Fatalf("Expected -1.0 got %f at position %d", min, i) 206 | } 207 | } 208 | 209 | // Ascending 210 | input = []float32{-4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0} 211 | if min := VMinF32(input); min != -4.0 { 212 | t.Fatalf("Expected -4.0 got %f", min) 213 | } 214 | 215 | // Descending 216 | input = []float32{4.0, 3.0, 2.0, 1.0, 0.0, -1.0, -2.0, -3.0, -4.0} 217 | if min := VMinF32(input); min != -4.0 { 218 | t.Fatalf("Expected -4.0 got %f", min) 219 | } 220 | 221 | // Unordered 222 | input = []float32{1.5, -4.0, 8.0, 0.0, -1.0, 2.0, -3.0} 223 | if min := VMinF32(input); min != -4.0 { 224 | t.Fatalf("Expected -4.0 got %f", min) 
225 | } 226 | }) 227 | } 228 | 229 | func BenchmarkConj32(b *testing.B) { 230 | in := complex64(complex(1.0, -0.2)) 231 | for i := 0; i < b.N; i++ { 232 | _ = Conj32(in) 233 | } 234 | } 235 | 236 | func BenchmarkFastAtan2(b *testing.B) { 237 | for i := 0; i < b.N; i++ { 238 | for _, xy := range atanBenchTable { 239 | FastAtan2(xy[1], xy[0]) 240 | } 241 | } 242 | } 243 | 244 | func BenchmarkFastAtan2_Go(b *testing.B) { 245 | for i := 0; i < b.N; i++ { 246 | for _, xy := range atanBenchTable { 247 | fastAtan2(xy[1], xy[0]) 248 | } 249 | } 250 | } 251 | 252 | func BenchmarkFastAtan2_2(b *testing.B) { 253 | for i := 0; i < b.N; i++ { 254 | for _, xy := range atanBenchTable { 255 | FastAtan2_2(xy[1], xy[0]) 256 | } 257 | } 258 | } 259 | 260 | func BenchmarkFastAtan2_2_Go(b *testing.B) { 261 | for i := 0; i < b.N; i++ { 262 | for _, xy := range atanBenchTable { 263 | fastAtan2_2(xy[1], xy[0]) 264 | } 265 | } 266 | } 267 | 268 | func BenchmarkAtan2(b *testing.B) { 269 | for i := 0; i < b.N; i++ { 270 | for _, xy := range atanBenchTable { 271 | math.Atan2(float64(xy[1]), float64(xy[0])) 272 | } 273 | } 274 | } 275 | 276 | func BenchmarkVScaleF32(b *testing.B) { 277 | input := make([]float32, benchSize) 278 | output := make([]float32, len(input)) 279 | b.SetBytes(benchSize) 280 | b.ResetTimer() 281 | for i := 0; i < b.N; i++ { 282 | VScaleF32(input, output, 1.0/benchSize) 283 | } 284 | } 285 | 286 | func BenchmarkVScaleF32_Go(b *testing.B) { 287 | input := make([]float32, benchSize) 288 | output := make([]float32, len(input)) 289 | b.SetBytes(benchSize) 290 | b.ResetTimer() 291 | for i := 0; i < b.N; i++ { 292 | vscaleF32(input, output, 1.0/benchSize) 293 | } 294 | } 295 | 296 | func BenchmarkVAbsC64(b *testing.B) { 297 | input := make([]complex64, benchSize) 298 | output := make([]float32, len(input)) 299 | b.SetBytes(benchSize) 300 | b.ResetTimer() 301 | for i := 0; i < b.N; i++ { 302 | VAbsC64(input, output) 303 | } 304 | } 305 | 306 | func BenchmarkVAbsC64_Go(b 
*testing.B) { 307 | input := make([]complex64, benchSize) 308 | output := make([]float32, len(input)) 309 | b.SetBytes(benchSize) 310 | b.ResetTimer() 311 | for i := 0; i < b.N; i++ { 312 | vAbsC64(input, output) 313 | } 314 | } 315 | 316 | func BenchmarkVMaxF32_Random(b *testing.B) { 317 | input := make([]float32, benchSize) 318 | r := rand.New(rand.NewSource(0)) 319 | for i := 0; i < len(input); i++ { 320 | input[i] = r.Float32() 321 | } 322 | b.SetBytes(benchSize) 323 | b.ResetTimer() 324 | for i := 0; i < b.N; i++ { 325 | _ = VMaxF32(input) 326 | } 327 | } 328 | 329 | func BenchmarkVMaxF32_Ascending(b *testing.B) { 330 | input := make([]float32, benchSize) 331 | for i := 0; i < len(input); i++ { 332 | input[i] = float32(i) 333 | } 334 | b.SetBytes(benchSize) 335 | b.ResetTimer() 336 | for i := 0; i < b.N; i++ { 337 | _ = VMaxF32(input) 338 | } 339 | } 340 | 341 | func BenchmarkVMaxF32_Descending(b *testing.B) { 342 | input := make([]float32, benchSize) 343 | for i := 0; i < len(input); i++ { 344 | input[i] = float32(-i) 345 | } 346 | b.SetBytes(benchSize) 347 | b.ResetTimer() 348 | for i := 0; i < b.N; i++ { 349 | _ = VMaxF32(input) 350 | } 351 | } 352 | 353 | func BenchmarkVMaxF32_Alternating(b *testing.B) { 354 | input := make([]float32, benchSize) 355 | for i := 0; i < len(input); i++ { 356 | if i&1 == 0 { 357 | input[i] = float32(i) 358 | } else { 359 | input[i] = float32(-i) 360 | } 361 | } 362 | b.SetBytes(benchSize) 363 | b.ResetTimer() 364 | for i := 0; i < b.N; i++ { 365 | _ = VMaxF32(input) 366 | } 367 | } 368 | 369 | func BenchmarkVMaxF32_Go_Random(b *testing.B) { 370 | input := make([]float32, benchSize) 371 | r := rand.New(rand.NewSource(0)) 372 | for i := 0; i < len(input); i++ { 373 | input[i] = r.Float32() 374 | } 375 | b.SetBytes(benchSize) 376 | b.ResetTimer() 377 | for i := 0; i < b.N; i++ { 378 | _ = vMaxF32(input) 379 | } 380 | } 381 | 382 | func BenchmarkVMaxF32_Go_Ascending(b *testing.B) { 383 | input := make([]float32, benchSize) 384 | 
for i := 0; i < len(input); i++ { 385 | input[i] = float32(i) 386 | } 387 | b.SetBytes(benchSize) 388 | b.ResetTimer() 389 | for i := 0; i < b.N; i++ { 390 | _ = vMaxF32(input) 391 | } 392 | } 393 | 394 | func BenchmarkVMaxF32_Go_Decending(b *testing.B) { 395 | input := make([]float32, benchSize) 396 | for i := 0; i < len(input); i++ { 397 | input[i] = float32(-i) 398 | } 399 | b.SetBytes(benchSize) 400 | b.ResetTimer() 401 | for i := 0; i < b.N; i++ { 402 | _ = vMaxF32(input) 403 | } 404 | } 405 | 406 | func BenchmarkVMaxF32_Go_Alternating(b *testing.B) { 407 | input := make([]float32, benchSize) 408 | for i := 0; i < len(input); i++ { 409 | if i&1 == 0 { 410 | input[i] = float32(i) 411 | } else { 412 | input[i] = float32(-i) 413 | } 414 | } 415 | b.SetBytes(benchSize) 416 | b.ResetTimer() 417 | for i := 0; i < b.N; i++ { 418 | _ = vMaxF32(input) 419 | } 420 | } 421 | -------------------------------------------------------------------------------- /dsp/mathfixed.go: -------------------------------------------------------------------------------- 1 | package dsp 2 | 3 | import "math" 4 | 5 | const ( 6 | fixedPi = 1 << 14 7 | fixedPi4 = fixedPi / 4 8 | fixedPi34 = 3 * fixedPi / 4 9 | ) 10 | 11 | const ( 12 | atanLUTSize = 131072 // 512 KiB 13 | atanLUTCoef = 8 14 | ) 15 | 16 | var atanLUT []int 17 | 18 | func init() { 19 | atanLUT = make([]int, atanLUTSize) 20 | for i := 0; i < atanLUTSize; i++ { 21 | atanLUT[i] = int(math.Atan(float64(i)/float64(1< 0: 29 | return 1 << 13 30 | case y < 0: 31 | return -(1 << 13) 32 | } 33 | return 0 34 | } 35 | 36 | t := (y << atanLUTCoef) / x 37 | if t == 0 { 38 | switch { 39 | case x > 0: 40 | return 0 41 | case y < 0: 42 | return -(1 << 14) 43 | } 44 | return 1 << 14 45 | } 46 | 47 | if t >= atanLUTSize || -t >= atanLUTSize { 48 | if y > 0 { 49 | return 1 << 13 50 | } 51 | return -(1 << 13) 52 | } 53 | 54 | if t > 0 { 55 | if y > 0 { 56 | return atanLUT[t] 57 | } 58 | return atanLUT[t] - (1 << 14) 59 | } 60 | if y > 0 { 61 | 
return (1 << 14) - atanLUT[-t] 62 | } 63 | return -atanLUT[-t] 64 | } 65 | 66 | func FastAtan2Fixed(y, x int) int { 67 | if x == 0 && y == 0 { 68 | return 0 69 | } 70 | 71 | yAbs := y 72 | if yAbs < 0 { 73 | yAbs = -yAbs 74 | } 75 | 76 | var angle int 77 | if x >= 0 { 78 | angle = fixedPi4 - fixedPi4*(x-yAbs)/(x+yAbs) 79 | } else { 80 | angle = fixedPi34 - fixedPi4*(x+yAbs)/(yAbs-x) 81 | } 82 | if y < 0 { 83 | return -angle 84 | } 85 | return angle 86 | } 87 | -------------------------------------------------------------------------------- /dsp/mathfixed_arm.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | 3 | // TEXT ·Atan2LUT(SB),NOSPLIT,$0 4 | // MOVW x+4(FP), R4 5 | // MOVW y+0(FP), R3 6 | // MOVW $0, R0 7 | // TEQ $0, R4 8 | // BNE L1 9 | // CMP $0, R3 10 | // BLE L2 11 | // MOVW $8192, R0 12 | // MOVW R0, res+8(FP) 13 | // RET 14 | // L2: 15 | // CMP $0, R3 16 | // BGE L3 17 | // MOVW $-8192, R0 18 | // MOVW R0, res+8(FP) 19 | // RET 20 | // L3: 21 | // MOVW $0, R0 22 | // MOVW R0, res+8(FP) 23 | // RET 24 | // L1: 25 | // MOVW R3<<8, R0 26 | // MOVW 0(R13), R11 27 | // MOVW.W R11, -8(R13) 28 | // MOVW R4, 4(R13) 29 | // MOVW R0, R11 30 | // BL _div(SB) 31 | // MOVW R11, R2 32 | // ADD $8, R13 33 | // TEQ $0, R2 34 | // BNE L4 35 | // CMP $0, R4 36 | // BLE L5 37 | // MOVW $0,R0 38 | // MOVW R0, res+8(FP) 39 | // RET 40 | // L5: 41 | // CMP $0, R3, 42 | // BGE L6 43 | // MOVW $-16384, R0 44 | // MOVW R0, res+8(FP) 45 | // RET 46 | // L6: 47 | // MOVW $16384,R0 48 | // MOVW R0, res+8(FP) 49 | // RET 50 | // L4: 51 | // MOVW $131072, R4 52 | // CMP R4, R2, 53 | // BGE L7 54 | // RSB $0, R2, R4 55 | // MOVW $131072, R5 56 | // CMP R5, R4 57 | // BGE L7 58 | // CMP $0, R2 59 | // BLE L8 60 | // CMP $0, R3 61 | // BLE L9 62 | // MOVW $atanLUT+0(SB), R0 63 | // // MOVW R2, R1 64 | // // MOVW 4(R0), R2 65 | // // CMP R2, R1 66 | // // BLO L10 67 | // // PCDATA $1,$0 68 | // // BL 
,runtime.panicindex(SB) 69 | // // L10: 70 | // MOVW 0(R0), R0 71 | // MOVW R1<<2(R0), R1 72 | // MOVW R1, res+8(FP) 73 | // RET 74 | // L9: 75 | // MOVW $atanLUT+0(SB),R0 76 | // // 0x0120 00288 MOVW R2,R1 77 | // // 0x0124 00292 MOVW 4(R0),R2 78 | // // 0x0128 00296 CMP R2,R1, 79 | // // 0x012c 00300 BLO ,312 80 | // // 0x0130 00304 PCDATA $1,$0 81 | // // 0x0130 00304 BL ,runtime.panicindex(SB) 82 | // // 0x0134 00308 UNDEF , 83 | // MOVW 0(R0),R0 84 | // MOVW R1<<2(R0),R0 85 | // SUB $16384,R0 86 | // MOVW R0, res+8(FP) 87 | // RET 88 | // L8: 89 | // CMP $0, R3, 90 | // BLE L10 91 | // RSB $0,R2,R1 92 | // MOVW $atanLUT+0(SB),R0 93 | // // 0x015c 00348 MOVW 4(R0),R2 94 | // // 0x0160 00352 CMP R2,R1, 95 | // // 0x0164 00356 BLO ,368 96 | // // 0x0168 00360 PCDATA $1,$0 97 | // // 0x0168 00360 BL ,runtime.panicindex(SB) 98 | // // 0x016c 00364 UNDEF , 99 | // MOVW 0(R0),R0 100 | // MOVW R1<<2(R0),R0 101 | // MOVW $16384,R1 102 | // SUB R0,R1 103 | // MOVW R1, res+8(FP) 104 | // RET 105 | // L10: 106 | // RSB $0,R2,R1 107 | // MOVW $atanLUT+0(SB),R0 108 | // // 0x0190 00400 MOVW 4(R0),R2 109 | // // 0x0194 00404 CMP R2,R1, 110 | // // 0x0198 00408 BLO ,420 111 | // // 0x019c 00412 PCDATA $1,$0 112 | // // 0x019c 00412 BL ,runtime.panicindex(SB) 113 | // // 0x01a0 00416 UNDEF , 114 | // MOVW 0(R0),R0 115 | // MOVW R1<<2(R0),R0 116 | // RSB $0,R0 117 | // MOVW R0, res+8(FP) 118 | // RET 119 | // L7: 120 | // CMP $0, R3 121 | // BLE L11 122 | // MOVW $8192,R0 123 | // MOVW R0, res+8(FP) 124 | // RET 125 | // L11: 126 | // MOVW $-8192, R0 127 | // MOVW R0, res+8(FP) 128 | // RET 129 | // // 0x01dc 00476 WORD ,$-8192 130 | // // 0x01e0 00480 WORD ,$-16384 131 | // // 0x01e4 00484 WORD ,$"".atanLUT+0(SB) 132 | 133 | // TEXT ·FastAtan2Fixed(SB),NOSPLIT,$0-12 134 | // MOVW y+0(FP), R5 135 | // MOVW x+4(FP), R4 136 | // TEQ $0, R4 137 | // BNE fatan2fixed_1 138 | // TEQ $0, R5 139 | // BNE fatan2fixed_1 140 | // MOVW R4, res+8(FP) 141 | // RET 142 | // fatan2fixed_1: 143 
| // MOVW R5, R3 // yAbs = y 144 | // CMP $0, R5 145 | // RSB.LT $0, R3, R3 // if yAbs < 0 : yAbs = -yAbs 146 | // CMP $0, R4 147 | // BLT fatan2fixed_2 148 | 149 | // SUB R3, R4, R11 150 | // MOVW R11<<12, R11 151 | // ADD R3, R4, R1 152 | 153 | // MOVW 0(R13), R0 154 | // MOVW.W R0, -8(R13) 155 | // MOVW R1, 4(R13) 156 | // BL _div(SB) 157 | // ADD $8, R13 158 | 159 | // MOVW $4096, R1 160 | // SUB R11, R1, R2 161 | // CMP $0, R5 162 | // RSB.LT $0, R2, R2 163 | // MOVW R2, res+8(FP) 164 | // RET 165 | // fatan2fixed_2: 166 | // ADD R3, R4, R11 167 | // MOVW R11<<12, R11 168 | // SUB R4, R3, R1 169 | 170 | // MOVW 0(R13), R0 171 | // MOVW.W R0, -8(R13) 172 | // MOVW R1, 4(R13) 173 | // BL _div(SB) 174 | // ADD $8, R13 175 | 176 | // MOVW $12288, R1 177 | // SUB R11, R1, R2 178 | // CMP $0, R5 179 | // RSB.LT $0, R2, R2 180 | // MOVW R2, res+8(FP) 181 | // RET 182 | 183 | -------------------------------------------------------------------------------- /dsp/mathfixed_test.go: -------------------------------------------------------------------------------- 1 | package dsp 2 | 3 | import ( 4 | "math" 5 | "testing" 6 | ) 7 | 8 | func TestFastAtan2FixedError(t *testing.T) { 9 | maxE := 0.0 10 | sumE := 0.0 11 | count := 0 12 | for y := -32768; y < 32768; y += 64 { 13 | for x := -32768; x < 32768; x += 64 { 14 | ai := float64(FastAtan2Fixed(y, x)) * math.Pi / (1 << 14) 15 | af := math.Atan2(float64(y), float64(x)) 16 | e := math.Abs(ai - af) 17 | sumE += e 18 | if e > maxE { 19 | maxE = e 20 | } 21 | count++ 22 | } 23 | } 24 | if maxE > 0.08 { 25 | t.Errorf("Expected max error of 0.08 got %f", maxE) 26 | } 27 | t.Logf("Max error %f\n", maxE) 28 | t.Logf("Mean absolute error %f", sumE/float64(count)) 29 | } 30 | 31 | func TestAtan2LUTError(t *testing.T) { 32 | maxE := 0.0 33 | sumE := 0.0 34 | count := 0 35 | for y := -32768; y < 32768; y += 64 { 36 | for x := -32768; x < 32768; x += 64 { 37 | ai := float64(Atan2LUT(y, x)) * math.Pi / (1 << 14) 38 | af := 
math.Atan2(float64(y), float64(x)) 39 | e := math.Abs(ai - af) 40 | sumE += e 41 | if e > maxE { 42 | maxE = e 43 | } 44 | count++ 45 | } 46 | } 47 | if maxE > 0.005 { 48 | t.Errorf("Expected max error of 0.005 got %f", maxE) 49 | } 50 | t.Logf("Max error %f\n", maxE) 51 | t.Logf("Mean absolute error %f", sumE/float64(count)) 52 | } 53 | 54 | func BenchmarkFastAtan2Fixed(b *testing.B) { 55 | for i := 0; i < b.N; i++ { 56 | for _, xy := range atanBenchTableFixed { 57 | FastAtan2Fixed(xy[1], xy[0]) 58 | } 59 | } 60 | } 61 | 62 | func BenchmarkAtan2LUT(b *testing.B) { 63 | for i := 0; i < b.N; i++ { 64 | for _, xy := range atanBenchTableFixed { 65 | Atan2LUT(xy[1], xy[0]) 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /dsp/sdft.go: -------------------------------------------------------------------------------- 1 | package dsp 2 | 3 | import ( 4 | "math" 5 | "math/cmplx" 6 | ) 7 | 8 | // TODO: damping 9 | 10 | // SDFT is a sliding DFT. 
type SDFT struct {
	i int          // slot in x written by the most recent Filter call
	w []complex128 // window coefficient applied to each tracked bin
	s []complex128 // running DFT value of each tracked bin
	x []complex128 // circular buffer of the last len(x) input samples
	e []complex128 // per-bin rotation factor exp(2*pi*i*j/n)
}

// NewSDFT returns a sliding DFT over the most recent n samples.
//
// It tracks len(window) adjacent bins of the n-point DFT, starting at bin
// k-len(window)/2, and weights them with the given frequency-domain window
// coefficients (for example HanningFreqCoeff). An empty window tracks bin k
// alone with unit weight. Bin indices wrap modulo n, so any k is accepted.
//
// NewSDFT panics if n is not positive: a window of zero samples has no
// defined DFT, and Filter would otherwise fail with a divide by zero.
func NewSDFT(k, n int, window []float64) *SDFT {
	if n <= 0 {
		panic("dsp: SDFT size must be positive")
	}

	win := []complex128{complex(1, 0)}
	if len(window) != 0 {
		win = make([]complex128, len(window))
		for i, w := range window {
			win[i] = complex(w, 0)
		}
	}

	s := &SDFT{
		w: win,
		x: make([]complex128, n),
		e: make([]complex128, len(win)),
		s: make([]complex128, len(win)),
	}
	first := k - len(win)/2
	for i := range win {
		// Reduce the bin index into [0, n). Go's % keeps the sign of the
		// dividend, so negative remainders are shifted up by n.
		j := (first + i) % n
		if j < 0 {
			j += n
		}
		s.e[i] = cmplx.Exp(complex(0, 2*math.Pi*float64(j)/float64(n)))
	}
	return s
}

// Filter pushes sample x into the window, discarding the sample received
// len(sd.x) calls earlier, and returns the window-weighted sum of the
// updated bins.
func (sd *SDFT) Filter(x complex128) complex128 {
	// The slot after the last write holds the oldest sample, which is the
	// one leaving the window.
	i := (sd.i + 1) % len(sd.x)
	x0 := sd.x[i]
	sd.x[i] = x
	sd.i = i

	// Every bin is updated with the same difference between the incoming
	// and outgoing sample, then rotated by its own factor.
	xd := x - x0
	var sum complex128
	for b, w := range sd.w {
		s := (xd + sd.s[b]) * sd.e[b]
		sd.s[b] = s
		sum += w * s
	}
	return sum
}

// SDFT32 is a 32-bit float version of a sliding DFT.
63 | type SDFT32 struct { 64 | i int 65 | w []complex64 66 | s []complex64 67 | x []complex64 68 | e []complex64 69 | } 70 | 71 | func NewSDFT32(k, n int, window []float32) *SDFT32 { 72 | var win []complex64 73 | if len(window) == 0 { 74 | win = []complex64{complex(1, 0)} 75 | } else { 76 | win = make([]complex64, len(window)) 77 | for i, w := range window { 78 | win[i] = complex(w, 0) 79 | } 80 | } 81 | s := &SDFT32{ 82 | w: win, 83 | x: make([]complex64, n), 84 | e: make([]complex64, len(win)), 85 | s: make([]complex64, len(win)), 86 | } 87 | for i := 0; i < len(win); i++ { 88 | j := k - len(win)/2 + i 89 | if j < 0 { 90 | j += n 91 | } else if j >= n { 92 | j -= n 93 | } 94 | s.e[i] = complex64(cmplx.Exp(complex(0, 2*math.Pi*float64(j)/float64(n)))) 95 | } 96 | return s 97 | } 98 | 99 | func (sd *SDFT32) Filter(x complex64) complex64 { 100 | i := (sd.i + 1) % len(sd.x) 101 | x0 := sd.x[i] 102 | sd.x[i] = x 103 | sd.i = i 104 | xd := x - x0 105 | var sum complex64 106 | for i, w := range sd.w { 107 | s := (xd + sd.s[i]) * sd.e[i] 108 | sd.s[i] = s 109 | sum += w * s 110 | } 111 | return sum 112 | } 113 | -------------------------------------------------------------------------------- /dsp/stub_windows.go: -------------------------------------------------------------------------------- 1 | // Code generated by command: go run conversion_avo_amd64.go -out conversion_avo_amd64.s -stubs stub_windows.go. DO NOT EDIT. 2 | 3 | package dsp 4 | 5 | // Ui8tof32 converts unsigned 8-bit samples to 32-bit float. 
6 | func Ui8tof32(input []byte, output []float32) 7 | -------------------------------------------------------------------------------- /dsp/util.go: -------------------------------------------------------------------------------- 1 | package dsp 2 | 3 | type ComplexSource interface { 4 | Source() ([]complex64, error) 5 | } 6 | 7 | type RealSink interface { 8 | Sink([]float32) error 9 | } 10 | 11 | type ComplexFilter interface { 12 | Filter([]complex64) ([]complex64, error) 13 | } 14 | 15 | type Demodulator interface { 16 | Demodulate(input []complex64, output []float32) (int, error) 17 | } 18 | 19 | type Rotate90Filter struct { 20 | } 21 | 22 | func (fi *Rotate90Filter) Filter(samples []complex64) []complex64 { 23 | return rotate90FilterAsm(fi, samples) 24 | } 25 | 26 | func rotate90FilterAsm(fi *Rotate90Filter, samples []complex64) []complex64 27 | 28 | func rotate90Filter(fi *Rotate90Filter, samples []complex64) []complex64 { 29 | for i := 0; i < len(samples); i += 4 { 30 | samples[i+1] = complex(-imag(samples[i+1]), real(samples[i+1])) 31 | samples[i+2] = -samples[i+2] 32 | samples[i+3] = complex(imag(samples[i+3]), -real(samples[i+3])) 33 | } 34 | return samples 35 | } 36 | 37 | type I32Rotate90Filter struct { 38 | } 39 | 40 | func (fi *I32Rotate90Filter) Filter(samples []int32) []int32 { 41 | return i32Rotate90FilterAsm(fi, samples) 42 | } 43 | 44 | func i32Rotate90FilterAsm(fi *I32Rotate90Filter, samples []int32) []int32 45 | 46 | func i32Rotate90Filter(fi *I32Rotate90Filter, samples []int32) []int32 { 47 | for i := 0; i < len(samples); i += 8 { 48 | samples[i+2], samples[i+3] = -samples[i+3], samples[i+2] 49 | samples[i+4] = -samples[i+4] 50 | samples[i+5] = -samples[i+5] 51 | samples[i+6], samples[i+7] = samples[i+7], -samples[i+6] 52 | } 53 | return samples 54 | } 55 | 56 | func rtoc(r []float64) []complex128 { 57 | c := make([]complex128, len(r)) 58 | for i, v := range r { 59 | c[i] = complex(v, 0) 60 | } 61 | return c 62 | } 63 | 64 | func rtoc32(r 
[]float32) []complex64 { 65 | c := make([]complex64, len(r)) 66 | for i, v := range r { 67 | c[i] = complex(v, 0) 68 | } 69 | return c 70 | } 71 | -------------------------------------------------------------------------------- /dsp/util_386.s: -------------------------------------------------------------------------------- 1 | TEXT ·rotate90FilterAsm(SB), 7, $0 2 | JMP ·rotate90Filter(SB) 3 | 4 | TEXT ·i32Rotate90FilterAsm(SB), 7, $0 5 | JMP ·i32Rotate90Filter(SB) 6 | -------------------------------------------------------------------------------- /dsp/util_amd64.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | 3 | TEXT ·rotate90FilterAsm(SB), NOSPLIT, $0 4 | JMP ·rotate90Filter(SB) 5 | 6 | TEXT ·i32Rotate90FilterAsm(SB), NOSPLIT, $0 7 | JMP ·i32Rotate90Filter(SB) 8 | -------------------------------------------------------------------------------- /dsp/util_arm.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | 3 | TEXT ·rotate90FilterAsm(SB), NOSPLIT, $0 4 | MOVW samples_len+8(FP), R7 5 | MOVW samples_ptr+4(FP), R8 6 | AND $(~3), R7 // round down to nearest multiple of 4 7 | 8 | TEQ $0, R7 9 | BEQ r90_end 10 | 11 | ADD R7<<3, R8, R7 12 | 13 | r90_loop: 14 | // First sample of the group of 4 doesn't change 15 | ADD $8, R8 16 | 17 | MOVM.IA (R8), [R0-R5] 18 | 19 | // samples[i+1] = complex(-imag(samples[i+1]), real(samples[i+1])) 20 | MOVW R0, R6 21 | EOR $(1<<31), R1, R0 22 | MOVW R6, R1 23 | 24 | // samples[i+2] = -samples[i+2] 25 | EOR $(1<<31), R2 26 | EOR $(1<<31), R3 27 | 28 | // samples[i+3] = complex(imag(samples[i+3]), -real(samples[i+3])) 29 | EOR $(1<<31), R4, R6 30 | MOVW R5, R4 31 | MOVW R6, R5 32 | MOVM.IA.W [R0-R5], (R8) 33 | 34 | CMP R8, R7 35 | BGT r90_loop 36 | 37 | r90_end: 38 | MOVW samples_ptr+4(FP), R0 39 | MOVW R0, ret_ptr+16(FP) 40 | MOVW samples_len+8(FP), R0 41 | MOVW R0, ret_len+20(FP) 42 | MOVW 
samples_cap+12(FP), R0 43 | MOVW R0, ret_cap+24(FP) 44 | RET 45 | 46 | TEXT ·i32Rotate90FilterAsm(SB), NOSPLIT, $0 47 | MOVW samples_len+8(FP), R7 48 | MOVW samples_ptr+4(FP), R8 49 | AND $(~3), R7 // round down to nearest multiple of 4 50 | 51 | TEQ $0, R7 52 | BEQ i32r90_end 53 | 54 | ADD R7<<2, R8, R7 55 | 56 | i32r90_loop: 57 | // First sample of the group of 4 doesn't change 58 | ADD $8, R8 59 | 60 | MOVM.IA (R8), [R0-R5] 61 | 62 | // samples[i+1] = complex(-imag(samples[i+1]), real(samples[i+1])) 63 | MOVW R0, R6 64 | MVN R1, R0 65 | MOVW R6, R1 66 | 67 | // samples[i+2] = -samples[i+2] 68 | MVN R2, R2 69 | MVN R3, R3 70 | 71 | // samples[i+3] = complex(imag(samples[i+3]), -real(samples[i+3])) 72 | MVN R4, R6 73 | MOVW R5, R4 74 | MOVW R6, R5 75 | MOVM.IA.W [R0-R5], (R8) 76 | 77 | CMP R8, R7 78 | BGT i32r90_loop 79 | 80 | i32r90_end: 81 | MOVW samples_ptr+4(FP), R0 82 | MOVW R0, ret_ptr+16(FP) 83 | MOVW samples_len+8(FP), R0 84 | MOVW R0, ret_len+20(FP) 85 | MOVW samples_cap+12(FP), R0 86 | MOVW R0, ret_cap+24(FP) 87 | RET 88 | -------------------------------------------------------------------------------- /dsp/util_arm64.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | 3 | TEXT ·rotate90FilterAsm(SB), NOSPLIT, $0 4 | B ·rotate90Filter(SB) 5 | 6 | TEXT ·i32Rotate90FilterAsm(SB), NOSPLIT, $0 7 | B ·i32Rotate90Filter(SB) 8 | -------------------------------------------------------------------------------- /dsp/util_test.go: -------------------------------------------------------------------------------- 1 | package dsp 2 | 3 | import ( 4 | "math" 5 | "testing" 6 | ) 7 | 8 | func approxEqual(a, b, e float64) bool { 9 | return math.Abs(a-b) <= e 10 | } 11 | 12 | func approxEqual32(a, b, e float32) bool { 13 | return math.Abs(float64(a)-float64(b)) <= float64(e) 14 | } 15 | 16 | func TestRotate90Filter(t *testing.T) { 17 | filter := &Rotate90Filter{} 18 | input := make([]complex64, 256) 19 | for i := 
0; i < 256; i++ { 20 | input[i] = complex(float32(i)-128.0, -(float32(i) - 128.0)) 21 | } 22 | output := make([]complex64, 256) 23 | copy(output, input) 24 | output = rotate90FilterAsm(filter, output) 25 | expected := make([]complex64, 256) 26 | copy(expected, input) 27 | expected = rotate90Filter(filter, expected) 28 | if len(output) != len(expected) { 29 | t.Fatalf("Output doesn't match expected: %+v != %+v", output, expected) 30 | } 31 | for i := 0; i < len(output); i++ { 32 | if output[i] != expected[i] { 33 | t.Fatalf("Output doesn't match expected: %+v != %+v", output, expected) 34 | } 35 | } 36 | } 37 | 38 | func BenchmarkRotate90Filter(b *testing.B) { 39 | filter := &Rotate90Filter{} 40 | input := make([]complex64, benchSize) 41 | b.SetBytes(benchSize) 42 | b.ResetTimer() 43 | for i := 0; i < b.N; i++ { 44 | _ = rotate90FilterAsm(filter, input) 45 | } 46 | } 47 | 48 | func BenchmarkRotate90Filter_Go(b *testing.B) { 49 | filter := &Rotate90Filter{} 50 | input := make([]complex64, benchSize) 51 | b.SetBytes(benchSize) 52 | b.ResetTimer() 53 | for i := 0; i < b.N; i++ { 54 | _ = rotate90Filter(filter, input) 55 | } 56 | } 57 | 58 | func BenchmarkI32Rotate90Filter(b *testing.B) { 59 | filter := &I32Rotate90Filter{} 60 | input := make([]int32, 2*benchSize) 61 | b.SetBytes(benchSize) 62 | b.ResetTimer() 63 | for i := 0; i < b.N; i++ { 64 | _ = i32Rotate90FilterAsm(filter, input) 65 | } 66 | } 67 | 68 | func BenchmarkI32Rotate90Filter_Go(b *testing.B) { 69 | filter := &I32Rotate90Filter{} 70 | input := make([]int32, 2*benchSize) 71 | b.SetBytes(benchSize) 72 | b.ResetTimer() 73 | for i := 0; i < b.N; i++ { 74 | _ = i32Rotate90Filter(filter, input) 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /dsp/window.go: -------------------------------------------------------------------------------- 1 | package dsp 2 | 3 | import ( 4 | "math" 5 | "strconv" 6 | ) 7 | 8 | var ( 9 | BlackmanFreqCoeff = []float64{0.16 / 4, 
-1.0 / 4, (1 - 0.16) / 2, -1.0 / 4, 0.16 / 4} 10 | HammingFreqCoeff = []float64{(0.53836 - 1) / 2, 0.53836, (0.53836 - 1) / 2} 11 | HanningFreqCoeff = []float64{-0.25, 0.5, -0.25} 12 | 13 | BlackmanFreqCoeff32 = []float32{0.16 / 4, -1.0 / 4, (1 - 0.16) / 2, -1.0 / 4, 0.16 / 4} 14 | HammingFreqCoeff32 = []float32{(0.53836 - 1) / 2, 0.53836, (0.53836 - 1) / 2} 15 | HanningFreqCoeff32 = []float32{-0.25, 0.5, -0.25} 16 | ) 17 | 18 | func TriangleWindow(output []float64) { 19 | for n := range output { 20 | output[n] = 1 - math.Abs((float64(n)-float64(len(output)-1)/2.0)/(float64(len(output)+1)/2.0)) 21 | } 22 | } 23 | 24 | func TriangleWindowF32(output []float32) { 25 | for n := range output { 26 | output[n] = float32(1 - math.Abs((float64(n)-float64(len(output)-1)/2.0)/(float64(len(output)+1)/2.0))) 27 | } 28 | } 29 | 30 | func HammingWindow(output []float64) { 31 | window(output, []float64{0.53836, 1 - 0.53836}) 32 | } 33 | 34 | func HammingWindowF32(output []float32) { 35 | windowF32(output, []float64{0.53836, 1 - 0.53836}) 36 | } 37 | 38 | func HanningWindow(output []float64) { 39 | for n := range output { 40 | output[n] = 0.5 * (1 - math.Cos(2*math.Pi*float64(n)/float64(len(output)-1))) 41 | } 42 | } 43 | 44 | func HanningWindowF32(output []float32) { 45 | for n := range output { 46 | output[n] = float32(0.5 * (1 - math.Cos(2*math.Pi*float64(n)/float64(len(output)-1)))) 47 | } 48 | } 49 | 50 | func BlackmanWindow(output []float64) { 51 | a := 0.16 52 | window(output, []float64{(1.0 - a) / 2.0, 1.0 / 2.0, a / 2.0}) 53 | } 54 | 55 | func BlackmanWindowF32(output []float32) { 56 | a := 0.16 57 | windowF32(output, []float64{(1.0 - a) / 2.0, 1.0 / 2.0, a / 2.0}) 58 | } 59 | 60 | func NuttallWindow(output []float64) { 61 | window(output, []float64{0.355768, 0.487396, 0.144232, 0.012604}) 62 | } 63 | 64 | func NuttallWindowF32(output []float32) { 65 | windowF32(output, []float64{0.355768, 0.487396, 0.144232, 0.012604}) 66 | } 67 | 68 | func window(output []float64, a 
[]float64) { 69 | if len(a) < 1 || len(a) > 4 { 70 | panic("invalid window length " + strconv.Itoa(len(a))) 71 | } 72 | nn := float64(len(output) - 1) 73 | for n := range output { 74 | fn := float64(n) 75 | v := a[0] 76 | if len(a) > 1 { 77 | v -= a[1] * math.Cos(2*math.Pi*fn/nn) 78 | } 79 | if len(a) > 2 { 80 | v += a[2] * math.Cos(4*math.Pi*fn/nn) 81 | } 82 | if len(a) > 3 { 83 | v -= a[3] * math.Cos(6*math.Pi*fn/nn) 84 | } 85 | output[n] = v 86 | } 87 | } 88 | 89 | func windowF32(output []float32, a []float64) { 90 | if len(a) < 1 || len(a) > 4 { 91 | panic("invalid window length " + strconv.Itoa(len(a))) 92 | } 93 | nn := float64(len(output) - 1) 94 | for n := range output { 95 | fn := float64(n) 96 | v := a[0] 97 | if len(a) > 1 { 98 | v -= a[1] * math.Cos(2*math.Pi*fn/nn) 99 | } 100 | if len(a) > 2 { 101 | v += a[2] * math.Cos(4*math.Pi*fn/nn) 102 | } 103 | if len(a) > 3 { 104 | v -= a[3] * math.Cos(6*math.Pi*fn/nn) 105 | } 106 | output[n] = float32(v) 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /examples/ax25.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/hex" 5 | "flag" 6 | "fmt" 7 | "io" 8 | "log" 9 | "math" 10 | "os" 11 | 12 | "github.com/samuel/go-dsp/dsp" 13 | "github.com/samuel/go-dsp/dsp/ax25" 14 | ) 15 | 16 | var flagVerbose = flag.Bool("v", false, "Verbose output") 17 | 18 | func main() { 19 | flag.Parse() 20 | 21 | rd := os.Stdin 22 | if len(flag.Args()) > 0 && flag.Arg(0) != "-" { 23 | fi, err := os.Open(flag.Arg(0)) 24 | if err != nil { 25 | log.Fatal(err) 26 | } 27 | defer fi.Close() 28 | rd = fi 29 | } 30 | 31 | sampleRate := 44100 32 | baud := 1200 33 | window := 4 34 | interp := 1 35 | blockSize := sampleRate / baud 36 | 37 | goer := dsp.NewGoertzel([]int{1200, 2200}, sampleRate*interp, blockSize*interp) 38 | 39 | threshold := 50.0 40 | 41 | buf := make([]byte, window*2) 42 | samples := 
make([]float32, blockSize*interp) 43 | lastSample := float32(0.0) 44 | 45 | currentTime := float64(0.0) 46 | bitClock := 1.0 / float64(baud) 47 | windowTime := float64(window) / float64(sampleRate) 48 | timeDelta := 0.0 49 | prevBit := 0 50 | transition := false 51 | 52 | ax := ax25.NewDecoder() 53 | for { 54 | _, err := rd.Read(buf) 55 | if err == io.EOF { 56 | break 57 | } else if err != nil { 58 | log.Fatal(err) 59 | } 60 | 61 | copy(samples, samples[window*interp:]) 62 | 63 | si := len(samples) - window*interp 64 | for i := 0; i < len(buf); i += 2 { 65 | s := float32(int16(buf[i])|(int16(buf[i+1])<<8)) / 32768.0 66 | if interp > 1 { 67 | d := (s - lastSample) / float32(interp) 68 | for j := 1; j < interp; j++ { 69 | lastSample += d 70 | samples[si] = lastSample 71 | si++ 72 | } 73 | lastSample = s 74 | } 75 | samples[si] = s 76 | si++ 77 | } 78 | 79 | goer.Reset() 80 | goer.Feed(samples) 81 | mags := goer.Magnitude() 82 | diff := mags[0] - mags[1] 83 | 84 | if math.Abs(float64(diff)) > threshold { 85 | b := 1 86 | if diff < 0 { 87 | b = 0 88 | } 89 | if prevBit != b { 90 | transition = true 91 | prevBit = b 92 | // Align transitions to middle of clock tick 93 | timeDelta = bitClock/2.0 - currentTime 94 | } 95 | } 96 | 97 | currentTime += windowTime 98 | for currentTime >= bitClock { 99 | currentTime -= bitClock 100 | b := 1 101 | if transition { 102 | b = 0 103 | currentTime += timeDelta 104 | timeDelta = 0.0 105 | } 106 | frame := ax.Feed(b) 107 | if frame != nil { 108 | if *flagVerbose { 109 | fmt.Printf("%+v\n", frame) 110 | } else { 111 | fmt.Printf("%s to %s", frame.Source, frame.Destination) 112 | if len(frame.Repeaters) != 0 { 113 | fmt.Print(" via ") 114 | for i, r := range frame.Repeaters { 115 | if i != 0 { 116 | fmt.Print(",") 117 | } 118 | fmt.Print(r.String()) 119 | } 120 | } 121 | fmt.Println() 122 | } 123 | fmt.Print(hex.Dump(frame.Info)) 124 | } 125 | transition = false 126 | } 127 | } 128 | } 129 | 
-------------------------------------------------------------------------------- /examples/dtmf_file.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "log" 7 | "os" 8 | 9 | "github.com/samuel/go-dsp/dsp/dtmf" 10 | ) 11 | 12 | func main() { 13 | sampleRate := 8000 14 | blockSize := 205 * sampleRate / 8000 15 | window := blockSize / 4 16 | dt := dtmf.NewStandard(sampleRate, blockSize) 17 | lastKey := -1 18 | keyCount := 0 19 | samples := make([]float32, blockSize) 20 | 21 | rd := os.Stdin 22 | if len(os.Args) > 1 && os.Args[1] != "-" { 23 | fi, err := os.Open(os.Args[1]) 24 | if err != nil { 25 | log.Fatal(err) 26 | } 27 | defer fi.Close() 28 | rd = fi 29 | } 30 | 31 | buf := make([]byte, window*2) 32 | 33 | for { 34 | _, err := rd.Read(buf) 35 | if err == io.EOF { 36 | break 37 | } else if err != nil { 38 | log.Fatal(err) 39 | } 40 | 41 | copy(samples, samples[window:]) 42 | 43 | si := len(samples) - window 44 | for i := 0; i < len(buf); i += 2 { 45 | s := float32(int16(buf[i])|(int16(buf[i+1])<<8)) / 32768.0 46 | samples[si] = s 47 | si++ 48 | } 49 | 50 | if k, t := dt.Feed(samples); k == lastKey && t > 0.0 { 51 | keyCount++ 52 | if keyCount == 9 { 53 | fmt.Printf("%c", dtmf.Keypad[k]) 54 | } 55 | } else { 56 | lastKey = k 57 | keyCount = 0 58 | } 59 | } 60 | fmt.Println() 61 | } 62 | -------------------------------------------------------------------------------- /examples/dtmf_live.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | "os/signal" 8 | 9 | "code.google.com/p/portaudio-go/portaudio" 10 | "github.com/samuel/go-dsp/dsp/dtmf" 11 | ) 12 | 13 | func main() { 14 | sampleRate := 44100 15 | blockSize := 205 * sampleRate / 8000 16 | window := blockSize / 4 17 | dt := dtmf.NewStandard(sampleRate, blockSize) 18 | lastKey := -1 19 | keyCount := 0 20 | samples := 
make([]float32, blockSize) 21 | 22 | if err := portaudio.Initialize(); err != nil { 23 | log.Fatalf("Initialize: %+v", err) 24 | } 25 | defer func() { 26 | if err := portaudio.Terminate(); err != nil { 27 | log.Fatalf("Terminate: %+v", err) 28 | } 29 | }() 30 | inputBuf := make([]float32, window) 31 | stream, err := portaudio.OpenDefaultStream(1, 0, float64(sampleRate), len(inputBuf), inputBuf) 32 | if err != nil { 33 | log.Fatalf("OpenDefaultStream: %+v", err) 34 | } 35 | defer stream.Close() 36 | if err := stream.Start(); err != nil { 37 | log.Fatalf("Start: %+v", err) 38 | } 39 | defer stream.Stop() 40 | 41 | fmt.Printf("%+v\n", stream.Info()) 42 | 43 | sig := make(chan os.Signal, 1) 44 | signal.Notify(sig, os.Interrupt, os.Kill) 45 | for { 46 | if err := stream.Read(); err != nil { 47 | log.Fatalf("Read: %+v", err) 48 | } 49 | 50 | copy(samples, samples[window:]) 51 | copy(samples[len(samples)-len(inputBuf):], inputBuf) 52 | 53 | if k, t := dt.Feed(samples); k == lastKey && t > 0.0 { 54 | keyCount++ 55 | if keyCount == 10 { 56 | fmt.Printf("%c", dtmf.Keypad[k]) 57 | } 58 | } else { 59 | lastKey = k 60 | keyCount = 0 61 | } 62 | 63 | select { 64 | case <-sig: 65 | fmt.Println() 66 | return 67 | default: 68 | } 69 | } 70 | } 71 | --------------------------------------------------------------------------------